;-----------------------------------------------------------------------
; prop -- propagate routine (32-bit x86, NASM syntax, x87 + SSE/SSE2)
;
; In:    xmm0 = direction vector, saved to [nt]
;        xmm1 = position vector,  saved to [rt]
;        xmm5 = lane 0 is added to lane 0 of the direction copy in the
;               advance step (original note: "1/cm-n vector")
; Out:   ebx  = 0 if no sphere test succeeded, otherwise the first
;               dword of the last geo record that passed the test
;        xmm1 / [rt] = advanced position; [nt] = current direction
; Clobb: eax, ecx, edx, esi, edi, ebp, xmm2-xmm4, flags, x87 stack
;        (balanced on exit), xmm6/xmm7 when ACCL0 is defined, plus
;        whatever rand / rotxrd / gsx.asm clobber.
; Build switches: LAYRS, ACCL0, ACCL1, ACCL2 (see %ifdef blocks).
;
; NOTE(review): assumes `rand` pushes exactly one value on the x87
; stack (fyl2x needs it on top of the ln2 loaded before the call) and
; that `rotxrd` leaves the new direction in xmm0 -- confirm.
; Vector lane layout as used below: lane 0 is zeroed / handled
; separately, lanes 1..3 are the three components that get summed.
;-----------------------------------------------------------------------
prop:
        xor     ebx, ebx                ; nothing detected yet
        movaps  [nt], xmm0
        movaps  [rt], xmm1

        fldln2                          ; TOT = -(ln2 * log2(rand))
        call    rand
        fyl2x
        fchs                            ; x87: TOT

prp1:
        fldln2                          ; SCA = -(ln2 * log2(rand))
        call    rand
        fyl2x
        fchs                            ; x87: SCA TOT

        ; ---- layer lookup: index = round(([rz]-[lmin])*[lrst]),
        ;      clamped to 0..lnum
        mov     ebp, [loff]
        fld     dword [rz]
        fsub    dword [lmin]
        fmul    dword [lrst]
        fistp   dword [tmp]
        mov     eax, [tmp]
        xor     edi, edi                ; edi = 0 (lower bound)
        cmp     eax, edi
        cmovs   eax, edi                ; negative index -> 0
        mov     esi, lnum               ; upper bound (orig: "num-1")
        cmp     esi, eax
        cmovs   eax, esi                ; index above lnum -> lnum
        lea     edx, [ebp + 8*eax]      ; offset for the current layer

%ifdef LAYRS
%include "gsx.asm"
%else
        fdiv    dword [csca + edx]      ; st0 = SCA / csca      ("sca")
        fld     dword [cabs + edx]
        fdivr   st2                     ; st0 = TOT / cabs      ("tot")
        fcomi   st1
        jae     prp2                    ; tot >= sca: tot is on top
        fxch    st1                     ; otherwise bring sca to the top
prp2:
        fstp    st0                     ; drop the larger; st0 = min(sca,tot)
        fld     dword [cabs + edx]
        fmul    st1                     ; cabs * step
        fsubp   st2                     ; TOT -= cabs * step; x87: step TOT
%endif

%ifdef ACCL0
        ; -- accelerate --> bounding box of the step segment, grown by omr
        fst     dword [sca]
        movss   xmm6, [sca]
        shufps  xmm6, xmm6, 0x00        ; step length in all lanes
        movaps  xmm2, xmm1
        subss   xmm2, xmm2              ; ri: start point, lane 0 zeroed
        mulps   xmm6, xmm0
        addps   xmm6, xmm2              ; rf: end point
        movaps  xmm7, xmm2
        maxps   xmm7, xmm6              ; xmm7 = max(ri, rf)
        minps   xmm6, xmm2              ; xmm6 = min(ri, rf)
        movss   xmm2, [omr]
        shufps  xmm2, xmm2, 0x00        ; omr in all lanes
        subps   xmm6, xmm2              ; box min - omr
        addps   xmm7, xmm2              ; box max + omr
        ; <-- accelerate --
%endif

%ifdef ACCL2
        ; -- photon box --> turn the box (xmm6/xmm7) into cell index ranges
        movaps  xmm2, [rmin]
        movaps  xmm3, xmm6              ; box min
        movaps  xmm4, xmm7              ; box max
        subps   xmm3, xmm2
        subps   xmm4, xmm2
        movaps  xmm2, [rrst]
        mulps   xmm3, xmm2
        mulps   xmm4, xmm2
        maxps   xmm3, [pmin]            ; bound min indices to [pmin, qmax]
        minps   xmm4, [pmax]            ; bound max indices to [qmin, pmax]
        minps   xmm3, [qmax]
        maxps   xmm4, [qmin]
        cvtps2dq xmm3, xmm3             ; sse2 convert
        cvtps2dq xmm4, xmm4             ; float to int
        movaps  [sminr], xmm3           ; presumably overlays sminx/y/z -- confirm
        movaps  [smaxr], xmm4           ; presumably overlays smaxx/y/z -- confirm

        ; pre-scale the x and y bounds by their strides
        imul    esi, [smaxx], xnum
        mov     [smaxx], esi
        imul    edi, [smaxy], ynum
        mov     [smaxy], edi
        imul    esi, [sminx], xnum
        mov     [sminx], esi
        imul    edi, [sminy], ynum
        mov     [sminy], edi
        mov     ebp, [sminz]            ; (ebp is loaded again at yin)

        mov     esi, [sminx]
xin:
        cmp     esi, [smaxx]            ; loop in x cells
        jg      near xout
        mov     edi, [sminy]
yin:
        cmp     edi, [smaxy]            ; loop in y cells
        jg      near yout
        mov     ebp, [sminz]
zin:
        cmp     ebp, [smaxz]            ; loop in z cells
        jg      near zout

        mov     eax, esi                ; cell = x*xnum + y*ynum + z
        add     eax, edi
        add     eax, ebp
        movzx   ecx, word [arr + 2*eax] ; ecx = offset of this cell's list in els
next:
        movzx   eax, word [els + ecx]   ; next list entry
        test    ax, ax
        js      near none               ; negative entry ends the list
        shl     eax, 4                  ; entry * 16 = byte offset into geo
        lea     eax, [geo + eax]
        ; <-- photon box --
%else
        mov     ecx, lgeo               ; walk geo from the last record down
sph3:
        lea     eax, [geo - om_size + ecx]
%endif

        ; -- sphere --> test the step against the record at [eax]
        movaps  xmm2, [eax]
        subss   xmm2, xmm2              ; om position, lane 0 zeroed

%ifdef ACCL1
        ; -- accelerate --> skip if the om lies outside the step box
        movaps  xmm3, xmm7
        cmpltps xmm3, xmm2              ; max r < om r
        movaps  xmm4, xmm2
        cmpltps xmm4, xmm6              ; om r < min r
        orps    xmm3, xmm4
        pmovmskb edx, xmm3
        test    edx, edx
        jnz     near sph2               ; outside in some lane
%endif
        subps   xmm2, xmm1              ; dr=dom-r
        movaps  xmm3, xmm2
        mulps   xmm2, xmm0              ; dr*n
        mulps   xmm3, xmm3              ; dr*dr

        ; b = sum of lanes 1..3 of dr*n (0x39 rotates lanes down by one)
        shufps  xmm2, xmm2, 0x39        ; calculate b
        movss   xmm4, xmm2              ; x
        shufps  xmm2, xmm2, 0x39
        addss   xmm4, xmm2              ; x+y
        shufps  xmm2, xmm2, 0x39
        addss   xmm4, xmm2              ; b=x+y+z
        shufps  xmm4, xmm0, 0x00        ; 00bb (upper lanes = xmm0 lane 0)

        ; c = sum of lanes 1..3 of dr*dr, written into lane 0 only
        shufps  xmm3, xmm3, 0x39        ; calculate c
        movss   xmm4, xmm3              ; x
        shufps  xmm3, xmm3, 0x39
        addss   xmm4, xmm3              ; x+y
        shufps  xmm3, xmm3, 0x39
        addss   xmm4, xmm3              ; x+y+z
        movlps  [cc], xmm4              ; 00bc: c -> [cc], b -> [cc+4]
                                        ; presumably bc = cc+4 -- confirm

        fld     dword [cc]
        fld     dword [omr]
        fmul    st0
        fsubp   st1                     ; C=c-omr^2
        fld     dword [bc]
        fmul    st0
        fsubrp  st1                     ; D=b^2-C
        fldz
        fcomip  st1
        ja      sph1                    ; D < 0: reject
        fsqrt
        fld     dword [bc]
        fsubrp  st1                     ; b-sqrt(D)
        fldz
        fcomip  st1
        jae     sph1                    ; distance <= 0: reject
        fcomi   st1
        ja      sph1                    ; distance > current step: reject
        fxch    st1                     ; hit distance becomes the new step
        mov     ebx, [eax]              ; detected!
sph1:
        fstp    st0                     ; drop the rejected value / old step
        ; <-- sphere --
sph2:
%ifdef ACCL2
        add     ecx, 2                  ; next 16-bit entry of the cell list
        jmp     next
none:
        inc     ebp                     ; -- photon box -->
        jmp     zin
zout:
        add     edi, ynum
        jmp     yin
yout:
        add     esi, xnum
        jmp     xin
xout:
        ; <-- photon box --
%else
        sub     ecx, om_size
        jnz     sph3
%endif

        ; -- advance -- move the position by step * direction
        fstp    dword [sca]             ; x87: TOT
        movaps  xmm4, xmm0
        addss   xmm4, xmm5              ; 1/cm-n vector
        movss   xmm3, [sca]
        shufps  xmm3, xmm3, 0x00        ; step length in all lanes
        mulps   xmm4, xmm3
        addps   xmm1, xmm4
        movaps  [rt], xmm1
        test    ebx, ebx
        jnz     prp3                    ; done
prp4:
        fld     dword [xx]
        fcomip  st1
        ja      prp3                    ; [xx] > TOT: stop propagating
        call    rotxrd
        movaps  [nt], xmm0
        jmp     prp1
prp3:
        fstp    st0                     ; pop TOT, x87 stack balanced
        ret
        ; <-- prop --