#include #include #include #include #include #include #include #include #include #include // hide warnings from deprecated APIs #define CL_USE_DEPRECATED_OPENCL_1_2_APIS #ifdef __APPLE_CC__ #include #else #include #endif using namespace std; namespace xppc{ #define OFLA // omit the flasher DOM #define ROMB // use rhomb cells aligned with the array #define ASENS // enable angular sensitivity #define RAND // disable for deterministic results #define TILT // enable tilted ice layers #define ANIZ // enable anisotropic ice //#define PDIR // record photon arrival direction and impact point #define MKOW // photon yield parametrizations by M. Kowalski #define ANGW // smear cherenkov cone due to shower development #define LONG // simulate longitudinal cascade development #define CWLR // new parameterizations by C. Wiebusch and L. Raedel // requires that MKOW, ANGW, and LONG are all defined #ifdef ASENS #define ANUM 11 // number of coefficients in the angular sensitivity curve #endif #ifdef TILT #define LMAX 6 // number of dust loggers #define LYRS 170 // number of depth points #endif #ifdef ROMB #define DIR1 9.3 #define DIR2 129.3 #define CX 21 #define CY 19 #define NSTR 94 #else #define CX 13 #define CY 13 #define NSTR 94 #endif #define USMA // enable use of local shared memory //#define XAMD // enable more OpenCL-specific optimizations #define TALL // enable faster 2-stage processing, takes more memory #define OVER 10 // size of photon bunches along the muon track #define HQUO 1 // save at most photons/HQUO hits #define NPHO 1024 // maximum number of photons propagated by one thread #define WNUM 32 // number of wavelength slices #define MAXLYS 172 // maximum number of ice layers #define MAXGEO 5200 // maximum number of OMs #define MAXRND 131072 // max. number of random number multipliers #define XXX 1.e-5f #define FPI 3.141592653589793f #define OMR 0.16510f // DOM radius [m] #define BUG_with_Unload #include "pro.cxx" #include "ini.cxx" #define SHRT #include "pro.cxx" #undef SHRT void initialize(float enh = 1.f){ m.set(); d.eff*=enh; } unsigned int pmax, pmxo, pn; bool xgpu=false; void checkError(cl_int result){ if(result!=CL_SUCCESS){ cerr<<"OpenCL Error: "< > all; struct gpu{ dats d; int device, mult; cl_uint nblk; size_t nthr, ntot; unsigned int npho, pmax, pmxo; unsigned int old, num; float deviceTime; cl_event event; cl_platform_id pfID; cl_device_id devID; cl_context ctx; cl_command_queue cq; cl_program program; cl_kernel clkernel; cl_mem ed, ez, eh, ep, eo; // pointers to structures on device #ifdef TALL cl_mem bf; #endif string& replace(string& in, string old, string str){ string clx; int k=0, m=0; while((m=in.find(old, k))!=-1){ clx.append(in, k, m-k); k=m+old.length(); clx.append(str); } clx.append(in, k, in.length()-k); return in=clx; } gpu(int device) : mult(1), npho(NPHO), old(0), deviceTime(0){ this->device=device; { ostringstream o; o<<"NPHO_"<0){ mult=aux; cerr<<"Setting XMLT="<0) cerr<<"Error: TOT was a nan or an inf "<=d.hnum){ d.hidx=d.hnum; cerr<<"Error: data buffer overflow occurred!"<0){ unsigned int size=d.hidx*sizeof(hit); checkError(clEnqueueReadBuffer(cq, eh, CL_FALSE, 0, size, &q.hits[xppc::d.hidx], 0, NULL, NULL)); xppc::d.hidx+=d.hidx; } } void kernel_c(unsigned int & idx){ if(old>0) checkError(clFinish(cq)); unsigned int pn=num/OVER; if(pn>0){ unsigned int size=pn*sizeof(photon); checkError(clEnqueueWriteBuffer(cq, ep, CL_FALSE, 0, size, &q.pz[idx], 0, NULL, NULL)); idx+=pn; } } void kernel_f(){ checkError(clFinish(cq)); if(num>0){ unsigned int zero=0; checkError(clSetKernelArg(clkernel, 0, sizeof(unsigned int), &zero)); checkError(clEnqueueTask(cq, clkernel, 0, NULL, NULL)); checkError(clSetKernelArg(clkernel, 0, sizeof(unsigned int), &num)); checkError(clEnqueueNDRangeKernel(cq, clkernel, 1, NULL, &ntot, &nthr, 0, NULL, &event)); checkError(clFlush(cq)); } } void stop(){ fprintf(stderr, "Device time: %2.1f [ms]\n", deviceTime); if(clkernel) checkError(clReleaseKernel(clkernel)); if(program) checkError(clReleaseProgram(program)); if(cq) checkError(clReleaseCommandQueue(cq)); if(ctx) checkError(clReleaseContext(ctx)); } }; vector gpus; void ini(int type){ d.hnum=0; pmax=0, pmxo=0, pn=0; for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++){ i->set(); i->ini(type); if(xgpu) sv++; d.hnum+=i->d.hnum; pmax+=i->pmax, pmxo+=i->pmxo; } { q.hits = new hit[d.hnum]; } { if(d.type==0) q.pz = new photon[pmxo]; } } void fin(){ for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->fin(); if(d.type==0) delete q.pz; delete [] q.hits; } void listDevices(){ cl_device_type dtype=0; char * only; only=getenv("OCPU"); if(only!=NULL) if(*only==0 || atoi(only)>0){ dtype|=CL_DEVICE_TYPE_CPU; cerr<<"Running only on CPUs!"<0){ dtype|=CL_DEVICE_TYPE_GPU; cerr<<"Running only on GPUs!"<0){ dtype|=CL_DEVICE_TYPE_ACCELERATOR; cerr<<"Running only on ACCs!"<0){ cl_device_id devices[num]; checkError(clGetDeviceIDs(platform, dtype, num, devices, NULL)); for(unsigned int j=0; j0){ d.hidx=0; for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->kernel_i(); cerr<<"photons: "<::iterator i=gpus.begin(); i!=gpus.end(); i++){ i->num=over*((num*(unsigned long long) i->pmax)/(over*(unsigned long long) pmax)); sum+=i->num; } while(num>sum){ static int res=0; gpu& g=gpus[res++%gpus.size()]; if(g.num::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->kernel_c(idx); } for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->kernel_f(); if(old>0) print(); old=num; for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->old=i->num; } float square(float x){ return x*x; } int flset(int str, int dom){ int type=1; float r[3]={0, 0, 0}; if(str<0){ type=2; str=-str; } if(str==0) switch(dom){ case 1: type=3; r[0]=544.07; r[1]=55.89; r[2]=136.86; break; case 2: type=4; r[0]=11.87; r[1]=179.19; r[2]=-205.64; break; } else for(int n=0; n0){ float FLZ, FLR; sincosf(fcv*30.f, &FLZ, &FLR); FLZ*=OMR, FLR*=OMR; sft[0]=FLR*n[0]; sft[1]=FLR*n[1]; sft[2]=FLZ; r[3]+=OMR*d.ocv; } float xi; sincosf(d.up, &n[2], &xi); n[0]*=xi; n[1]*=xi; if(m!=NULL){ float o[3]={0,0,1}; float r[3]; r[0]=m[1]*o[2]-m[2]*o[1]; // m[1] r[1]=m[2]*o[0]-m[0]*o[2]; //-m[0] r[2]=m[0]*o[1]-m[1]*o[0]; // 0 float norm=sqrt(r[0]*r[0]+r[1]*r[1]+r[2]*r[2]); if(norm>0){ float cs=0; for(int i=0; i<3; i++) r[i]/=norm, cs+=o[i]*m[i]; float sn=sin(acos(cs)); //norm float R[3][3]={0}; for(int i=0; i<3; i++) for(int j=0; j<3; j++) R[i][j]=(i==j?cs:sn*r[3-i-j]*((j-i+3)%3==1?1:-1))+(1-cs)*r[i]*r[j]; float tmp[3]; for(int i=0; i<3; i++){ tmp[i]=0; for(int j=0; j<3; j++) tmp[i]+=R[i][j]*n[j]; } for(int i=0; i<3; i++) n[i]=tmp[i]; for(int i=0; i<3; i++){ tmp[i]=0; for(int j=0; j<3; j++) tmp[i]+=R[i][j]*sft[j]; } for(int i=0; i<3; i++) sft[i]=tmp[i]; } } for(int i=0; i<3; i++) r[i]+=sft[i]; } #endif void flone(unsigned long long num){ for(long long i=llroundf(num*(long double)d.eff); i>0; i-=pmax) kernel(min(i, (long long) pmax)); kernel(0); } void flasher(int str, int dom, unsigned long long num, int itr){ flini(str, dom); for(int j=0; j0) printf("\n"); } fin(); } void start(){ } void stop(){ fprintf(stderr, "\n"); for(vector::iterator i=gpus.begin(); i!=gpus.end(); i++) i->set(), i->stop(); } void choose(int device){ listDevices(); int deviceCount=all.size(); if(device<0){ if(deviceCount==0){ cerr<<"Could not find compatible devices!"<=deviceCount){ cerr<<"Device #"<1; } #include "f2k.cxx" } #ifndef XLIB using namespace xppc; int main(int arg_c, char *arg_a[]){ start(); if(arg_c<=1){ listDevices(); fprintf(stderr, "Use: %s [device] (f2k muons)\n" " %s [str] [om] [num] [device] (flasher)\n", arg_a[0], arg_a[0]); } else if(arg_c<=2){ int device=0; if(arg_c>1) device=atoi(arg_a[1]); initialize(); choose(device); fprintf(stderr, "Processing f2k muons from stdin on device %d\n", device); f2k(); } else{ int str=0, dom=0, device=0, itr=0; unsigned long long num=1000000ULL; if(arg_c>1) str=atoi(arg_a[1]); if(arg_c>2) dom=atoi(arg_a[2]); if(arg_c>3){ num=(unsigned long long) atof(arg_a[3]); char * sub = strchr(arg_a[3], '*'); if(sub!=NULL) itr=(int) atof(++sub); } if(arg_c>4) device=atoi(arg_a[4]); initialize(); choose(device); fprintf(stderr, "Running flasher simulation on device %d\n", device); flasher(str, dom, num, itr); } stop(); } #endif