/*
 * One time step of a softened N-body simulation with reflecting walls.
 *
 * All simulation state lives in the byte buffer `world` (native byte order):
 *
 *   byte offset          contents
 *   0                    7 doubles: eps, scaleFactor, gravityFactor,
 *                                   xmax, ymax, zmax, dt
 *   56                   position/mass buffer A (1000 x double4, w = mass)
 *   56 +  32000          body speed             (1000 x double4)
 *   56 +  64000          body acceleration      (1000 x double4)
 *   56 +  96000          position/mass buffer B (1000 x double4)
 *
 * data[0] is the "flip" flag selecting the position buffers:
 *   flip == 1 : read A, write B
 *   flip == 0 : read B, write A
 *   otherwise : read B, write B (kept as in the original; this makes
 *               work-items race on buffer B, so callers should pass 0 or 1)
 *
 * result receives, per body, 4 bytes: (short)x followed by (short)y of the
 * new position.
 *
 * NOTE(review): in the copy of this file that was reviewed, the text between
 * "for(int j=0; j" and ">xmax)" was lost. The inner interaction loop, the
 * integration step and the first half of the x-boundary test below are a
 * reconstruction (standard softened pairwise gravity, mirrored from the
 * surviving y/z boundary code). Confirm against a pristine copy. In
 * particular scaleFactor and gravityFactor were parsed by the original but
 * their only possible use was inside the lost span, so they are NOT applied
 * here.
 */
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#pragma OPENCL EXTENSION cl_intel_printf : enable

enum {
    DOUBLE_BYTES  = 8,
    BODY_BYTES    = 4 * DOUBLE_BYTES,            /* one double4 record      */
    BODY_CAPACITY = 1000,                        /* records per array       */
    ARRAY_BYTES   = BODY_CAPACITY * BODY_BYTES,  /* 32000                   */

    /* Header fields (byte offsets into world). */
    EPS_OFFSET     = 0 * DOUBLE_BYTES,
    SCALE_OFFSET   = 1 * DOUBLE_BYTES,           /* see NOTE(review) above  */
    GRAVITY_OFFSET = 2 * DOUBLE_BYTES,           /* see NOTE(review) above  */
    XMAX_OFFSET    = 3 * DOUBLE_BYTES,
    YMAX_OFFSET    = 4 * DOUBLE_BYTES,
    ZMAX_OFFSET    = 5 * DOUBLE_BYTES,
    DT_OFFSET      = 6 * DOUBLE_BYTES,
    HEADER_BYTES   = 7 * DOUBLE_BYTES,           /* 56                      */

    /* Body arrays (byte offsets into world). */
    POS_A_OFFSET = HEADER_BYTES,
    SPEED_OFFSET = HEADER_BYTES + 1 * ARRAY_BYTES,
    ACCEL_OFFSET = HEADER_BYTES + 2 * ARRAY_BYTES,
    POS_B_OFFSET = HEADER_BYTES + 3 * ARRAY_BYTES,

    /* Positions cached per work-group pass; 256 * 32 B = 8 KiB of __local. */
    TILE_CAPACITY = 256
};

/* Reads one double stored byte-wise at src; no alignment is required. */
static double load_double(__global const uchar *src)
{
    union { double value; uchar bytes[DOUBLE_BYTES]; } raw;
    for (int i = 0; i < DOUBLE_BYTES; i++) {
        raw.bytes[i] = src[i];
    }
    return raw.value;
}

/* Reads four consecutive doubles at src as (x, y, z, w). */
static double4 load_double4(__global const uchar *src)
{
    return (double4)(load_double(src + 0 * DOUBLE_BYTES),
                     load_double(src + 1 * DOUBLE_BYTES),
                     load_double(src + 2 * DOUBLE_BYTES),
                     load_double(src + 3 * DOUBLE_BYTES));
}

/* Writes v to dst byte-wise as four consecutive doubles (x, y, z, w). */
static void store_double4(__global uchar *dst, double4 v)
{
    union { double values[4]; uchar bytes[BODY_BYTES]; } raw;
    raw.values[0] = v.x;
    raw.values[1] = v.y;
    raw.values[2] = v.z;
    raw.values[3] = v.w;
    for (int i = 0; i < BODY_BYTES; i++) {
        dst[i] = raw.bytes[i];
    }
}

/*
 * world  : simulation state, layout described at the top of this file;
 *          speed, acceleration and the destination position buffer are
 *          updated in place for body get_global_id(0).
 * data   : data[0] is the flip flag.
 * result : 4 bytes per body, (short)x then (short)y of the new position.
 *
 * The global work size is taken as the number of bodies (clamped to
 * BODY_CAPACITY so that surplus work-items never touch memory outside the
 * arrays).
 */
kernel void nbody(__global uchar *world, __global uchar *data, __global uchar *result)
{
    /* Shared by the work-group; the original used a private array here, so
     * the barrier-based exchange never actually shared any positions. */
    __local double4 pblock[TILE_CAPACITY];

    const int flip = (int)data[0];

    const double eps  = load_double(world + EPS_OFFSET);
    const double xmax = load_double(world + XMAX_OFFSET);
    const double ymax = load_double(world + YMAX_OFFSET);
    const double zmax = load_double(world + ZMAX_OFFSET);
    const double dt   = load_double(world + DT_OFFSET);

    /* Double buffering of positions, selected by flip (see file header). */
    __global const uchar *posSrc = world + ((flip == 1) ? POS_A_OFFSET : POS_B_OFFSET);
    __global uchar *posDst       = world + ((flip == 0) ? POS_A_OFFSET : POS_B_OFFSET);

    /* w stays 0 so the mass stored in p.w / s.w is never integrated. */
    const double4 dtv = (double4)(dt, dt, dt, 0.0);

    const int gti = (int)get_global_id(0);
    const int ti  = (int)get_local_id(0);
    const int n   = (int)get_global_size(0);
    const int nt  = (int)get_local_size(0);

    const int bodies = min(n, (int)BODY_CAPACITY);
    const int tile   = min(nt, (int)TILE_CAPACITY);

    /* Surplus work-items must keep running: they still have to reach every
     * barrier below. They simply never read or write body records. */
    const bool active = gti < bodies;

    double4 p = (double4)(0.0, 0.0, 0.0, 0.0);
    double4 s = (double4)(0.0, 0.0, 0.0, 0.0);
    double4 a = (double4)(0.0, 0.0, 0.0, 0.0);
    if (active) {
        p = load_double4(posSrc + gti * BODY_BYTES);
        s = load_double4(world + SPEED_OFFSET + gti * BODY_BYTES);
    }

    /* Walk over all bodies one tile at a time. Loop bounds depend only on
     * work-group-uniform values, so every work-item hits the same barriers. */
    for (int base = 0; base < bodies; base += tile) {
        const int count = min(tile, bodies - base);

        if (ti < count) {
            pblock[ti] = load_double4(posSrc + (base + ti) * BODY_BYTES);
        }
        barrier(CLK_LOCAL_MEM_FENCE);   /* tile fully loaded */

        /* NOTE(review): reconstructed interaction (see file header).
         * eps softens the distance; it must be > 0, otherwise the
         * self-interaction term evaluates to inf * 0. */
        for (int j = 0; j < count; j++) {
            const double4 p2 = pblock[j];
            const double4 d = p2 - p;
            const double invr = rsqrt(d.x * d.x + d.y * d.y + d.z * d.z + eps);
            const double f = p2.w * invr * invr * invr;
            a += f * d;
        }
        barrier(CLK_LOCAL_MEM_FENCE);   /* done reading before next overwrite */
    }

    /* NOTE(review): reconstructed integration step (see file header). */
    p += dtv * s + 0.5 * dtv * dtv * a;
    s += dtv * a;

    /* Reflect off the walls of the box [0, xmax] x [0, ymax] x [0, zmax]:
     * mirror the position and negate speed and acceleration on that axis.
     * The x < 0 branch is reconstructed by symmetry with y and z. */
    if (p.x < 0) {
        p.x = -p.x;
        a.x = -a.x;
        s.x = -s.x;
    } else if (p.x > xmax) {
        p.x = 2 * xmax - p.x;
        a.x = -a.x;
        s.x = -s.x;
    }
    if (p.y < 0) {
        p.y = -p.y;
        a.y = -a.y;
        s.y = -s.y;
    } else if (p.y > ymax) {
        p.y = 2 * ymax - p.y;
        a.y = -a.y;
        s.y = -s.y;
    }
    if (p.z < 0) {
        p.z = -p.z;
        a.z = -a.z;
        s.z = -s.z;
    } else if (p.z > zmax) {
        p.z = 2 * zmax - p.z;
        a.z = -a.z;
        s.z = -s.z;
    }

    if (active) {
        store_double4(world + SPEED_OFFSET + gti * BODY_BYTES, s);

        /* The original reused a byte counter that had already advanced by
         * 32 here, so every acceleration landed one record too far and the
         * last body overwrote the start of position buffer B. */
        store_double4(world + ACCEL_OFFSET + gti * BODY_BYTES, a);

        store_double4(posDst + gti * BODY_BYTES, p);

        /* Compact output: x and y truncated to short, native byte order. */
        union { short values[2]; uchar bytes[4]; } packed;
        packed.values[0] = (short)p.x;
        packed.values[1] = (short)p.y;
        for (int i = 0; i < 4; i++) {
            result[gti * 4 + i] = packed.bytes[i];
        }
    }
}