- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Rutger
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Please check if your AMD machine supports it.
We will try to reproduce the failure on the Xeon machine.
Evgeny
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Rutger
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
On a Core 2 Duo E6850 (stepping 0B), I got the following error:
Error: unimplemented function(s) used:
allZero_i32 in function __Vectorized_.preillumin
allOne_i32 in function __Vectorized_.preillumin
CompilerException Failed to parse IR
This happens on a task (work-group of 1 thread) for a non-optimized housekeeping part of the code (listed at the end of my post).
Does this mean that I should avoid assignments such as i=0; or i=-1;?
code:
[cpp]/*
 * preillumin: OpenCL kernel declared with a required work-group size of (1,1,1).
 * Each work-item stores one float into compenstoband[get_global_id(0)].
 *
 * Steps, as far as the posted text shows:
 *  - From azimut and get_global_id(0), derive indices i (clamped to 0..132) and
 *    j (clamped to 0..43) with fractional parts left in d2 and d1, then blend
 *    altilook entries around n = 45*i+j (n+1, n+45, n+46) into zfocus.
 *  - Compute xfocus and yfocus from azimut, d1 and d2.
 *  - Accumulate w2 over three samples n = 0, 1, 2 with integer weights 1, 4, 1
 *    and scale by 1.666666666666667e-01f (looks like a Simpson's-rule pattern;
 *    confirm with the author).
 *  - Multiply w2 by the difference of two linearly interpolated t2 values and
 *    store the result.
 *
 * NOTE(review): several pointer parameters (altilook, integstartband,
 * integstopband, xtraj2, ytraj2, ztraj2, psiatti2, thetaatti2, phiatti2,
 * lobexbuf, t2, hardpreintbuf) appear below without a subscript. Presumably the
 * array subscripts were lost when the forum post was rendered; the code will
 * not compile as shown. Verify against the author's original source.
 */
__kernel __attribute__((reqd_work_group_size(1,1,1))) void preillumin(const int iazimut,const int bufoffset,const int buflongfilled,const float azimut,const __global float *altilook,const __global float *integstartband,const __global float *integstopband,const __global double *xtraj2,const __global double *ytraj2,const __global double *ztraj2,const __global float *psiatti2,const __global float *thetaatti2,const __global float *phiatti2,const __global float *lobexbuf,const __global float *t2,const __global float *hardpreintbuf,const __global float *rangefilterbuf,__global float *restrict compenstoband) { __private int i,j,n,p; __private float w,w2,w4,d1,d2,xfocus,yfocus,zfocus,xtraj,ytraj,ztraj; __private float c,s; __private float x,y,z; __private float x1,y1,z1; d2=azimut*(3.316377475857735e-02f)+(-1.877929329872131e+00f); i=floor(d2); if (i<0) { i=0; d2=0.0f; } if (i>132) { i=132; d2=133.0f; } d2-=i; d1=get_global_id(0)*(9.692779287317110e-01f)+(-3.377743680511874e-01f); j=floor(d1); if (j<0) { j=0; d1=0.0f; } if (j>43) { j=43; d1=44.0f; } d1-=j; n=45*i+j; zfocus=(1.0f-d2)*((1.0f-d1)*altilook+d1*altilook[n+1])+d2*((1.0f-d1)*altilook[n+45]+d1*altilook[n+46])+(-2.252434373756267e+03f); w=get_global_id(0)*(2.955416520878326e+01f)+(3.603700952450512e+03f); d2=zfocus+azimut*(-1.010356079772958e-04f); d1=w*(3.501031082123518e-03f)+d2*(-1.010356084929913e-04f); d2=w*w-d2*d2-d1*d1; if (d2<0.0f) { d1=w*(3.602066051945484e-03f); zfocus=azimut*(1.010356079772958e-04f)+w*(-9.999935018887185e-01f); d2=0.0f; } else d2=sqrt(d2); xfocus=azimut*(8.592837859064036e-01f)+d1*(8.592837902922715e-01f)+d2*(-5.114991375759568e-01f); yfocus=azimut*(-5.114991349652156e-01f)+d1*(-5.114991375759568e-01f)+d2*(-8.592837902922715e-01f); w2=0.0f; for (n=0;n<=2;n++) { p=64*iazimut+get_global_id(0); p=floor(integstartband +(integstopband
-integstartband
)*(5.000000000000000e-01f)*n+0.5f); if (p<0) p=0; if (p>=buflongfilled) p=buflongfilled-1; i=(p+bufoffset)&2047; xtraj=xfocus-xtraj2; ytraj=yfocus-ytraj2; ztraj=zfocus-ztraj2; d2=sqrt(xtraj*xtraj+ytraj*ytraj+ztraj*ztraj); x1=(8.660254037844387e-01f); y1=(5.302876193624535e-17f); z1=(-4.999999999999999e-01f); c=cos(phiatti2); s=sin(phiatti2); x=x1*c+z1*s; z1=z1*c-x1*s; x1=x; c=cos(thetaatti2); s=sin(thetaatti2); y=y1*c-z1*s; z1=z1*c+y1*s; y1=y; c=cos(psiatti2); s=sin(psiatti2); x=x1*c+y1*s; y1=y1*c-x1*s; x1=x; w=(xtraj*x1+ytraj*y1+ztraj*z1)/d2; w=native_sqrt(1.0f-w*w); i=floor(w*4000.0f+0.5f); if (i<0) i=0; if (i>3999) i=3999; w4=lobexbuf; if (!(w4>1.000000047497451e-03f)) w4=0.0f; i=p-1; if (i<0) i=0; i=(bufoffset+i)&2047; j=p+1; if (j>buflongfilled) j=buflongfilled; j=(bufoffset+j)&2047; x=xtraj2
-xtraj2; y=ytraj2 -ytraj2; z=ztraj2 -ztraj2; w=native_divide(xtraj*x+ytraj*y+ztraj*z,d2*(t2 -t2)); i=floor(w*(1.292359000000000e+06f)+(1.874149169921875e+03f)); if (i<0) i=0; if (i>3999) i=3999; w=hardpreintbuf; w4*=w; i=floor(d2*(6.101493061542592e+00f)+(-2.613644720440822e+04f)); if (i<-4097) i=-4097; if (i>4094) i=4094; w4=native_divide(w4,rangefilterbuf[i&8191]); /* sample weight: 1 when n is 0 or 2, 4 when n is 1 */ i=1+(n&1); i=i+i; if (n==0 || n==2) i=1; w2+=i*w4; } w2*=1.666666666666667e-01f; p=64*iazimut+get_global_id(0); i=floor(integstopband ); if (i>=buflongfilled-1) i=buflongfilled-2; w=integstopband
-i; w=w*t2[(bufoffset+i+1)&2047]+(1.0f-w)*t2[(bufoffset+i)&2047]; i=floor(integstartband
); if (i<0) i=0; w4=integstartband
-i; w-=w4*t2[(bufoffset+i+1)&2047]+(1.0f-w4)*t2[(bufoffset+i)&2047]; w2*=w; compenstoband[get_global_id(0)]=w2; } [/cpp]
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
My mistake: SSE4.1 is a prerequisite for the Intel OpenCL SDK.
On a Xeon X5570 (I just checked), it compiles with no errors.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page