- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Please confirm that you have these issues on Intel Xeon / RHEL6.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Rutger
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks a lot.
Please provide tarball.
Evgeny
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Please check if your AMD machine supports it.
We will try to reproduce the failure on the Xeon machine.
Evgeny
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Rutger
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for the info.
I asked our QA team to reproduce the issue.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Rutger
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
On a Core 2 Duo E6850 (stepping 0b), I got the following error:
Error: unimplemented function(s) used:
allZero_i32 in function __Vectorized_.preillumin
allOne_i32 in function __Vectorized_.preillumin
CompilerException Failed to parse IR
This happened on a task (work-group of 1 thread) for a non-optimized housekeeping part of the code (listed at the end of my post).
Does this mean that I should avoid assignments such as i=0; or i=-1;?
code:
[cpp]__kernel __attribute__((reqd_work_group_size(1,1,1))) void preillumin(const int iazimut,const int bufoffset,const int buflongfilled,const float azimut,const __global float *altilook,const __global float *integstartband,const __global float *integstopband,const __global double *xtraj2,const __global double *ytraj2,const __global double *ztraj2,const __global float *psiatti2,const __global float *thetaatti2,const __global float *phiatti2,const __global float *lobexbuf,const __global float *t2,const __global float *hardpreintbuf,const __global float *rangefilterbuf,__global float *restrict compenstoband)
{
/*
 * preillumin -- OpenCL kernel declared with a required work-group size of
 * 1x1x1. Each work-item writes exactly one float, to
 * compenstoband[get_global_id(0)].
 *
 * Steps visible in the body:
 *   1. Blend four altilook terms (grid addressed as 45*i+j, with i clamped
 *      to 0..132 and j clamped to 0..43) using the fractional weights d1/d2
 *      to obtain zfocus.
 *   2. Derive xfocus / yfocus from azimut and the work-item id.
 *   3. Accumulate three samples (n = 0, 1, 2) with weights 1, 4, 1 and
 *      scale the sum by 1.666666666666667e-01f.
 *   4. Multiply by the difference of two linear interpolations of t2 and
 *      store the result.
 *
 * NOTE(review): as posted, several __global pointers are used as if they
 * were scalars (altilook, integstartband, integstopband, xtraj2, ytraj2,
 * ztraj2, phiatti2, thetaatti2, psiatti2, lobexbuf, t2, hardpreintbuf).
 * That is not valid OpenCL C. Single-letter subscripts such as [i], [j],
 * [n], [p] were presumably eaten by the forum markup -- TODO confirm
 * against the author's original file before relying on this listing.
 */
__private int i,j,n,p;
__private float w,w2,w4,d1,d2,xfocus,yfocus,zfocus,xtraj,ytraj,ztraj;
__private float c,s;
__private float x,y,z;
__private float x1,y1,z1;
/* Map azimut to a fractional row coordinate; i = integer part clamped to
   0..132, d2 = remaining fraction (forced to 0 or 1 when clamped). */
d2=azimut*(3.316377475857735e-02f)+(-1.877929329872131e+00f);
i=floor(d2);
if (i<0)
{
i=0;
d2=0.0f;
}
if (i>132)
{
i=132;
d2=133.0f;
}
d2-=i;
/* Same scheme for the column coordinate derived from the work-item id;
   j is clamped to 0..43 and d1 is the remaining fraction. */
d1=get_global_id(0)*(9.692779287317110e-01f)+(-3.377743680511874e-01f);
j=floor(d1);
if (j<0)
{
j=0;
d1=0.0f;
}
if (j>43)
{
j=43;
d1=44.0f;
}
d1-=j;
/* Flattened index with a row stride of 45. */
n=45*i+j;
/* Weighted blend of the entries at n, n+1, n+45, n+46 plus a constant offset.
   NOTE(review): the first term reads bare `altilook`; presumably
   altilook[n] was intended -- confirm. */
zfocus=(1.0f-d2)*((1.0f-d1)*altilook+d1*altilook[n+1])+d2*((1.0f-d1)*altilook[n+45]+d1*altilook[n+46])+(-2.252434373756267e+03f);
w=get_global_id(0)*(2.955416520878326e+01f)+(3.603700952450512e+03f);
d2=zfocus+azimut*(-1.010356079772958e-04f);
d1=w*(3.501031082123518e-03f)+d2*(-1.010356084929913e-04f);
/* d2 becomes w^2 - d2^2 - d1^2; a negative value takes the fallback branch
   below instead of a square root of a negative number. */
d2=w*w-d2*d2-d1*d1;
if (d2<0.0f)
{
d1=w*(3.602066051945484e-03f);
zfocus=azimut*(1.010356079772958e-04f)+w*(-9.999935018887185e-01f);
d2=0.0f;
}
else
d2=sqrt(d2);
xfocus=azimut*(8.592837859064036e-01f)+d1*(8.592837902922715e-01f)+d2*(-5.114991375759568e-01f);
yfocus=azimut*(-5.114991349652156e-01f)+d1*(-5.114991375759568e-01f)+d2*(-8.592837902922715e-01f);
/* w2 accumulates the weighted sum of the three samples below. */
w2=0.0f;
for (n=0;n<=2;n++)
{
/* NOTE(review): p is assigned here and then overwritten by the next
   statement without being read in the code as posted; presumably it was
   the subscript of integstartband / integstopband ([p]) -- confirm. */
p=64*iazimut+get_global_id(0);
/* Sample position: start + (stop - start) * 0.5 * n, rounded via
   floor(x + 0.5), then clamped to 0..buflongfilled-1. */
p=floor(integstartband+(integstopband
-integstartband
)*(5.000000000000000e-01f)*n+0.5f);
if (p<0)
p=0;
if (p>=buflongfilled)
p=buflongfilled-1;
/* Index offset by bufoffset and wrapped with a 2048-entry mask. */
i=(p+bufoffset)&2047;
/* Vector from the trajectory sample to the focus point, and its length.
   NOTE(review): xtraj2/ytraj2/ztraj2 appear without a subscript;
   presumably [i] was lost -- confirm. */
xtraj=xfocus-xtraj2;
ytraj=yfocus-ytraj2;
ztraj=zfocus-ztraj2;
d2=sqrt(xtraj*xtraj+ytraj*ytraj+ztraj*ztraj);
/* Fixed starting vector (x1,y1,z1), rotated by three successive planar
   rotations driven by phiatti2, thetaatti2 and psiatti2.
   NOTE(review): the three angle buffers are passed to cos/sin without a
   subscript; presumably [i] was lost -- confirm. */
x1=(8.660254037844387e-01f);
y1=(5.302876193624535e-17f);
z1=(-4.999999999999999e-01f);
/* Rotation in the x-z plane. */
c=cos(phiatti2);
s=sin(phiatti2);
x=x1*c+z1*s;
z1=z1*c-x1*s;
x1=x;
/* Rotation in the y-z plane. */
c=cos(thetaatti2);
s=sin(thetaatti2);
y=y1*c-z1*s;
z1=z1*c+y1*s;
y1=y;
/* Rotation in the x-y plane. */
c=cos(psiatti2);
s=sin(psiatti2);
x=x1*c+y1*s;
y1=y1*c-x1*s;
x1=x;
/* Dot product of the focus vector with the rotated vector, divided by the
   focus-vector length; then sqrt(1 - w^2). */
w=(xtraj*x1+ytraj*y1+ztraj*z1)/d2;
w=native_sqrt(1.0f-w*w);
/* Scale to a table position, rounded and clamped to 0..3999.
   NOTE(review): i is not read before being reassigned below; presumably
   it was the subscript of lobexbuf ([i]) -- confirm. */
i=floor(w*4000.0f+0.5f);
if (i<0)
i=0;
if (i>3999)
i=3999;
w4=lobexbuf;
/* Zero w4 unless it exceeds the threshold; the negated comparison is also
   true when w4 is NaN. */
if (!(w4>1.000000047497451e-03f))
w4=0.0f;
/* Neighbouring sample indices p-1 and p+1, clamped and wrapped like above.
   NOTE(review): j is clamped to buflongfilled, whereas p itself was
   clamped to buflongfilled-1 -- verify which bound is intended. */
i=p-1;
if (i<0)
i=0;
i=(bufoffset+i)&2047;
j=p+1;
if (j>buflongfilled)
j=buflongfilled;
j=(bufoffset+j)&2047;
/* NOTE(review): as posted these are `buf - buf` on the same pointer;
   presumably differences between the [j] and [i] entries (and t2 likewise
   in the divisor) were intended -- confirm. */
x=xtraj2-xtraj2;
y=ytraj2-ytraj2;
z=ztraj2-ztraj2;
w=native_divide(xtraj*x+ytraj*y+ztraj*z,d2*(t2-t2));
/* Scale to a second table position, clamped to 0..3999.
   NOTE(review): i is again overwritten before being read; presumably it
   was the subscript of hardpreintbuf ([i]) -- confirm. */
i=floor(w*(1.292359000000000e+06f)+(1.874149169921875e+03f));
if (i<0)
i=0;
if (i>3999)
i=3999;
w=hardpreintbuf;
w4*=w;
/* Position derived from the distance d2, clamped to -4097..4094; the
   & 8191 mask maps it into 0..8191 for the rangefilterbuf lookup. */
i=floor(d2*(6.101493061542592e+00f)+(-2.613644720440822e+04f));
if (i<-4097)
i=-4097;
if (i>4094)
i=4094;
w4=native_divide(w4,rangefilterbuf[i&8191]);
/* Sample weight: 1 for n = 0 and n = 2, 4 for n = 1. */
i=1+(n&1);
i=i+i;
if (n==0 || n==2)
i=1;
w2+=i*w4;
}
/* Scale the 1-4-1 weighted sum by one sixth. */
w2*=1.666666666666667e-01f;
/* NOTE(review): p is assigned here but not read afterwards in the code as
   posted; presumably it was the subscript of integstopband /
   integstartband ([p]) below -- confirm. */
p=64*iazimut+get_global_id(0);
/* Linear interpolation of t2 at the stop position: integer part i (capped
   at buflongfilled-2 so that i+1 stays below buflongfilled), fraction w. */
i=floor(integstopband
);
if (i>=buflongfilled-1)
i=buflongfilled-2;
w=integstopband
-i;
w=w*t2[(bufoffset+i+1)&2047]+(1.0f-w)*t2[(bufoffset+i)&2047];
/* Same interpolation at the start position (integer part floored at 0),
   subtracted from the stop value. */
i=floor(integstartband
);
if (i<0)
i=0;
w4=integstartband
-i;
w-=w4*t2[(bufoffset+i+1)&2047]+(1.0f-w4)*t2[(bufoffset+i)&2047];
/* Final result: weighted sum times the interpolated t2 difference. */
w2*=w;
compenstoband[get_global_id(0)]=w2;
}
[/cpp]
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
My mistake: SSE4.1 is a prerequisite for the Intel OpenCL SDK.
On a Xeon X5570 (I just checked), it compiles with no error.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page