- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I'm testing my code on an Ivy Bridge processor under Windows 7. When I tell it to use the CPU device, it works correctly. When I try to run on the GPU device, clCreateProgramWithSource() fails with the following error:
igdfcl32.dll successfully completed build.
error: Cannot yet select: 0x2fe3f10: i8 = any_extend 0x3d24138 [ID=66]
0x3d24138: i32 = and 0x3d240b0, 0x3e5ac30 [ID=63]
0x3d240b0: i32 = xor 0x2fe31c8, 0x3d233f0 [ID=61]
0x2fe31c8: i32 = GHAL3DISD::GHAL3DSETCC 0x3e5c078, 0x3e5ab20, 0x3d23f18 [ID=59]
0x3e5c078: f32,ch = llvm.GHAL3D.fabsf 0x2fe2f48, 0x3e5b0f8, 0x3d23830 [ORD=123] [ID=57]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5b0f8: i32 = Constant<50> [ID=18]
0x3d23830: f32 = fdiv 0x3e5acb8, 0x3e5c430 [ORD=103] [ID=55]
0x3e5acb8: f32 = fmul 0x3e5aa98, 0x2fe3690 [ORD=102] [ID=54]
0x3e5aa98: f32 = fmul 0x3e5b4c8, 0x3d23258 [ORD=101] [ID=37]
0x3e5b4c8: f32,ch = CopyFromReg 0x2fe2f48, 0x2fe3be0 [ORD=101] [ID=25]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x2fe3be0: f32 = Register %reg1038 [ORD=101] [ID=6]
0x3d23258: f32 = ConstantFP<-2.000000e+000> [ORD=101] [ID=7]
0x2fe3690: f32 = fadd 0x3d23a50, 0x2fe3cf0 [ORD=100] [ID=53]
0x3d23a50: f32 = fadd 0x3d23500, 0x2fe3608 [ORD=96] [ID=52]
0x3d23500: f32 = fmul 0x2fe3e00, 0x3e5c100 [ORD=92] [ID=47]
0x2fe3e00: f32 = GHAL3DISD::EXTRACTX 0x3d5ab58 [ID=40]
0x3d5ab58: v4f32 = fsub 0x3e5ae50, 0x3d5aad0 [ORD=89] [ID=33]
0x3e5ae50: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3d5a828 [ORD=89] [ID=22]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3d5a828: v4f32 = Register %reg1057 [ORD=89] [ID=3]
0x3d5aad0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a878 [ORD=89] [ID=23]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a878: v4f32 = Register %reg1056 [ORD=89] [ID=4]
0x3e5c100: f32 = GHAL3DISD::EXTRACTX 0x3d5abe0 [ID=34]
0x3d5abe0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x2fe3580 [ORD=91] [ID=24]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x2fe3580: v4f32 = Register %reg1048 [ORD=91] [ID=5]
0x2fe3608: f32 = fmul 0x3d232e0, 0x2fe3938 [ORD=95] [ID=48]
0x3d232e0: f32 = GHAL3DISD::EXTRACTY 0x3d5ab58 [ID=41]
0x3d5ab58: v4f32 = fsub 0x3e5ae50, 0x3d5aad0 [ORD=89] [ID=33]
0x3e5ae50: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3d5a828 [ORD=89] [ID=22]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3d5a828: v4f32 = Register %reg1057 [ORD=89] [ID=3]
0x3d5aad0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a878 [ORD=89] [ID=23]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a878: v4f32 = Register %reg1056 [ORD=89] [ID=4]
0x2fe3938: f32 = GHAL3DISD::EXTRACTY 0x3d5abe0 [ID=35]
0x3d5abe0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x2fe3580 [ORD=91] [ID=24]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x2fe3580: v4f32 = Register %reg1048 [ORD=91] [ID=5]
0x2fe3cf0: f32 = fmul 0x3d23610, 0x2fe37a0 [ORD=99] [ID=49]
0x3d23610: f32 = GHAL3DISD::EXTRACTZ 0x3d5ab58 [ID=42]
0x3d5ab58: v4f32 = fsub 0x3e5ae50, 0x3d5aad0 [ORD=89] [ID=33]
0x3e5ae50: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3d5a828 [ORD=89] [ID=22]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3d5a828: v4f32 = Register %reg1057 [ORD=89] [ID=3]
0x3d5aad0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a878 [ORD=89] [ID=23]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a878: v4f32 = Register %reg1056 [ORD=89] [ID=4]
0x2fe37a0: f32 = GHAL3DISD::EXTRACTZ 0x3d5abe0 [ID=36]
0x3d5abe0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x2fe3580 [ORD=91] [ID=24]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x2fe3580: v4f32 = Register %reg1048 [ORD=91] [ID=5]
0x3e5c430: f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a6e0 [ORD=103] [ID=26]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a6e0: f32 = Register %reg1051 [ORD=103] [ID=8]
0x3e5ab20: f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a988 [ORD=124] [ID=29]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a988: f32 = Register %reg1025 [ORD=124] [ID=11]
0x3d23f18: ch = setgt [ID=20]
0x3d233f0: i32 = Constant<-1> [ORD=88] [ID=2]
0x3e5ac30: i32 = Constant<1> [ID=17]
error: midlevel compiler failed build.
Any idea what this means?
Peter
igdfcl32.dll successfully completed build.
error: Cannot yet select: 0x2fe3f10: i8 = any_extend 0x3d24138 [ID=66]
0x3d24138: i32 = and 0x3d240b0, 0x3e5ac30 [ID=63]
0x3d240b0: i32 = xor 0x2fe31c8, 0x3d233f0 [ID=61]
0x2fe31c8: i32 = GHAL3DISD::GHAL3DSETCC 0x3e5c078, 0x3e5ab20, 0x3d23f18 [ID=59]
0x3e5c078: f32,ch = llvm.GHAL3D.fabsf 0x2fe2f48, 0x3e5b0f8, 0x3d23830 [ORD=123] [ID=57]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5b0f8: i32 = Constant<50> [ID=18]
0x3d23830: f32 = fdiv 0x3e5acb8, 0x3e5c430 [ORD=103] [ID=55]
0x3e5acb8: f32 = fmul 0x3e5aa98, 0x2fe3690 [ORD=102] [ID=54]
0x3e5aa98: f32 = fmul 0x3e5b4c8, 0x3d23258 [ORD=101] [ID=37]
0x3e5b4c8: f32,ch = CopyFromReg 0x2fe2f48, 0x2fe3be0 [ORD=101] [ID=25]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x2fe3be0: f32 = Register %reg1038 [ORD=101] [ID=6]
0x3d23258: f32 = ConstantFP<-2.000000e+000> [ORD=101] [ID=7]
0x2fe3690: f32 = fadd 0x3d23a50, 0x2fe3cf0 [ORD=100] [ID=53]
0x3d23a50: f32 = fadd 0x3d23500, 0x2fe3608 [ORD=96] [ID=52]
0x3d23500: f32 = fmul 0x2fe3e00, 0x3e5c100 [ORD=92] [ID=47]
0x2fe3e00: f32 = GHAL3DISD::EXTRACTX 0x3d5ab58 [ID=40]
0x3d5ab58: v4f32 = fsub 0x3e5ae50, 0x3d5aad0 [ORD=89] [ID=33]
0x3e5ae50: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3d5a828 [ORD=89] [ID=22]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3d5a828: v4f32 = Register %reg1057 [ORD=89] [ID=3]
0x3d5aad0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a878 [ORD=89] [ID=23]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a878: v4f32 = Register %reg1056 [ORD=89] [ID=4]
0x3e5c100: f32 = GHAL3DISD::EXTRACTX 0x3d5abe0 [ID=34]
0x3d5abe0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x2fe3580 [ORD=91] [ID=24]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x2fe3580: v4f32 = Register %reg1048 [ORD=91] [ID=5]
0x2fe3608: f32 = fmul 0x3d232e0, 0x2fe3938 [ORD=95] [ID=48]
0x3d232e0: f32 = GHAL3DISD::EXTRACTY 0x3d5ab58 [ID=41]
0x3d5ab58: v4f32 = fsub 0x3e5ae50, 0x3d5aad0 [ORD=89] [ID=33]
0x3e5ae50: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3d5a828 [ORD=89] [ID=22]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3d5a828: v4f32 = Register %reg1057 [ORD=89] [ID=3]
0x3d5aad0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a878 [ORD=89] [ID=23]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a878: v4f32 = Register %reg1056 [ORD=89] [ID=4]
0x2fe3938: f32 = GHAL3DISD::EXTRACTY 0x3d5abe0 [ID=35]
0x3d5abe0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x2fe3580 [ORD=91] [ID=24]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x2fe3580: v4f32 = Register %reg1048 [ORD=91] [ID=5]
0x2fe3cf0: f32 = fmul 0x3d23610, 0x2fe37a0 [ORD=99] [ID=49]
0x3d23610: f32 = GHAL3DISD::EXTRACTZ 0x3d5ab58 [ID=42]
0x3d5ab58: v4f32 = fsub 0x3e5ae50, 0x3d5aad0 [ORD=89] [ID=33]
0x3e5ae50: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3d5a828 [ORD=89] [ID=22]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3d5a828: v4f32 = Register %reg1057 [ORD=89] [ID=3]
0x3d5aad0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a878 [ORD=89] [ID=23]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a878: v4f32 = Register %reg1056 [ORD=89] [ID=4]
0x2fe37a0: f32 = GHAL3DISD::EXTRACTZ 0x3d5abe0 [ID=36]
0x3d5abe0: v4f32,ch = CopyFromReg 0x2fe2f48, 0x2fe3580 [ORD=91] [ID=24]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x2fe3580: v4f32 = Register %reg1048 [ORD=91] [ID=5]
0x3e5c430: f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a6e0 [ORD=103] [ID=26]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a6e0: f32 = Register %reg1051 [ORD=103] [ID=8]
0x3e5ab20: f32,ch = CopyFromReg 0x2fe2f48, 0x3e5a988 [ORD=124] [ID=29]
0x2fe2f48: ch = EntryToken [ORD=88] [ID=0]
0x3e5a988: f32 = Register %reg1025 [ORD=124] [ID=11]
0x3d23f18: ch = setgt [ID=20]
0x3d233f0: i32 = Constant<-1> [ORD=88] [ID=2]
0x3e5ac30: i32 = Constant<1> [ID=17]
error: midlevel compiler failed build.
Any idea what this means?
Peter
Link Copied
5 Replies
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Its hard to know from the error log. If you can share the code that will be great. Or I will suggest start deleting the code from kernel to pin point to a small set of lines which causes this issue.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Here is the source code for the kernel. This is part of OpenMM (https://simtk.org/home/openmm):
#define EXP native_exp #define LOG native_log #define RECIP native_recip #define RSQRT native_rsqrt #define SQRT native_sqrt #define WORK_GROUP_SIZE 64 #define CONSTRAIN_VELOCITIES 1 /** * Enforce constraints on SHAKE clusters */ __kernel void applyShakeToHydrogens(int numClusters, float tol, __global const float4* restrict oldPos, __global float4* restrict posDelta, __global const int4* restrict clusterAtoms, __global const float4* restrict clusterParams) { int index = get_global_id(0); while (index < numClusters) { // Load the data for this cluster. int4 atoms = clusterAtoms[index]; float4 params = clusterParams[index]; float4 pos = oldPos[atoms.x]; float4 xpi = posDelta[atoms.x]; float4 pos1 = oldPos[atoms.y]; float4 xpj1 = posDelta[atoms.y]; float4 pos2 = {0.0f, 0.0f, 0.0f, 0.0f}; float4 xpj2 = {0.0f, 0.0f, 0.0f, 0.0f}; float invMassCentral = params.x; float avgMass = params.y; float d2 = params.z; float invMassPeripheral = params.w; if (atoms.z != -1) { pos2 = oldPos[atoms.z]; xpj2 = posDelta[atoms.z]; } float4 pos3 = {0.0f, 0.0f, 0.0f, 0.0f}; float4 xpj3 = {0.0f, 0.0f, 0.0f, 0.0f}; if (atoms.w != -1) { pos3 = oldPos[atoms.w]; xpj3 = posDelta[atoms.w]; } // Precompute quantities. float4 rij1 = pos-pos1; float4 rij2 = pos-pos2; float4 rij3 = pos-pos3; float rij1sq = rij1.x*rij1.x + rij1.y*rij1.y + rij1.z*rij1.z; float rij2sq = rij2.x*rij2.x + rij2.y*rij2.y + rij2.z*rij2.z; float rij3sq = rij3.x*rij3.x + rij3.y*rij3.y + rij3.z*rij3.z; float ld1 = d2-rij1sq; float ld2 = d2-rij2sq; float ld3 = d2-rij3sq; // Iterate until convergence. bool converged = false; int iteration = 0; while (iteration < 15 && !converged) { converged = true; #ifdef CONSTRAIN_VELOCITIES float4 rpij = xpi-xpj1; float rrpr = rpij.x*rij1.x + rpij.y*rij1.y + rpij.z*rij1.z; float delta = -2.0f*avgMass*rrpr/rij1sq; float4 dr = rij1*delta; xpi.xyz += dr.xyz*invMassCentral; xpj1.xyz -= dr.xyz*invMassPeripheral; if (fabs(delta) > tol) converged = false; if (atoms.z != -1) { rpij = xpi-xpj2; rrpr = rpij.x*rij2.x + rpij.y*rij2.y + rpij.z*rij2.z; delta = -2.0f*avgMass*rrpr/rij2sq; dr = rij2*delta; xpi.xyz += dr.xyz*invMassCentral; xpj2.xyz -= dr.xyz*invMassPeripheral; if (fabs(delta) > tol) converged = false; } if (atoms.w != -1) { rpij = xpi-xpj3; rrpr = rpij.x*rij3.x + rpij.y*rij3.y + rpij.z*rij3.z; delta = -2.0f*avgMass*rrpr/rij3sq; dr = rij3*delta; xpi.xyz += dr.xyz*invMassCentral; xpj3.xyz -= dr.xyz*invMassPeripheral; if (fabs(delta) > tol) converged = false; } #else float4 rpij = xpi-xpj1; float rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z; float rrpr = rij1.x*rpij.x + rij1.y*rpij.y + rij1.z*rpij.z; float diff = fabs(ld1-2.0f*rrpr-rpsqij) / (d2*tol); if (diff >= 1.0f) { float acor = (ld1-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij1sq); float4 dr = rij1*acor; xpi.xyz += dr.xyz*invMassCentral; xpj1.xyz -= dr.xyz*invMassPeripheral; converged = false; } if (atoms.z != -1) { rpij.xyz = xpi.xyz-xpj2.xyz; rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z; rrpr = rij2.x*rpij.x + rij2.y*rpij.y + rij2.z*rpij.z; diff = fabs(ld2-2.0f*rrpr-rpsqij) / (d2*tol); if (diff >= 1.0f) { float acor = (ld2 - 2.0f*rrpr - rpsqij)*avgMass / (rrpr + rij2sq); float4 dr = rij2*acor; xpi.xyz += dr.xyz*invMassCentral; xpj2.xyz -= dr.xyz*invMassPeripheral; converged = false; } } if (atoms.w != -1) { rpij.xyz = xpi.xyz-xpj3.xyz; rpsqij = rpij.x*rpij.x + rpij.y*rpij.y + rpij.z*rpij.z; rrpr = rij3.x*rpij.x + rij3.y*rpij.y + rij3.z*rpij.z; diff = fabs(ld3 - 2.0f*rrpr - rpsqij) / (d2*tol); if (diff >= 1.0f) { float acor = (ld3-2.0f*rrpr-rpsqij)*avgMass / (rrpr+rij3sq); float4 dr = rij3*acor; xpi.xyz += dr.xyz*invMassCentral; xpj3.xyz -= dr.xyz*invMassPeripheral; converged = false; } } #endif iteration++; } // Record the new positions. posDelta[atoms.x] = xpi; posDelta[atoms.y] = xpj1; if (atoms.z != -1) posDelta[atoms.z] = xpj2; if (atoms.w != -1) posDelta[atoms.w] = xpj3; index += get_global_size(0); } }
Peter
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Peter,
Thanks for providing the kernel. I am able to reproduce the build error on the GPU using the 2012SDK. I'll get back to you on my findings after debug it more.
Thanks,
Raghu
Thanks for providing the kernel. I am able to reproduce the build error on the GPU using the 2012SDK. I'll get back to you on my findings after debug it more.
Thanks,
Raghu
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I was able to narrow down the test case to a few lines causing the build to fail. I have filed a bug and will let you know if development suggests a fix or a workaround.
Thanks,
Raghu
Thanks,
Raghu
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Great, thank you for the update.
Peter
Peter

Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page