- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Here is the offending kernel. The task is to find the maximum number of bits in a block of pixels.
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define CODEBLOCKX 32
#define CODEBLOCKY 32
CONSTANT sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST;
void KERNEL run(write_only image2d_t R,
write_only image2d_t G,
write_only image2d_t B,
write_only image2d_t A , const unsigned int width, const unsigned int height) {
// Red channel
//find maximum number of bits in code block
LOCAL char msbScratch[CODEBLOCKX];
// between one and 32 - zero value indicates that this code block is identically zero
int2 posIn = (int2)(getLocalId(0) + getGlobalId(0)*CODEBLOCKX, getGlobalId(1)*CODEBLOCKY);
int maxVal = -2147483647-1;
for (int i = 0; i < CODEBLOCKY; ++i) {
maxVal = max(maxVal, read_imagei(R, sampler, posIn).x);
posIn.y++;
}
char msbWI = 32 - clz(maxVal);
msbScratch[getLocalId(0)] =msbWI;
localMemoryFence();
//group by twos
if ( (getLocalId(0)&1) == 0) {
msbWI = max(msbWI, msbScratch[getLocalId(0)+1]);
}
localMemoryFence();
//group by fours
if ( (getLocalId(0)&3) == 0) {
msbWI = max(msbWI, msbScratch[getLocalId(0)+2]);
}
localMemoryFence();
//group by eights
if ( (getLocalId(0)&7) == 0) {
msbWI = max(msbWI, msbScratch[getLocalId(0)+4]);
}
localMemoryFence();
//group by 16ths
if ( (getLocalId(0)&15) == 0) {
msbWI = max(msbWI, msbScratch[getLocalId(0)+8]);
}
localMemoryFence();
if (getLocalId(0) == 0) {
msbScratch[0] = max(msbWI, msbScratch[16]); //crashes here with access violation while reading location .....
}
localMemoryFence();
}
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Here is a simpler kernel that exhibits the same issue:
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define CONSTANT constant
#define KERNEL kernel
#define LOCAL local
#define GLOBAL global
size_t getGlobalId( const uint dimindx) {
return get_global_id(dimindx);
}
size_t getGroupId( const uint dimindx) {
return get_group_id(dimindx);
}
size_t getLocalId( const uint dimindx) {
return get_local_id(dimindx);
}
inline void localMemoryFence() {
barrier(CLK_LOCAL_MEM_FENCE);
}
CONSTANT sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST;
void KERNEL run(read_only image2d_t R,
read_only image2d_t G,
read_only image2d_t B,
read_only image2d_t A , const unsigned int width, const unsigned int height) {
//find maximum number of bits in code block
LOCAL int msbScratch[CODEBLOCKX];
int2 posIn = (int2)(getLocalId(0) + getGlobalId(0)*CODEBLOCKX, getGlobalId(1)*CODEBLOCKY);
int maxVal = -2147483647-1;
for (int i = 0; i < CODEBLOCKY; ++i) {
maxVal = max(maxVal, read_imagei(R, sampler, posIn).x);
posIn.y++;
}
int msbWI = 32 - clz(maxVal);
msbScratch[getLocalId(0)] =msbWI;
localMemoryFence();
if (getLocalId(0) == 0) {
int4 maximum = (int4)(msbWI);
for(int i=0; i < CODEBLOCKX; i+=4) {
int4 temp = (int4)(msbScratch,msbScratch[i+1],msbScratch[i+2],msbScratch[i+3]);
maximum = max(maximum,temp);
}
}
}
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Guys,
There seem to be some serious bugs with handling local memory in the latest SDK.
I am having real problems moving forward on my kernel without some type of fix.
Thanks,
Aaron
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
My last edits got garbled. Here is the broken kernel:
Here is a simpler kernel that exhibits the same issue:
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define CONSTANT constant
#define KERNEL kernel
#define LOCAL local
#define GLOBAL global
size_t getGlobalId( const uint dimindx) {
return get_global_id(dimindx);
}
size_t getGroupId( const uint dimindx) {
return get_group_id(dimindx);
}
size_t getLocalId( const uint dimindx) {
return get_local_id(dimindx);
}
inline void localMemoryFence() {
barrier(CLK_LOCAL_MEM_FENCE);
}
CONSTANT sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST;
void KERNEL run(read_only image2d_t R,
read_only image2d_t G,
read_only image2d_t B,
read_only image2d_t A , const unsigned int width, const unsigned int height) {
//find maximum number of bits in code block
LOCAL int msbScratch[CODEBLOCKX];
int2 posIn = (int2)(getLocalId(0) + getGlobalId(0)*CODEBLOCKX, getGlobalId(1)*CODEBLOCKY);
int maxVal = -2147483647-1;
for (int i = 0; i < CODEBLOCKY; ++i) {
maxVal = max(maxVal, read_imagei(R, sampler, posIn).x);
posIn.y++;
}
int msbWI = 32 - clz(maxVal);
msbScratch[getLocalId(0)] =msbWI;
localMemoryFence();
// these next lines crash the kernel
if (getLocalId(0) == 0) {
int4 maximum = (int4)(msbWI);
for(int i=0; i < CODEBLOCKX; i+=4) {
int4 temp = (int4)(msbScratch,msbScratch[i+1],msbScratch[i+2],msbScratch[i+3]);
maximum = max(maximum,temp);
}
}
}
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Yeah, it looks like I can only read local memory, not store to it. Storing causes kernel to crash.
This code does run on my AMD card.
Looks like some QA was sacrificed for the OpenCL 2.0 release......
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Aaron,
Your msbScratch is CODEBLOCKX in size. Note that in your for loop, for the i == CODEBLOCKX - 1 you are trying to access memory beyond your allocation. This will produce a crash.

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page