OpenCL* for CPU
Ask questions and share information on Intel® SDK for OpenCL™ Applications and OpenCL™ implementations for Intel® CPU
Announcements
This forum covers OpenCL* for CPU only. OpenCL* for GPU questions can be asked in the GPU Compute Software forum. Intel® FPGA SDK for OpenCL™ questions can be ask in the FPGA Intel® High Level Design forum.
1663 Discussions

Kernel crashes latest SDK with "access violation reading location ......"

ABoxe
Beginner
136 Views

Here is the offending kernel. The task is to find the maximum number of bits in a block of pixels.

 

 

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#define CODEBLOCKX  32

#define CODEBLOCKY 32

CONSTANT sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE  | CLK_FILTER_NEAREST;


void KERNEL run(write_only image2d_t R,
                         write_only image2d_t G, 
                         write_only image2d_t B,
                         write_only image2d_t A , const unsigned int  width, const unsigned int height) {

    // Red channel 

    //find maximum number of bits in code block
    LOCAL char msbScratch[CODEBLOCKX];

    // between one and 32 - zero value indicates that this code block is identically zero

    int2 posIn = (int2)(getLocalId(0) + getGlobalId(0)*CODEBLOCKX,  getGlobalId(1)*CODEBLOCKY);
    int maxVal = -2147483647-1;
    for (int i = 0; i < CODEBLOCKY; ++i) {
        maxVal = max(maxVal, read_imagei(R, sampler, posIn).x);    
        posIn.y++; 
    }

    char msbWI = 32 - clz(maxVal);
    msbScratch[getLocalId(0)] =msbWI;
    localMemoryFence();
    

    //group by twos
    if ( (getLocalId(0)&1) == 0) {
        msbWI = max(msbWI, msbScratch[getLocalId(0)+1]);
    }
    localMemoryFence();
    
    //group by fours
    if ( (getLocalId(0)&3) == 0) {
        msbWI = max(msbWI, msbScratch[getLocalId(0)+2]);
    }
    localMemoryFence();
    
    
    //group by eights
    if ( (getLocalId(0)&7) == 0) {
        msbWI = max(msbWI, msbScratch[getLocalId(0)+4]);
    }
    localMemoryFence();
    
    //group by 16ths
    if ( (getLocalId(0)&15) == 0) {
        msbWI = max(msbWI, msbScratch[getLocalId(0)+8]);
    }
    localMemoryFence();
    
    
    if (getLocalId(0) == 0) {
        msbScratch[0] = max(msbWI, msbScratch[16]);  //crashes here with access violation while reading location .....
    }
    localMemoryFence();
    

}

0 Kudos
5 Replies
ABoxe
Beginner
136 Views

Here is a simpler kernel that exhibits the same issue:

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define CONSTANT constant
#define KERNEL kernel
#define LOCAL local
#define GLOBAL global


size_t getGlobalId(    const uint dimindx) {
  return get_global_id(dimindx);
}
size_t getGroupId(    const uint dimindx) {
  return get_group_id(dimindx);
}
size_t getLocalId(    const uint dimindx) {
  return get_local_id(dimindx);
}

inline void localMemoryFence() {
    barrier(CLK_LOCAL_MEM_FENCE);
}

CONSTANT sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE  | CLK_FILTER_NEAREST;


void KERNEL run(read_only image2d_t R,
                         read_only image2d_t G, 
                         read_only image2d_t B,
                         read_only image2d_t A , const unsigned int  width, const unsigned int height) {

    //find maximum number of bits in code block

    LOCAL int msbScratch[CODEBLOCKX];
   int2 posIn = (int2)(getLocalId(0) + getGlobalId(0)*CODEBLOCKX,  getGlobalId(1)*CODEBLOCKY);

    int maxVal = -2147483647-1;
    for (int i = 0; i < CODEBLOCKY; ++i) {
        maxVal = max(maxVal, read_imagei(R, sampler, posIn).x);    
        posIn.y++; 
    }

    int msbWI = 32 - clz(maxVal);
    msbScratch[getLocalId(0)] =msbWI;
    localMemoryFence();
    

    if (getLocalId(0) == 0) {
        int4 maximum = (int4)(msbWI);
        for(int i=0; i < CODEBLOCKX; i+=4) {
            int4 temp = (int4)(msbScratch,msbScratch[i+1],msbScratch[i+2],msbScratch[i+3]);
            maximum = max(maximum,temp);
        }
    }

}

ABoxe
Beginner
136 Views

Guys,

There seem to be some serious bugs with handling local memory in the latest SDK.

I am having real problems moving forward on my kernel without some type of fix.

Thanks,

Aaron

ABoxe
Beginner
136 Views

My last edits got garbled. Here is the broken kernel:

 


Here is a simpler kernel that exhibits the same issue:

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

#define CONSTANT constant

#define KERNEL kernel

#define LOCAL local

#define GLOBAL global

 

size_t getGlobalId(    const uint dimindx) {

  return get_global_id(dimindx);

}

size_t getGroupId(    const uint dimindx) {

  return get_group_id(dimindx);

}

size_t getLocalId(    const uint dimindx) {

  return get_local_id(dimindx);

}

inline void localMemoryFence() {

    barrier(CLK_LOCAL_MEM_FENCE);

}

CONSTANT sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE  | CLK_FILTER_NEAREST;

 

void KERNEL run(read_only image2d_t R,

                         read_only image2d_t G, 

                         read_only image2d_t B,

                         read_only image2d_t A , const unsigned int  width, const unsigned int height) {

    //find maximum number of bits in code block

    LOCAL int msbScratch[CODEBLOCKX];

   int2 posIn = (int2)(getLocalId(0) + getGlobalId(0)*CODEBLOCKX,  getGlobalId(1)*CODEBLOCKY);

    int maxVal = -2147483647-1;

    for (int i = 0; i < CODEBLOCKY; ++i) {

        maxVal = max(maxVal, read_imagei(R, sampler, posIn).x);    

        posIn.y++; 

    }

    int msbWI = 32 - clz(maxVal);

    msbScratch[getLocalId(0)] =msbWI;

    localMemoryFence();

    

    // these next lines crash the kernel
    if (getLocalId(0) == 0) {

        int4 maximum = (int4)(msbWI);

        for(int i=0; i < CODEBLOCKX; i+=4) {

            int4 temp = (int4)(msbScratch,msbScratch[i+1],msbScratch[i+2],msbScratch[i+3]);
            maximum = max(maximum,temp);

        }

    }

}

ABoxe
Beginner
136 Views

Yeah, it looks like I can only read local memory, not store to it.  Storing causes kernel to crash.

This code does run on my AMD card.

Looks like some QA was sacrificed for the OpenCL 2.0 release......

 

 

Robert_I_Intel
Employee
136 Views

Aaron,

Your msbScratch is CODEBLOCKX in size. Note that in your for loop, for the i == CODEBLOCKX - 1 you are trying to access memory beyond your allocation. This will produce a crash.

Reply