OpenCL* for CPU
Ask questions and share information on Intel® SDK for OpenCL™ Applications and OpenCL™ implementations for Intel® CPU.
Announcements
This forum covers OpenCL* for CPU only. OpenCL* for GPU questions can be asked in the GPU Compute Software forum. Intel® FPGA SDK for OpenCL™ questions can be ask in the FPGA Intel® High Level Design forum.
1719 Discussions

kernel crashes with access violation on windows 7

ABoxe
Beginner
294 Views

Hello,

Below is a kernel that crashes with an access violation on windows 7 with latest intel opencl sdk,

targeting a nehalem quad core CPU.

Actually, the kernel as written will not crash, but if you uncomment the image write line, then it will.

Also, if you uncomment the image write and comment the image read, then it is fine.

I am using a 2048x2048 image to run this on.  

You may also download the complete visual studio project at:

https://github.com/boxerab/roger

 

Thanks,

Aaron

 

//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
#define TOTAL_BUFFER_SIZE  1445   
#define WIN_SIZE_Y        8

constant sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_MIRRORED_REPEAT  | CLK_FILTER_NEAREST;

void kernel run(__read_only image2d_t idata, __write_only image2d_t odata,   
                       const unsigned int  width, const unsigned int  height, const unsigned int steps) {
    local int scratch[TOTAL_BUFFER_SIZE*4];

    //cache last three pixels
    int pixCache[3];
    
    int firstY = get_global_id(1) *steps*WIN_SIZE_Y;


    //1. initialize column (and perhaps boundary column)

    //2. read column into scratch

    //3. vertically transform column

    //4. horizontally transform all rows corresponding to this column

    //5. write into destination


    int yIndex = get_global_id(1) * steps * WIN_SIZE_Y;
    for (int i = 0; i < steps; ++i) {

       //store a line in local memory
       for (int j = 0; j < WIN_SIZE_Y; ++j) {
       
            const int2 pos = get_global_id(0), yIndex + j };
            
            if (pos.x < width && pos.y < height) {
            
                const float2 posNormal = {pos.x/(float)width, pos.y/(float)height};
                int4 pix = read_imagei(idata, sampler, posNormal);
        
                int channelIndex = pos.x + pos.y * width;
                scratch[channelIndex] = pix.x;
                channelIndex += TOTAL_BUFFER_SIZE;
                scratch[channelIndex] = pix.y;
                channelIndex += TOTAL_BUFFER_SIZE;
                scratch[channelIndex] = pix.z;
                channelIndex += TOTAL_BUFFER_SIZE;
                scratch[channelIndex] = pix.w;
                
            }
        }
    
        // read from local memory and store in destination
        for (int j = 0; j < WIN_SIZE_Y; ++j) {
            const int2 pos = {getGlobalId(0), yIndex + j };
            if (pos.x < width && pos.y < height) {
                int channelIndex = pos.x + pos.y * width;
                int4 pix;

                pix.x = scratch[channelIndex];
                channelIndex += TOTAL_BUFFER_SIZE;
                pix.y = scratch[channelIndex] ;
                channelIndex += TOTAL_BUFFER_SIZE;
                pix.z = scratch[channelIndex];
                channelIndex += TOTAL_BUFFER_SIZE;
                pix.w = scratch[channelIndex];
                

                //uncomment me to reproduce the crash
                //write_imagei(odata, pos,pix);

            }
        }
        yIndex += WIN_SIZE_Y;
    }
    
}

0 Kudos
2 Replies
ABoxe
Beginner
294 Views

Here is a simpler kernel that exhibits this crash:

(is this a race condition?)

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_MIRRORED_REPEAT  | CLK_FILTER_NEAREST;

void __kernel run(__read_only image2d_t idata, __write_only image2d_t odata,   
                       const unsigned int  width, const unsigned int  height, const unsigned int steps) {
    __local int scratch[5000];
    int yIndex = getGlobalId(1) * steps * 8;
    for (int i = 0; i < steps; ++i) {
       //store a line in local memory
       for (int j = 0; j < 8; ++j) {
       
            const int2 pos = {get_global_id(0), yIndex + j };
            
            if (pos.x < width && pos.y < height) {
            
                const float2 posNormal = {pos.x/(float)width, pos.y/(float)height};
                int channelIndex = (pos.x + pos.y * width)*4;
                int4 t = vload4(channelIndex,scratch);
                write_imagei(odata, pos,t);

            }
        }
        yIndex += 8;
    }
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

0 Kudos
ABoxe
Beginner
294 Views

This turned out to be my bad. Please disregard.

 

Thanks,

Aaron

0 Kudos
Reply