- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello,
I have the following OpenCL host code, which convolves the input signal with the mask: int main() { //inits context and program init_opencl(); soc_convolution(500,500); clreleaseprogram(program); clreleasecontext(context); free(platforms); free(devices); } void soc_convolution(int inputsignalwidth , int inputsignalheight ) { cl_uint inputsignal[inputsignalwidth][inputsignalheight]; // set the inputvalues to 1 for(int i = 0; i < inputsignalwidth; i++) { for(int j = 0; j < inputsignalheight; j++) { inputsignal[i][j] = 1; } } const unsigned int outputsignalwidth = inputsignalwidth -2; const unsigned int outputsignalheight = inputsignalheight -2; cout << outputsignalwidth <<endl; cout << outputsignalheight <<endl; cl_uint outputsignal[outputsignalwidth][outputsignalheight]; const unsigned int maskwidth = 3; const unsigned int maskheight = 3;// kernel for convolve cl_uint mask[maskwidth][maskheight] = { { 1, 1, 1 }, { 1, 0, 1 }, { 1, 1, 1 }, }; // create kernel cl_kernel kernel = null; kernel = clcreatekernel(program, "convolve", &status); // create command queue and associate it with the device you want to execute on cl_command_queue cmdqueue; cmdqueue = clcreatecommandqueue(context,devices[0], 0, &status); checkerror(status, "failed to create commadnqueue"); // buffer cl_mem buffer_inputsignal = clcreatebuffer(context,cl_mem_read_only,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,null,&status); cl_mem buffer_mask = clcreatebuffer(context,cl_mem_read_only, sizeof(cl_uint) * maskwidth * maskheight, null,&status); cl_mem buffer_outputsignal = clcreatebuffer(context,cl_mem_write_only, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,null,&status); status = clenqueuewritebuffer(cmdqueue, buffer_inputsignal,cl_false,0,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,inputsignal,0,null,null); status = clenqueuewritebuffer(cmdqueue, buffer_mask,cl_false,0,sizeof(cl_uint) * maskwidth * maskheight,mask,0,null,null); status = 
clsetkernelarg(kernel,0,sizeof(cl_mem),&buffer_inputsignal); status = clsetkernelarg(kernel,1,sizeof(cl_mem),&buffer_mask); status = clsetkernelarg(kernel,2,sizeof(cl_mem),&buffer_outputsignal); status = clsetkernelarg(kernel,3,sizeof(int),&inputsignalwidth); status = clsetkernelarg(kernel,4,sizeof(int),&maskwidth); size_t globalworksize[2]; globalworksize[0] = outputsignalwidth; globalworksize[1] = outputsignalheight; status = clenqueuendrangekernel(cmdqueue,kernel,2,null, globalworksize, null,0, null,null); clenqueuereadbuffer(cmdqueue,buffer_outputsignal,cl_true,0, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,outputsignal, 0, null, null); //stop cpu till queue is finish clfinish(cmdqueue); // free opencl resources clreleasekernel(kernel); clreleasecommandqueue(cmdqueue); clreleasememobject(buffer_inputsignal); clreleasememobject(buffer_mask); clreleasememobject(buffer_outputsignal); }
And here is the kernel code for the convolution: __kernel void convolve(__global uint * input, __global uint * mask, __global uint * output,, const int inputwidth,const int maskwidth) { const int x = get_global_id(0); const int y = get_global_id(1); uint sum = 0; for (int r = 0; r < maskwidth; r++) { //inkrementieren des zeilenindex um breite der maske pro schleifendurchlauf const int idxrow = (y + r) * inputwidth + x; for (int c = 0; c < maskwidth; c++) { //zeilenweises falten sum += mask[(r * maskwidth) + c] * input[idxrow + c]; } } output[y * get_global_size(0) + x] = sum; }
When I call the function SoC_Convolution with values up to 600, it works, but setting the values above that causes a segmentation fault when executing. Can someone tell me what the problem is? Thanks :)
Link Copied
1 Reply
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
You should insert printfs into your host code to find the exact function that is "segfault"ing. You are most likely overflowing the stack by declaring your data structure as "cl_uint inputSignal[inputSignalWidth][inputSignalHeight]"; allocating it using malloc will likely fix your problem.
Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page