Community
cancel
Showing results for 
Search instead for 
Did you mean: 
Altera_Forum
Honored Contributor I
758 Views

Getting a segmentation fault when the kernel input gets bigger

Hello , 

I have the following OpenCL host code, which convolves the input signal with the mask:

int main() { 

//inits context and program 

init_opencl(); 

soc_convolution(500,500); 

clreleaseprogram(program); 

clreleasecontext(context); 

free(platforms); 

free(devices); 

} 

void soc_convolution(int inputsignalwidth , int inputsignalheight ) 

{ 

cl_uint inputsignal[inputsignalwidth][inputsignalheight]; 

// set the inputvalues to 1 

for(int i = 0; i < inputsignalwidth; i++) 

{ 

for(int j = 0; j < inputsignalheight; j++) 

{ 

inputsignal[j] = 1;[/I] 

} 

} 

const unsigned int outputsignalwidth = inputsignalwidth -2; 

const unsigned int outputsignalheight = inputsignalheight -2; 

cout << outputsignalwidth <<endl; 

cout << outputsignalheight <<endl; 

 

cl_uint outputsignal[outputsignalwidth][outputsignalheight]; 

const unsigned int maskwidth = 3; 

const unsigned int maskheight = 3; 

 

// kernel for convolve 

cl_uint mask[maskwidth][maskheight] = 

{ 

{ 1, 1, 1 }, 

{ 1, 0, 1 }, 

{ 1, 1, 1 }, 

}; 

 

// create kernel 

cl_kernel kernel = null; 

kernel = clcreatekernel(program, "convolve", &status); 

// create command queue and associate it with the device you want to execute on 

cl_command_queue cmdqueue; 

cmdqueue = clcreatecommandqueue(context,devices[0], 0, &status); 

checkerror(status, "failed to create commadnqueue"); 

 

// buffer 

cl_mem buffer_inputsignal = clcreatebuffer(context,cl_mem_read_only,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,null,&status); 

cl_mem buffer_mask = clcreatebuffer(context,cl_mem_read_only, sizeof(cl_uint) * maskwidth * maskheight, null,&status); 

cl_mem buffer_outputsignal = clcreatebuffer(context,cl_mem_write_only, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,null,&status); 

 

 

status = clenqueuewritebuffer(cmdqueue, buffer_inputsignal,cl_false,0,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,inputsignal,0,null,null); 

status = clenqueuewritebuffer(cmdqueue, buffer_mask,cl_false,0,sizeof(cl_uint) * maskwidth * maskheight,mask,0,null,null); 

 

status = clsetkernelarg(kernel,0,sizeof(cl_mem),&buffer_inputsignal); 

status = clsetkernelarg(kernel,1,sizeof(cl_mem),&buffer_mask); 

status = clsetkernelarg(kernel,2,sizeof(cl_mem),&buffer_outputsignal); 

status = clsetkernelarg(kernel,3,sizeof(int),&inputsignalwidth); 

status = clsetkernelarg(kernel,4,sizeof(int),&maskwidth); 

 

size_t globalworksize[2]; 

globalworksize[0] = outputsignalwidth; 

globalworksize[1] = outputsignalheight; 

status = clenqueuendrangekernel(cmdqueue,kernel,2,null, globalworksize, null,0, null,null); 

clenqueuereadbuffer(cmdqueue,buffer_outputsignal,cl_true,0, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,outputsignal, 0, null, null); 

 

//stop cpu till queue is finish 

clfinish(cmdqueue); 

 

// free opencl resources 

clreleasekernel(kernel); 

clreleasecommandqueue(cmdqueue); 

clreleasememobject(buffer_inputsignal); 

clreleasememobject(buffer_mask); 

clreleasememobject(buffer_outputsignal); 

And this is the kernel code for the convolution:

/*
 * 2D convolution kernel: each work-item computes one output element.
 *
 * input       input signal, addressed as row * inputwidth + column
 * mask        square convolution mask, maskwidth * maskwidth elements
 * output      result, addressed as row * get_global_size(0) + column
 * inputwidth  number of elements per input row
 * maskwidth   side length of the mask
 *
 * There is no bounds check: the caller has to size the global work range so
 * that (x + maskwidth - 1, y + maskwidth - 1) stays inside the input.
 */
__kernel void convolve(__global const uint *input,
                       __global const uint *mask,
                       __global uint *output,
                       const int inputwidth,
                       const int maskwidth)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    uint sum = 0;

    for (int r = 0; r < maskwidth; r++)
    {
        // start index of the input row that lines up with mask row r
        const int idxrow = (y + r) * inputwidth + x;

        for (int c = 0; c < maskwidth; c++)
        {
            // convolve row by row
            sum += mask[(r * maskwidth) + c] * input[idxrow + c];
        }
    }

    output[y * get_global_size(0) + x] = sum;
}

 

 

When I call the function SoC_Convolution with values up to 600 it works, but setting the values above this causes a segmentation fault when executing. 

Can someone tell me what the problem is? 

 

Thanks :) 

0 Kudos
1 Reply
Altera_Forum
Honored Contributor I
45 Views

You should insert printfs into your host code to find the exact function that is "segfault"ing. You are most likely overflowing the stack by declaring your data structure as "cl_uint inputSignal[inputSignalWidth][inputSignalHeight]"; allocating it using malloc will likely fix your problem.

Reply