Intel® Quartus® Prime Software
Intel® Quartus® Prime Design Software, Design Entry, Synthesis, Simulation, Verification, Timing Analysis, System Design (Platform Designer, formerly Qsys)
16556 Discussions

Getting a segmentation fault when the kernel input gets bigger

Altera_Forum
Honored Contributor II
1,010 Views

Hello,

I have the following OpenCL host code, which convolves the input signal with the mask:

int main() { 

//inits context and program 

init_opencl(); 

soc_convolution(500,500); 

clreleaseprogram(program); 

clreleasecontext(context); 

free(platforms); 

free(devices); 

} 

void soc_convolution(int inputsignalwidth , int inputsignalheight ) 

{ 

cl_uint inputsignal[inputsignalwidth][inputsignalheight]; 

// set the inputvalues to 1 

for(int i = 0; i < inputsignalwidth; i++) 

{ 

for(int j = 0; j < inputsignalheight; j++) 

{ 

inputsignal[j] = 1;[/I] 

} 

} 

const unsigned int outputsignalwidth = inputsignalwidth -2; 

const unsigned int outputsignalheight = inputsignalheight -2; 

cout << outputsignalwidth <<endl; 

cout << outputsignalheight <<endl; 

 

cl_uint outputsignal[outputsignalwidth][outputsignalheight]; 

const unsigned int maskwidth = 3; 

const unsigned int maskheight = 3; 

 

// kernel for convolve 

cl_uint mask[maskwidth][maskheight] = 

{ 

{ 1, 1, 1 }, 

{ 1, 0, 1 }, 

{ 1, 1, 1 }, 

}; 

 

// create kernel 

cl_kernel kernel = null; 

kernel = clcreatekernel(program, "convolve", &status); 

// create command queue and associate it with the device you want to execute on 

cl_command_queue cmdqueue; 

cmdqueue = clcreatecommandqueue(context,devices[0], 0, &status); 

checkerror(status, "failed to create commadnqueue"); 

 

// buffer 

cl_mem buffer_inputsignal = clcreatebuffer(context,cl_mem_read_only,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,null,&status); 

cl_mem buffer_mask = clcreatebuffer(context,cl_mem_read_only, sizeof(cl_uint) * maskwidth * maskheight, null,&status); 

cl_mem buffer_outputsignal = clcreatebuffer(context,cl_mem_write_only, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,null,&status); 

 

 

status = clenqueuewritebuffer(cmdqueue, buffer_inputsignal,cl_false,0,sizeof(cl_uint) * inputsignalwidth * inputsignalheight,inputsignal,0,null,null); 

status = clenqueuewritebuffer(cmdqueue, buffer_mask,cl_false,0,sizeof(cl_uint) * maskwidth * maskheight,mask,0,null,null); 

 

status = clsetkernelarg(kernel,0,sizeof(cl_mem),&buffer_inputsignal); 

status = clsetkernelarg(kernel,1,sizeof(cl_mem),&buffer_mask); 

status = clsetkernelarg(kernel,2,sizeof(cl_mem),&buffer_outputsignal); 

status = clsetkernelarg(kernel,3,sizeof(int),&inputsignalwidth); 

status = clsetkernelarg(kernel,4,sizeof(int),&maskwidth); 

 

size_t globalworksize[2]; 

globalworksize[0] = outputsignalwidth; 

globalworksize[1] = outputsignalheight; 

status = clenqueuendrangekernel(cmdqueue,kernel,2,null, globalworksize, null,0, null,null); 

clenqueuereadbuffer(cmdqueue,buffer_outputsignal,cl_true,0, sizeof(cl_uint) * outputsignalwidth * outputsignalheight,outputsignal, 0, null, null); 

 

//stop cpu till queue is finish 

clfinish(cmdqueue); 

 

// free opencl resources 

clreleasekernel(kernel); 

clreleasecommandqueue(cmdqueue); 

clreleasememobject(buffer_inputsignal); 

clreleasememobject(buffer_mask); 

clreleasememobject(buffer_outputsignal); 

And this is the kernel code for the convolution:

/*
 * 2D convolution kernel; each work-item computes one output element.
 *
 * The work-item at global id (x, y) sums mask[r][c] * input[y + r][x + c]
 * over a maskwidth x maskwidth window and stores the result at
 * output[y * get_global_size(0) + x].
 *
 * input      - input signal, indexed as row * inputwidth + column
 * mask       - mask, indexed as r * maskwidth + c
 * output     - result, one element per work-item
 * inputwidth - row stride of `input`, in elements
 * maskwidth  - width of the mask; also used as the number of mask rows
 */
__kernel void convolve(__global uint * input, __global uint * mask, __global uint * output, const int inputwidth, const int maskwidth)
{
    const int x = get_global_id(0);
    const int y = get_global_id(1);

    uint sum = 0;

    for (int r = 0; r < maskwidth; r++)
    {
        // Index of the first input element of window row r; it advances by
        // one input row (inputwidth elements) per loop iteration.
        const int idxrow = (y + r) * inputwidth + x;

        for (int c = 0; c < maskwidth; c++)
        {
            // Convolve row by row.
            sum += mask[(r * maskwidth) + c] * input[idxrow + c];
        }
    }

    output[y * get_global_size(0) + x] = sum;
}

 

 

When I call the function SoC_Convolution with values up to 600, it works, but setting the values above that causes a segmentation fault when executing.

Can someone tell me what the problem is?

 

Thanks :) 

0 Kudos
1 Reply
Altera_Forum
Honored Contributor II
297 Views

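You should insert printfs into your host code to find the exact function that is "segfault"ing. You are most likely overflowing the stack by declaring your data structure as "cl_uint inputSignal[inputSignalWidth][inputSignalHeight]"; allocating it using malloc will likely fix your problem. Each such array takes width × height × 4 bytes of stack, so the input and output arrays together need roughly 2.9 MB at 600 × 600 and about 8 MB at 1000 × 1000, which is in the range of typical default stack size limits.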

0 Kudos
Reply