- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I write a short code in OpenCL. The host fuction is below
float h_gaus[3][3] = {{0.0625, 0.125, 0.0625}, {0.1250, 0.250, 0.1250}, {0.0625, 0.125, 0.0625}};
cl_mem d_gaus = clCreateBuffer(ocl.clContext, CL_MEM_READ_WRITE, 3 * 3 * sizeof(float), NULL, &clStatus);
clStatus = clEnqueueWriteBuffer(ocl.clCommandQueue, d_gaus, CL_TRUE, 0, 3 * 3 * sizeof(float), h_gaus, 0, NULL, NULL);
int rowsc, colsc, in_size;
#ifdef TEST_ROWS
rowsc = TEST_ROWS;
#endif
#ifdef TEST_COLS
colsc = TEST_COLS;
#endif
in_size = rowsc * colsc * sizeof(unsigned char);
// define input and output(local buffer)
unsigned char *test_frame;
test_frame = (unsigned char*) alignedMalloc(in_size);
// initialize input frame
for (int i = 0; i < rowsc; i++)
for (int j = 0; j < colsc; j++)
{
test_frame[i * rowsc + j] = (unsigned char)100;
}
unsigned char *h_test;
unsigned char *h_out;
h_test = (unsigned char *)clSVMAllocAltera(ocl.clContext, 0, in_size, 1024);h_out = (unsigned char *)clSVMAllocAltera(ocl.clContext, 0, in_size, 1024);
memcpy(h_test, test_frame, in_size);
size_t ls[2] = {(size_t)16, (size_t)16};
size_t gs[2] = {(size_t)rowsc,(size_t)colsc};
size_t *offset = NULL;
clSetKernelArgSVMPointerAltera(ocl.clKernel_gauss, 0, (void*)h_test);
clSetKernelArgSVMPointerAltera(ocl.clKernel_gauss, 1, (void*)h_out);
clSetKernelArg(ocl.clKernel_gauss, 2, sizeof(int), &rowsc);
clSetKernelArg(ocl.clKernel_gauss, 3, sizeof(int), &colsc);
clSetKernelArg(ocl.clKernel_gauss, 4, (L_SIZE + 2) * (L_SIZE + 2) * sizeof(int), NULL);
clSetKernelArg(ocl.clKernel_gauss, 5, sizeof(cl_mem), &d_gaus);
clStatus = clEnqueueSVMMap(ocl.clCommandQueue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, (void *)h_test, in_size, 0, NULL, NULL);
CL_ERR();
clStatus = clEnqueueSVMMap(ocl.clCommandQueue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, (void *)h_out, in_size, 0, NULL, NULL);
CL_ERR();
clStatus = clEnqueueNDRangeKernel(ocl.clCommandQueue, ocl.clKernel_gauss, 2, offset, gs, ls, 0, NULL, NULL);
Here I define rowsc and colsc are both 64. The kernel function is below
__kernel void gaussian_kernel(__global unsigned char *data, __global unsigned char *out, int rows, int cols,
__local int *l_data, __global float *gaus)
{
int sum = 0;
int g_row = get_global_id(0);
int g_col = get_global_id(1);
int l_row = get_local_id(0) + 1;
int l_col = get_local_id(1) + 1;
int pos = g_row * cols + g_col;
out[pos] = data[pos];
// copy to local, the position itself
l_data[l_row * (L_SIZE + 2) + l_col] = data[pos];
// top most row
if (l_row == 1)
{
// top most global row, fill it with 0
l_data[0 * (L_SIZE + 2) + l_col] = (g_row==0 ? 0 : data[pos - cols]);
// top left
if (l_col == 1)
l_data[0 * (L_SIZE + 2) + 0] = (g_row==0 ? 0 : data[pos - cols - 1]);
// top right
else if (l_col == L_SIZE)
l_data[0 * (L_SIZE + 2) + L_SIZE + 1] = (g_row==0 ? 0 : data[pos - cols + 1]);
}
// bottom most row
else if (l_row == L_SIZE)
{
l_data[(L_SIZE + 1) * (L_SIZE + 2) + l_col] = (g_row==rows-1 ? 0 : data[pos + cols]);
// bottom left
if (l_col == 1)
l_data[(L_SIZE + 1) * (L_SIZE + 2) + 0] = (g_row==rows-1 ? 0 : data[pos + cols - 1]);
// bottom right
else if (l_col == L_SIZE)
l_data[(L_SIZE + 1) * (L_SIZE + 2) + L_SIZE + 1] = (g_row == rows-1 ? 0 : data[pos + cols + 1]);
}
// left most col
if (l_col == 1)
l_data[l_row * (L_SIZE + 2) + 0] = (g_col == 0 ? 0 : data[pos - 1]);
// right most col
else if (l_col == L_SIZE)
l_data[l_row * (L_SIZE + 2) + L_SIZE + 1] = (g_col == cols-1 ? 0 : data[pos + 1]);
barrier(CLK_LOCAL_MEM_FENCE);
// compute convolution
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 3; j++)
{
sum += gaus[i * 3 + j] * l_data[(i + l_row - 1) * (L_SIZE + 2) + j + l_col - 1];
}
}
out[pos] = min(255, max(0, sum));
}
I didn't add any restriction to neither max work group size nor max work items. I didn't assign the kernel a "task" attribute, either. But when I try to enqueue a 2D NDRange kernel, it returns -59. I didn't find any introduction about this condition on khronos' group website. So I wonder where is wrong in my code? Or any point I didn't get about HARPv2?
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
May I know do you have add attribute header eg: __attribute__((reqd_work_group_size(256, 2, 1))) in the kernel?
Thanks
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
Can you provide the host code and kernel code attached as file here, so that I can run it on my side for further investigation.
Thanks.

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page