- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
First, as a preface: I believe this is related to the bug reported here (https://software.intel.com/en-us/forums/opencl/topic/704155). However, I'm reporting it anyway on the chance that it is in fact distinct. In any case, it has appeared under new circumstances for this user (i.e. me).
main.c program code:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <CL/cl.h>
#define MEM_SIZE (16)
#define MAX_SOURCE_SIZE (0x100000)
//simple error checking, not strictly necessary:
// err(ans) evaluates the OpenCL status expression `ans` once and passes it,
// together with the call site's file and line, to cpu_assert().
// The do { } while (0) wrapper makes `err(x);` a single statement, so it
// stays correct inside an unbraced if/else (a bare { } block followed by
// ';' would detach the else).
#define err(ans) do { cpu_assert((ans), __FILE__, __LINE__); } while (0)
/*
 * Map an OpenCL status code to the name of the matching CL_* constant.
 *
 * error  numeric status code to look up
 *
 * Returns a pointer to a string literal. Codes that are not listed in the
 * table below yield "Unknown OpenCL error".
 */
const char *getErrorString(cl_int error)
{
    static const struct {
        cl_int code;
        const char *name;
    } known_errors[] = {
        /* run-time and JIT compiler errors */
        { 0, "CL_SUCCESS" },
        { -1, "CL_DEVICE_NOT_FOUND" },
        { -2, "CL_DEVICE_NOT_AVAILABLE" },
        { -3, "CL_COMPILER_NOT_AVAILABLE" },
        { -4, "CL_MEM_OBJECT_ALLOCATION_FAILURE" },
        { -5, "CL_OUT_OF_RESOURCES" },
        { -6, "CL_OUT_OF_HOST_MEMORY" },
        { -7, "CL_PROFILING_INFO_NOT_AVAILABLE" },
        { -8, "CL_MEM_COPY_OVERLAP" },
        { -9, "CL_IMAGE_FORMAT_MISMATCH" },
        { -10, "CL_IMAGE_FORMAT_NOT_SUPPORTED" },
        { -11, "CL_BUILD_PROGRAM_FAILURE" },
        { -12, "CL_MAP_FAILURE" },
        { -13, "CL_MISALIGNED_SUB_BUFFER_OFFSET" },
        { -14, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST" },
        { -15, "CL_COMPILE_PROGRAM_FAILURE" },
        { -16, "CL_LINKER_NOT_AVAILABLE" },
        { -17, "CL_LINK_PROGRAM_FAILURE" },
        { -18, "CL_DEVICE_PARTITION_FAILED" },
        { -19, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE" },
        /* compile-time errors */
        { -30, "CL_INVALID_VALUE" },
        { -31, "CL_INVALID_DEVICE_TYPE" },
        { -32, "CL_INVALID_PLATFORM" },
        { -33, "CL_INVALID_DEVICE" },
        { -34, "CL_INVALID_CONTEXT" },
        { -35, "CL_INVALID_QUEUE_PROPERTIES" },
        { -36, "CL_INVALID_COMMAND_QUEUE" },
        { -37, "CL_INVALID_HOST_PTR" },
        { -38, "CL_INVALID_MEM_OBJECT" },
        { -39, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" },
        { -40, "CL_INVALID_IMAGE_SIZE" },
        { -41, "CL_INVALID_SAMPLER" },
        { -42, "CL_INVALID_BINARY" },
        { -43, "CL_INVALID_BUILD_OPTIONS" },
        { -44, "CL_INVALID_PROGRAM" },
        { -45, "CL_INVALID_PROGRAM_EXECUTABLE" },
        { -46, "CL_INVALID_KERNEL_NAME" },
        { -47, "CL_INVALID_KERNEL_DEFINITION" },
        { -48, "CL_INVALID_KERNEL" },
        { -49, "CL_INVALID_ARG_INDEX" },
        { -50, "CL_INVALID_ARG_VALUE" },
        { -51, "CL_INVALID_ARG_SIZE" },
        { -52, "CL_INVALID_KERNEL_ARGS" },
        { -53, "CL_INVALID_WORK_DIMENSION" },
        { -54, "CL_INVALID_WORK_GROUP_SIZE" },
        { -55, "CL_INVALID_WORK_ITEM_SIZE" },
        { -56, "CL_INVALID_GLOBAL_OFFSET" },
        { -57, "CL_INVALID_EVENT_WAIT_LIST" },
        { -58, "CL_INVALID_EVENT" },
        { -59, "CL_INVALID_OPERATION" },
        { -60, "CL_INVALID_GL_OBJECT" },
        { -61, "CL_INVALID_BUFFER_SIZE" },
        { -62, "CL_INVALID_MIP_LEVEL" },
        { -63, "CL_INVALID_GLOBAL_WORK_SIZE" },
        { -64, "CL_INVALID_PROPERTY" },
        { -65, "CL_INVALID_IMAGE_DESCRIPTOR" },
        { -66, "CL_INVALID_COMPILER_OPTIONS" },
        { -67, "CL_INVALID_LINKER_OPTIONS" },
        { -68, "CL_INVALID_DEVICE_PARTITION_COUNT" },
        /* extension errors */
        { -1000, "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR" },
        { -1001, "CL_PLATFORM_NOT_FOUND_KHR" },
        { -1002, "CL_INVALID_D3D10_DEVICE_KHR" },
        { -1003, "CL_INVALID_D3D10_RESOURCE_KHR" },
        { -1004, "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR" },
        { -1005, "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR" },
    };
    const size_t count = sizeof known_errors / sizeof known_errors[0];

    /* Linear scan: the table is small and this only runs on the error path. */
    for (size_t idx = 0; idx < count; ++idx) {
        if (known_errors[idx].code == error) {
            return known_errors[idx].name;
        }
    }
    return "Unknown OpenCL error";
}
/*
 * Stop the program if an OpenCL call did not succeed.
 *
 * x     status code returned by an OpenCL call
 * file  source file name to report
 * line  source line number to report
 *
 * Returns normally when x is CL_SUCCESS. Otherwise prints the error name,
 * file and line to stderr and calls exit() with x as the status.
 */
void cpu_assert(cl_int x, const char *file, int line) {
    if (x == CL_SUCCESS) {
        return;
    }

    const char *reason = getErrorString(x);
    fprintf(stderr, "cpu_assert: %s %s %d\n", reason, file, line);
    exit(x);
}
//main program
/*
 * Host-side driver for the reproducer.
 *
 * Reads ./kernel.cl, picks the first platform whose vendor string contains
 * "Intel", builds the program for that platform's CPU device, runs
 * "test_kernel" over MEM_SIZE work-items in work-groups of 4, reads the
 * buffer back and prints every element with "%e\t".
 *
 * Returns 0 on success. Problems with the source file, the allocation or the
 * platform selection print a message to stderr and return EXIT_FAILURE.
 * Any failing OpenCL call terminates the process through err().
 */
int main(void)
{
    enum { MAX_PLATFORMS = 10 };
    double mem[MEM_SIZE] = {0};
    cl_platform_id platform_id[MAX_PLATFORMS];
    cl_platform_id pid = NULL;
    cl_device_id device_id = NULL;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL;
    cl_mem memobj = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret;
    FILE *fp;
    const char fileName[] = "./kernel.cl";
    size_t source_size;
    char *source_str;

    /* Load kernel source code */
    fp = fopen(fileName, "r");
    if (!fp) {
        fprintf(stderr, "Failed to open kernel source %s\n", fileName);
        return EXIT_FAILURE;
    }
    source_str = malloc(MAX_SOURCE_SIZE);
    if (!source_str) {
        fprintf(stderr, "Failed to allocate %d bytes for the kernel source\n", MAX_SOURCE_SIZE);
        fclose(fp);
        return EXIT_FAILURE;
    }
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose(fp);
    if (source_size == 0) {
        fprintf(stderr, "Kernel source %s is empty or unreadable\n", fileName);
        free(source_str);
        return EXIT_FAILURE;
    }

    /* Get platform/device information */
    err(clGetPlatformIDs(MAX_PLATFORMS, platform_id, &ret_num_platforms));
    /* The reported count is the number of available platforms, which may
       exceed the number of entries actually written into platform_id. */
    if (ret_num_platforms > MAX_PLATFORMS) {
        ret_num_platforms = MAX_PLATFORMS;
    }
    for (cl_uint i = 0; i < ret_num_platforms; ++i)
    {
        //check if intel
        char pvendor[500];
        err(clGetPlatformInfo(platform_id[i], CL_PLATFORM_VENDOR, sizeof pvendor, pvendor, NULL));
        //choose the first intel platform
        if (strstr(pvendor, "Intel") != NULL)
        {
            pid = platform_id[i];
            break;
        }
    }
    if (pid == NULL) {
        fprintf(stderr, "No Intel OpenCL platform found\n");
        free(source_str);
        return EXIT_FAILURE;
    }

    //get the Intel CPU
    err(clGetDeviceIDs(pid, CL_DEVICE_TYPE_CPU, 1, &device_id, &ret_num_devices));

    /* Create OpenCL Context */
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
    err(ret);

    /* Create Command Queue */
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    err(ret);

    /* Create memory buffer*/
    memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(double), NULL, &ret);
    err(ret);

    /* Create Kernel program from the read in source */
    program = clCreateProgramWithSource(context, 1, (const char **)&source_str, &source_size, &ret);
    err(ret);

    /* Build Kernel Program */
    err(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));

    /* Create OpenCL Kernel */
    kernel = clCreateKernel(program, "test_kernel", &ret);
    err(ret);

    /* Set OpenCL kernel argument */
    err(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj));

    /* Only the first entry of each array is used: the launch is 1-dimensional. */
    size_t global_work_size[3] = {MEM_SIZE, 0, 0};
    size_t local_work_size[3] = {4, 0, 0};

    /* Execute OpenCL kernel */
    err(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL));

    /* Transfer result from the memory buffer (blocking read) */
    err(clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(double), mem, 0, NULL, NULL));

    /* Display result */
    for (size_t i = 0; i < MEM_SIZE; i++) {
        printf("%e\t", mem[i]);
    }

    /* Finalization */
    err(clFlush(command_queue));
    err(clFinish(command_queue));
    err(clReleaseKernel(kernel));
    err(clReleaseProgram(program));
    err(clReleaseMemObject(memobj));
    err(clReleaseCommandQueue(command_queue));
    err(clReleaseContext(context));
    free(source_str);
    return 0;
}
kernel.cl code:
// lid(N) / gid(N): the work-item's local id and work-group id in dimension N, cast to int.
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
// Enable cl_khr_fp64 explicitly only when __OPENCL_C_VERSION__ is below 120.
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
// NOTE(review): "cl_inte_printf" looks like a typo for "cl_intel_printf" - confirm.
// The kernel below never calls printf; left untouched to keep the reproducer exact.
#pragma OPENCL EXTENSION cl_inte_printf: enable
/*
 * Failing variant from the report: for each work-item whose linear index
 * 4 * gid(0) + lid(0) is at most 14, store that index (as a double) into
 * out at the same position. Other work-items write nothing (no else branch).
 * Requires a work-group size of exactly (4, 1, 1).
 * The guard is deliberately spelled with "+ -4 *" and "+ -1 *"; per the
 * report this spelling is what produces the all-zero output.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
// Equivalent to: 4 * gid(0) + lid(0) <= 14. Do not "simplify" - the form is the reproducer.
if (14 + -4 * gid(0) + -1 * lid(0) >= 0)
out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
}
and is compiled with:
gcc -std=c99 -c main.c -I/opt/opencl-headers/ -o main.o -O0 -g && gcc main.o -Wl,-rpath,/opt/intel/opencl/lib64/ -lOpenCL -o a.out
This outputs the following on my system (Intel opencl-1.2-6.4.0.24 runtime, Intel(R) Xeon(R) CPU X5650, on RHEL 7.3 (Maipo)):
0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
Whereas we would expect the values 0 through 14 in the first 15 elements, with the last element (index 15) left unset.
As with the previous bug (https://software.intel.com/en-us/forums/opencl/topic/704155), the error can be resolved by switching to a "pure" minus, e.g.:
// lid(N) / gid(N): the work-item's local id and work-group id in dimension N, cast to int.
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
// Enable cl_khr_fp64 explicitly only when __OPENCL_C_VERSION__ is below 120.
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
#pragma OPENCL EXTENSION cl_inte_printf: enable
/*
 * Workaround variant 1: same kernel, but the guard uses binary minus
 * ("- 4 *", "- 1 *") instead of adding negated terms. Work-items whose
 * linear index 4 * gid(0) + lid(0) is at most 14 store that index into out;
 * the rest write nothing. Requires a work-group size of exactly (4, 1, 1).
 * Per the report, this spelling resolves the error.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
// Equivalent to: 4 * gid(0) + lid(0) <= 14.
if (14 - 4 * gid(0) - 1 * lid(0) >= 0)
out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
}
OR by adding a corresponding `else` branch (an `else if` chain would likely work too):
// lid(N) / gid(N): the work-item's local id and work-group id in dimension N, cast to int.
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
// Enable cl_khr_fp64 explicitly only when __OPENCL_C_VERSION__ is below 120.
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
#pragma OPENCL EXTENSION cl_inte_printf: enable
/*
 * Workaround variant 2: keeps the original "+ -4 *" / "+ -1 *" guard but
 * adds an else branch, so every work-item writes to out: its linear index
 * 4 * gid(0) + lid(0) when that index is at most 14, and -1 otherwise.
 * Requires a work-group size of exactly (4, 1, 1).
 * Per the report, adding the else branch resolves the error.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
// Equivalent to: 4 * gid(0) + lid(0) <= 14.
if (14 + -4 * gid(0) + -1 * lid(0) >= 0)
out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
else
// Sentinel for work-items that fail the guard.
out[4 * gid(0) + lid(0)] = -1;
}
Best,
Nick
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for this report. Issue is replicated and filed.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page