- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
First, a preface: I believe this is related to the bug reported here (https://software.intel.com/en-us/forums/opencl/topic/704155). However, I'm reporting it anyway on the chance that it is in fact distinct. In any case, it has appeared under new circumstances for the user (i.e. me).
main.c program code:
#include <stdlib.h> #include <string.h> #include <stdio.h> #include <CL/cl.h> #define MEM_SIZE (16) #define MAX_SOURCE_SIZE (0x100000) //simple error checking, not strictly necessary: #define err(ans) { cpu_assert((ans), __FILE__, __LINE__); } const char *getErrorString(cl_int error) { switch(error){ // run-time and JIT compiler errors case 0: return "CL_SUCCESS"; case -1: return "CL_DEVICE_NOT_FOUND"; case -2: return "CL_DEVICE_NOT_AVAILABLE"; case -3: return "CL_COMPILER_NOT_AVAILABLE"; case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; case -5: return "CL_OUT_OF_RESOURCES"; case -6: return "CL_OUT_OF_HOST_MEMORY"; case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE"; case -8: return "CL_MEM_COPY_OVERLAP"; case -9: return "CL_IMAGE_FORMAT_MISMATCH"; case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; case -11: return "CL_BUILD_PROGRAM_FAILURE"; case -12: return "CL_MAP_FAILURE"; case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; case -15: return "CL_COMPILE_PROGRAM_FAILURE"; case -16: return "CL_LINKER_NOT_AVAILABLE"; case -17: return "CL_LINK_PROGRAM_FAILURE"; case -18: return "CL_DEVICE_PARTITION_FAILED"; case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; // compile-time errors case -30: return "CL_INVALID_VALUE"; case -31: return "CL_INVALID_DEVICE_TYPE"; case -32: return "CL_INVALID_PLATFORM"; case -33: return "CL_INVALID_DEVICE"; case -34: return "CL_INVALID_CONTEXT"; case -35: return "CL_INVALID_QUEUE_PROPERTIES"; case -36: return "CL_INVALID_COMMAND_QUEUE"; case -37: return "CL_INVALID_HOST_PTR"; case -38: return "CL_INVALID_MEM_OBJECT"; case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; case -40: return "CL_INVALID_IMAGE_SIZE"; case -41: return "CL_INVALID_SAMPLER"; case -42: return "CL_INVALID_BINARY"; case -43: return "CL_INVALID_BUILD_OPTIONS"; case -44: return "CL_INVALID_PROGRAM"; case -45: return "CL_INVALID_PROGRAM_EXECUTABLE"; case -46: return "CL_INVALID_KERNEL_NAME"; case -47: 
return "CL_INVALID_KERNEL_DEFINITION"; case -48: return "CL_INVALID_KERNEL"; case -49: return "CL_INVALID_ARG_INDEX"; case -50: return "CL_INVALID_ARG_VALUE"; case -51: return "CL_INVALID_ARG_SIZE"; case -52: return "CL_INVALID_KERNEL_ARGS"; case -53: return "CL_INVALID_WORK_DIMENSION"; case -54: return "CL_INVALID_WORK_GROUP_SIZE"; case -55: return "CL_INVALID_WORK_ITEM_SIZE"; case -56: return "CL_INVALID_GLOBAL_OFFSET"; case -57: return "CL_INVALID_EVENT_WAIT_LIST"; case -58: return "CL_INVALID_EVENT"; case -59: return "CL_INVALID_OPERATION"; case -60: return "CL_INVALID_GL_OBJECT"; case -61: return "CL_INVALID_BUFFER_SIZE"; case -62: return "CL_INVALID_MIP_LEVEL"; case -63: return "CL_INVALID_GLOBAL_WORK_SIZE"; case -64: return "CL_INVALID_PROPERTY"; case -65: return "CL_INVALID_IMAGE_DESCRIPTOR"; case -66: return "CL_INVALID_COMPILER_OPTIONS"; case -67: return "CL_INVALID_LINKER_OPTIONS"; case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT"; // extension errors case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; case -1001: return "CL_PLATFORM_NOT_FOUND_KHR"; case -1002: return "CL_INVALID_D3D10_DEVICE_KHR"; case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR"; case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"; case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"; default: return "Unknown OpenCL error"; } } void cpu_assert(cl_int x, const char *file, int line) { if (x != CL_SUCCESS) { fprintf(stderr,"cpu_assert: %s %s %d\n", getErrorString(x), file, line); exit(x); } } //main program int main() { double mem[MEM_SIZE] = {0}; cl_platform_id platform_id[10]; cl_device_id device_id = NULL; cl_context context = NULL; cl_command_queue command_queue = NULL; cl_mem memobj = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_uint ret_num_devices; cl_uint ret_num_platforms; cl_int ret; FILE *fp; const char fileName[] = "./kernel.cl"; size_t source_size; char *source_str; cl_int i; /* Load kernel source code */ fp = fopen(fileName, "r"); if 
(!fp) { exit(-1); } source_str = (char *)malloc(MAX_SOURCE_SIZE); source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); fclose(fp); /* Get platform/device information */ err(clGetPlatformIDs(10, platform_id, &ret_num_platforms)); cl_platform_id pid = NULL; for (int i = 0; i < ret_num_platforms; ++i) { //check if intel char pvendor[500]; size_t psize = 500 * sizeof(char); //choose the first intel platofrm char intel_check[10] = "Intel"; err(clGetPlatformInfo(platform_id, CL_PLATFORM_VENDOR, psize, pvendor, NULL)); if(strstr(pvendor, intel_check) != NULL) { pid = platform_id; } } //get the Intel CPU err(clGetDeviceIDs(pid, CL_DEVICE_TYPE_CPU, 1, &device_id, &ret_num_devices)); /* Create OpenCL Context */ context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); err(ret); /* Create Command Queue */ command_queue = clCreateCommandQueue(context, device_id, 0, &ret); err(ret); /* Create memory buffer*/ memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(double), NULL, &ret); err(ret); /* Create Kernel program from the read in source */ program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); err(ret); /* Build Kernel Program */ err(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); /* Create OpenCL Kernel */ kernel = clCreateKernel(program, "test_kernel", &ret); err(ret); /* Set OpenCL kernel argument */ ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj); err(ret); size_t global_work_size[3] = {MEM_SIZE, 0, 0}; size_t local_work_size[3] = {4, 0, 0}; /* Execute OpenCL kernel */ ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); err(ret); /* Transfer result from the memory buffer */ ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(double), mem, 0, NULL, NULL); err(ret); /* Display result */ for (i=0; i < MEM_SIZE; i++) { printf("%e\t", mem); } /* Finalization */ ret = 
clFlush(command_queue); ret = clFinish(command_queue); ret = clReleaseKernel(kernel); ret = clReleaseProgram(program); ret = clReleaseMemObject(memobj); ret = clReleaseCommandQueue(command_queue); ret = clReleaseContext(context); free(source_str); return 0; }
kernel.cl code:
/*
 * Reproducer kernel for the reported miscompile.
 *
 * Each work-item computes the flattened index 4 * group id + local id and,
 * when that index is at most 14, stores the index (as a double) at
 * out[index]. Work-groups are required to be 4x1x1.
 *
 * The guard is deliberately spelled with "+ -4 *" and "+ -1 *": per the
 * accompanying report this exact form is what triggers the wrong output,
 * so do not simplify it here.
 */

/* Local and group ids along dimension N, narrowed to int so the guard
 * below is evaluated in signed arithmetic. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))

/* Request the double-precision extension when compiling as OpenCL C < 1.2. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* NOTE(review): "cl_inte_printf" looks like a typo for "cl_intel_printf";
 * the kernel never calls printf, so this pragma appears to be unused.
 * Left as in the original report - confirm before changing. */
#pragma OPENCL EXTENSION cl_inte_printf: enable

__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
    /* 14 - 4*gid - lid >= 0  <=>  flattened index <= 14 */
    if (14 + -4 * gid(0) + -1 * lid(0) >= 0)
        out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
}
and is compiled with:
gcc -std=c99 -c main.c -I/opt/opencl-headers/ -o main.o -O0 -g && gcc main.o -Wl,-rpath,/opt/intel/opencl/lib64/ -lOpenCL -o a.out
This outputs on my system ( intel opencl-1.2-6.4.0.24 runtime, Intel(R) Xeon(R) CPU X5650, on RHEL 7.3 (Maipo)):
0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
where we would expect the values 0 through 14, with the last (16th) element left unset, since the kernel's guard only admits indices up to 14.
As with the previous bug (https://software.intel.com/en-us/forums/opencl/topic/704155), the error can be resolved by switching to a "pure" minus, e.g.:
/*
 * Workaround variant of the reproducer kernel: the guard uses plain
 * subtraction ("14 - 4 * gid - 1 * lid") instead of adding negated terms.
 *
 * Each work-item computes the flattened index 4 * group id + local id and,
 * when that index is at most 14, stores the index (as a double) at
 * out[index]. Work-groups are required to be 4x1x1.
 */

/* Local and group ids along dimension N, narrowed to int so the guard
 * below is evaluated in signed arithmetic. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))

/* Request the double-precision extension when compiling as OpenCL C < 1.2. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* NOTE(review): "cl_inte_printf" looks like a typo for "cl_intel_printf";
 * the kernel never calls printf, so this pragma appears to be unused.
 * Left as in the original report - confirm before changing. */
#pragma OPENCL EXTENSION cl_inte_printf: enable

__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
    /* True exactly when the flattened index is <= 14. */
    if (14 - 4 * gid(0) - 1 * lid(0) >= 0)
        out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
}
OR by adding a corresponding 'else' branch (and likely an 'else if' chain would work too):
/*
 * Workaround variant of the reproducer kernel: the guard keeps the
 * "+ -4 *" / "+ -1 *" spelling but gains an else branch.
 *
 * Each work-item computes the flattened index 4 * group id + local id.
 * When that index is at most 14 it stores the index (as a double) at
 * out[index]; otherwise it stores -1 there. Work-groups are required to
 * be 4x1x1.
 */

/* Local and group ids along dimension N, narrowed to int so the guard
 * below is evaluated in signed arithmetic. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))

/* Request the double-precision extension when compiling as OpenCL C < 1.2. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* NOTE(review): "cl_inte_printf" looks like a typo for "cl_intel_printf";
 * the kernel never calls printf, so this pragma appears to be unused.
 * Left as in the original report - confirm before changing. */
#pragma OPENCL EXTENSION cl_inte_printf: enable

__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
    /* 14 - 4*gid - lid >= 0  <=>  flattened index <= 14 */
    if (14 + -4 * gid(0) + -1 * lid(0) >= 0)
        out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
    else
        /* Every element is written in this variant, including index 15. */
        out[4 * gid(0) + lid(0)] = -1;
}
Best,
Nick
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for this report. Issue is replicated and filed.

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page