- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi all, I have run into a simple, reproducible segfault that occurs during compilation of a kernel, i.e. in clBuildProgram(). I am using the Intel opencl-1.2-6.4.0.25 runtime on an Intel(R) Xeon(R) CPU X5650, on Red Hat Enterprise Linux Server release 7.3 (Maipo).
In main.c:
#include <stdlib.h> #include <string.h> #include <stdio.h> #include <CL/cl.h> #define MEM_SIZE (81) #define MAX_SOURCE_SIZE (0x100000) //simple error checking, not strictly necessary: #define err(ans) { cpu_assert((ans), __FILE__, __LINE__); } const char *getErrorString(cl_int error) { switch(error){ // run-time and JIT compiler errors case 0: return "CL_SUCCESS"; case -1: return "CL_DEVICE_NOT_FOUND"; case -2: return "CL_DEVICE_NOT_AVAILABLE"; case -3: return "CL_COMPILER_NOT_AVAILABLE"; case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE"; case -5: return "CL_OUT_OF_RESOURCES"; case -6: return "CL_OUT_OF_HOST_MEMORY"; case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE"; case -8: return "CL_MEM_COPY_OVERLAP"; case -9: return "CL_IMAGE_FORMAT_MISMATCH"; case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED"; case -11: return "CL_BUILD_PROGRAM_FAILURE"; case -12: return "CL_MAP_FAILURE"; case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET"; case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"; case -15: return "CL_COMPILE_PROGRAM_FAILURE"; case -16: return "CL_LINKER_NOT_AVAILABLE"; case -17: return "CL_LINK_PROGRAM_FAILURE"; case -18: return "CL_DEVICE_PARTITION_FAILED"; case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"; // compile-time errors case -30: return "CL_INVALID_VALUE"; case -31: return "CL_INVALID_DEVICE_TYPE"; case -32: return "CL_INVALID_PLATFORM"; case -33: return "CL_INVALID_DEVICE"; case -34: return "CL_INVALID_CONTEXT"; case -35: return "CL_INVALID_QUEUE_PROPERTIES"; case -36: return "CL_INVALID_COMMAND_QUEUE"; case -37: return "CL_INVALID_HOST_PTR"; case -38: return "CL_INVALID_MEM_OBJECT"; case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"; case -40: return "CL_INVALID_IMAGE_SIZE"; case -41: return "CL_INVALID_SAMPLER"; case -42: return "CL_INVALID_BINARY"; case -43: return "CL_INVALID_BUILD_OPTIONS"; case -44: return "CL_INVALID_PROGRAM"; case -45: return "CL_INVALID_PROGRAM_EXECUTABLE"; case -46: return "CL_INVALID_KERNEL_NAME"; case -47: 
return "CL_INVALID_KERNEL_DEFINITION"; case -48: return "CL_INVALID_KERNEL"; case -49: return "CL_INVALID_ARG_INDEX"; case -50: return "CL_INVALID_ARG_VALUE"; case -51: return "CL_INVALID_ARG_SIZE"; case -52: return "CL_INVALID_KERNEL_ARGS"; case -53: return "CL_INVALID_WORK_DIMENSION"; case -54: return "CL_INVALID_WORK_GROUP_SIZE"; case -55: return "CL_INVALID_WORK_ITEM_SIZE"; case -56: return "CL_INVALID_GLOBAL_OFFSET"; case -57: return "CL_INVALID_EVENT_WAIT_LIST"; case -58: return "CL_INVALID_EVENT"; case -59: return "CL_INVALID_OPERATION"; case -60: return "CL_INVALID_GL_OBJECT"; case -61: return "CL_INVALID_BUFFER_SIZE"; case -62: return "CL_INVALID_MIP_LEVEL"; case -63: return "CL_INVALID_GLOBAL_WORK_SIZE"; case -64: return "CL_INVALID_PROPERTY"; case -65: return "CL_INVALID_IMAGE_DESCRIPTOR"; case -66: return "CL_INVALID_COMPILER_OPTIONS"; case -67: return "CL_INVALID_LINKER_OPTIONS"; case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT"; // extension errors case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"; case -1001: return "CL_PLATFORM_NOT_FOUND_KHR"; case -1002: return "CL_INVALID_D3D10_DEVICE_KHR"; case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR"; case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"; case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"; default: return "Unknown OpenCL error"; } } void cpu_assert(cl_int x, const char *file, int line) { if (x != CL_SUCCESS) { fprintf(stderr,"cpu_assert: %s %s %d\n", getErrorString(x), file, line); exit(x); } } //main program int main() { double mem[MEM_SIZE] = {0}; cl_platform_id platform_id[10]; cl_device_id device_id = NULL; cl_context context = NULL; cl_command_queue command_queue = NULL; cl_mem memobj = NULL; cl_program program = NULL; cl_kernel kernel = NULL; cl_uint ret_num_devices; cl_uint ret_num_platforms; cl_int ret; FILE *fp; const char fileName[] = "./kernel.cl"; size_t source_size; char *source_str; cl_int i; /* Load kernel source code */ fp = fopen(fileName, "r"); if 
(!fp) { exit(-1); } source_str = (char *)malloc(MAX_SOURCE_SIZE); source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); fclose(fp); /* Get platform/device information */ err(clGetPlatformIDs(10, platform_id, &ret_num_platforms)); cl_platform_id pid = NULL; for (int i = 0; i < ret_num_platforms; ++i) { //check if intel char pvendor[500]; size_t psize = 500 * sizeof(char); //choose the first intel platofrm char intel_check[10] = "Intel"; err(clGetPlatformInfo(platform_id, CL_PLATFORM_VENDOR, psize, pvendor, NULL)); if(strstr(pvendor, intel_check) != NULL) { pid = platform_id; } } //get the Intel CPU err(clGetDeviceIDs(pid, CL_DEVICE_TYPE_CPU, 1, &device_id, &ret_num_devices)); /* Create OpenCL Context */ context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret); err(ret); /* Create Command Queue */ command_queue = clCreateCommandQueue(context, device_id, 0, &ret); err(ret); /* Create memory buffer*/ memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(double), NULL, &ret); err(ret); /* Create Kernel program from the read in source */ program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret); err(ret); /* Build Kernel Program */ err(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL)); /* Create OpenCL Kernel */ kernel = clCreateKernel(program, "test_kernel", &ret); err(ret); /* Set OpenCL kernel argument */ ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj); err(ret); size_t global_work_size[3] = {1, 0, 0}; size_t local_work_size[3] = {4, 0, 0}; /* Execute OpenCL kernel */ ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL); err(ret); /* Transfer result from the memory buffer */ ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(double), mem, 0, NULL, NULL); err(ret); /* Display result */ for (i=0; i < MEM_SIZE; i++) { printf("%e\t", mem); } /* Finalization */ ret = 
clFlush(command_queue); ret = clFinish(command_queue); ret = clReleaseKernel(kernel); ret = clReleaseProgram(program); ret = clReleaseMemObject(memobj); ret = clReleaseCommandQueue(command_queue); ret = clReleaseContext(context); free(source_str); return 0; }
And in kernel.cl:
/* Shorthands for the work-item's local id and work-group id in dimension N,
 * converted to int. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/* Enable the double-precision extension explicitly when compiling as
 * OpenCL C older than 1.2. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif

/* 81 constant input values that the kernel copies into its output buffer. */
__constant double const params[81] = { 0.826029585777618, 0.09277771524641742, 0.17882754585611516, 0.5063378202753985, 0.6152053087181504, 0.2880270862072688, 0.3129924612654047, 0.1914031592227472, 0.7102266835529006, 0.48932865515007107, 0.6061282135426413, 0.7596607644431532, 0.14443802597375788, 0.8116209377685191, 0.9594085162032434, 0.13075211361103556, 0.9224608320274585, 0.14604765433036915, 0.2596221225532682, 0.49918545558827154, 0.7450662624171099, 0.2667298203995915, 0.25658809473522426, 0.8326419218342502, 0.4342552237224352, 0.17536887526039147, 0.7307554279935198, 0.16662216310809286, 0.5729980215962235, 0.960525881776112, 0.6405413316234755, 0.3470773744166106, 0.8743972242812091, 0.30552499783741516, 0.3146807714222978, 0.7641117037190533, 0.4956119008256711, 0.9564385601232531, 0.0817308089707498, 0.5851026578901762, 0.09572537604291531, 0.7595279218060109, 0.3370657201439913, 0.09352025664655894, 0.352966288119304, 0.5307300151282943, 0.06732539048031061, 0.11708139095968984, 0.7255317496613602, 0.9816608694307325, 0.8171862183434712, 0.42590052091582375, 0.7227051679396143, 0.8383945203018864, 0.5021108846782305, 0.8536292405267636, 0.863285283964059, 0.18335701117563308, 0.4563413539390173, 0.7652079478016128, 0.431958947047663, 0.49298992135423214, 0.6306613411814528, 0.7182527828252896, 0.2918913305544274, 0.1922131983748544, 0.1473770002195013, 0.05404427061478689, 0.24071986186320615, 0.6771845487513621, 0.05844761644341512, 0.879924425441519, 0.17381661089494238, 0.475292639000336, 0.9467343353557718, 0.8799321075729781, 0.14852416386935496, 0.8957251952598398, 0.8342246883437114, 0.3828325906418706, 0.20051275899280996 };

/*
 * Copy params[] into out[], with each of the four work-items of a
 * work-group handling the indices 4 * i_outer + lid(0).
 *
 * The loop bound evaluates (in integer arithmetic) to 20 for local id 0 and
 * to 19 for local ids 1..3, so the largest index touched is 80, the last
 * element of params[].
 *
 * NOTE: this file is the reproducer for the reported clBuildProgram() crash;
 * keep the bound expression spelled exactly as `20 + -1 * lid(0) + ...`.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) loopy_kernel(__global double *restrict out)
{
  for (int i_outer = 0; i_outer <= 20 + -1 * lid(0) + (3 * lid(0) / 4); ++i_outer)
    out[4 * i_outer + lid(0)] = params[4 * i_outer + lid(0)];
}
The program was compiled using:
gcc -std=c99 -c main.c -I/opt/opencl-headers/ -o main.o -O0 -g && gcc main.o -Wl,-rpath,/opt/intel/opencl/lib64/ -lOpenCL -o a.out
(Note that turning off debug info and/or compiling with -O3 gives the same result.)
Program output:
Stack dump:
0. Running pass 'PrepareKernelArgs' on module 'main'.
gdb output:
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff315cdec in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
Missing separate debuginfos, use: debuginfo-install glibc-2.17-157.el7.x86_64 libgcc-4.8.5-11.el7.x86_64 libstdc++-4.8.5-11.el7.x86_64 ncurses-libs-5.9-13.20130511.el7.x86_64 numactl-libs-2.0.9-6.el7_2.x86_64 opencl-1.2-base-6.4.0.25-1.x86_64 opencl-1.2-intel-cpu-6.4.0.25-1.x86_64 zlib-1.2.7-17.el7.x86_64
(gdb) bt
#0 0x00007ffff315cdec in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#1 0x00007ffff311d724 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#2 0x00007ffff311f610 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#3 0x00007ffff311f886 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#4 0x00007ffff311fcdb in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#5 0x00007ffff32b4557 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#6 0x00007ffff30404b8 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#7 0x00007ffff30214e7 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#8 0x00007ffff3039909 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#9 0x00007ffff301ead9 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#10 0x00007ffff44755b7 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so
#11 0x00007ffff55ba1d0 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#12 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#13 0x00007ffff4d57691 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:673
#14 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#15 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#16 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#17 0x00007ffff55bc629 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#18 0x00007ffff55dbdfc in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#19 0x00007ffff55dccbd in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#20 0x00007ffff55dcd5a in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#21 0x00007ffff55dbd48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#22 0x00007ffff55bb888 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#23 0x00007ffff55ba7e1 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#24 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#25 0x00007ffff4d57691 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:673
#26 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#27 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#28 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#29 0x00007ffff55bc629 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#30 0x00007ffff55dbdfc in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#31 0x00007ffff55dccbd in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#32 0x00007ffff55dcd5a in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#33 0x00007ffff55dbd48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#34 0x00007ffff55bb888 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#35 0x00007ffff55c096c in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#36 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#37 0x00007ffff4d575d5 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:676
#38 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#39 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#40 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#41 0x00007ffff55bfd95 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#42 0x00007ffff55ab890 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#43 0x00007ffff5587722 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#44 0x00007ffff554b055 in clBuildProgram () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#45 0x0000000000400ffe in main ()
valgrind output:
==10701== Memcheck, a memory error detector
==10701== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==10701== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==10701== Command: ./a.out
==10701== Parent PID: 5874
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x5D3559C: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F086DE: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5DFFD65: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F0AAAC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F0AE9B: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5E01320: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5DD45D1: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5CD34AC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5CD3447: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x4E35811: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701== by 0x4E39CB1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701== by 0x560EBAF: pthread_once (in /usr/lib64/libpthread-2.17.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x5D352B2: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F086EF: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5DFFD65: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F0AAAC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F0AE9B: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5E01320: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5DD45D1: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5CD34AC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5CD3447: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x4E35811: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701== by 0x4E39CB1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701== by 0x560EBAF: pthread_once (in /usr/lib64/libpthread-2.17.so)
==10701==
==10701== Warning: set address range perms: large range [0x1000000000, 0x2900000000) (noaccess)
==10701== Warning: set address range perms: large range [0x2900000000, 0x3000000000) (noaccess)
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x4084A8F: __intel_sse2_strrchr (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701== by 0x406D541: tbb::internal::init_dl_data() (dynamic_link.cpp:332)
==10701== by 0x406D476: __sti__$E (dynamic_link.cpp:495)
==10701== by 0x408F041: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701== by 0x4068732: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C876: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C888: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C89A: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C8AC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C8BE: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Invalid read of size 1
==10701== at 0x8F0ADEC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECB723: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECD60F: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECD885: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECDCDA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x9062556: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DEE4B7: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DCF4E6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE7908: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DCCAD8: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F5B6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x78521CF: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== Address 0x8 is not stack'd, malloc'd or (recently) free'd
==10701==
==10701==
==10701== Process terminating with default action of signal 11 (SIGSEGV)
==10701== Access not within mapped region at address 0x8
==10701== at 0x8F0ADEC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECB723: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECD60F: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECD885: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECDCDA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x9062556: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DEE4B7: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DCF4E6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE7908: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DCCAD8: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F5B6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x78521CF: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== If you believe this happened as a result of a stack
==10701== overflow in your program's main thread (unlikely but
==10701== possible), you can try to increase the size of the
==10701== main thread stack using the --main-stacksize= flag.
==10701== The main thread stack size used in this run was 8388608.
==10701==
==10701== HEAP SUMMARY:
==10701== in use at exit: 20,677,964 bytes in 133,880 blocks
==10701== total heap usage: 162,203 allocs, 28,323 frees, 33,802,790 bytes allocated
==10701==
==10701== LEAK SUMMARY:
==10701== definitely lost: 312 bytes in 5 blocks
==10701== indirectly lost: 0 bytes in 0 blocks
==10701== possibly lost: 1,844,672 bytes in 13,733 blocks
==10701== still reachable: 18,832,980 bytes in 120,142 blocks
==10701== of which reachable via heuristic:
==10701== stdstring : 220,711 bytes in 4,022 blocks
==10701== newarray : 37,008 bytes in 12 blocks
==10701== multipleinheritance: 928 bytes in 2 blocks
==10701== suppressed: 0 bytes in 0 blocks
==10701== Rerun with --leak-check=full to see details of leaked memory
==10701==
==10701== For counts of detected and suppressed errors, rerun with: -v
==10701== Use --track-origins=yes to see where uninitialised values come from
==10701== ERROR SUMMARY: 20 errors from 9 contexts (suppressed: 0
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Interestingly, modifying the kernel as follows
/*
 * Variant of loopy_kernel with the loop bound written as
 * `20 + (3 * lid(0) / 4) - lid(0)`: the local-id term is subtracted at the
 * end instead of being added as `-1 * lid(0)`.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) loopy_kernel(__global double *restrict out)
{
  for (int i_outer = 0; i_outer <= 20 + (3 * lid(0) / 4) - lid(0); ++i_outer)
    out[4 * i_outer + lid(0)] = params[4 * i_outer + lid(0)];
}
--note that the -1 * lid(0) has been moved to the end of the for-loop conditional and converted to a "true" subtraction
and fixing a minor bug in my worksize allocation:
/* Global size now equals the local size of 4 in the first dimension. */
size_t global_work_size[3] = {4, 0, 0};
size_t local_work_size[3] = {4, 0, 0};
resolves this segfault, and results in the correct output.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Any word on this, re: reproducibility? Or even just acknowledging that someone saw it??
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Looks like my earlier reply didn't post. Yes, the issue is reproduced and reported to the dev team. Thanks for letting us know.

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page