- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi all, I have run into a simple, reproducible segfault that occurs during compilation of a kernel, i.e. inside clBuildProgram(). I am using the Intel OpenCL runtime (opencl-1.2-6.4.0.25) on an Intel(R) Xeon(R) CPU X5650, running Red Hat Enterprise Linux Server release 7.3 (Maipo).
In main.c:
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <CL/cl.h>
#define MEM_SIZE (81)
#define MAX_SOURCE_SIZE (0x100000)
//simple error checking, not strictly necessary:
#define err(ans) { cpu_assert((ans), __FILE__, __LINE__); }
/*
 * Map an OpenCL status code to the name of the corresponding CL_* constant.
 *
 * error: status value returned by an OpenCL API call.
 *
 * Returns a pointer to a static string; codes that are not in the table
 * yield "Unknown OpenCL error". The caller must not free the result.
 */
const char *getErrorString(cl_int error)
{
    static const struct {
        int code;
        const char *name;
    } known_codes[] = {
        /* run-time and JIT compiler errors */
        {0, "CL_SUCCESS"},
        {-1, "CL_DEVICE_NOT_FOUND"},
        {-2, "CL_DEVICE_NOT_AVAILABLE"},
        {-3, "CL_COMPILER_NOT_AVAILABLE"},
        {-4, "CL_MEM_OBJECT_ALLOCATION_FAILURE"},
        {-5, "CL_OUT_OF_RESOURCES"},
        {-6, "CL_OUT_OF_HOST_MEMORY"},
        {-7, "CL_PROFILING_INFO_NOT_AVAILABLE"},
        {-8, "CL_MEM_COPY_OVERLAP"},
        {-9, "CL_IMAGE_FORMAT_MISMATCH"},
        {-10, "CL_IMAGE_FORMAT_NOT_SUPPORTED"},
        {-11, "CL_BUILD_PROGRAM_FAILURE"},
        {-12, "CL_MAP_FAILURE"},
        {-13, "CL_MISALIGNED_SUB_BUFFER_OFFSET"},
        {-14, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"},
        {-15, "CL_COMPILE_PROGRAM_FAILURE"},
        {-16, "CL_LINKER_NOT_AVAILABLE"},
        {-17, "CL_LINK_PROGRAM_FAILURE"},
        {-18, "CL_DEVICE_PARTITION_FAILED"},
        {-19, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"},
        /* CL_INVALID_* argument / usage errors */
        {-30, "CL_INVALID_VALUE"},
        {-31, "CL_INVALID_DEVICE_TYPE"},
        {-32, "CL_INVALID_PLATFORM"},
        {-33, "CL_INVALID_DEVICE"},
        {-34, "CL_INVALID_CONTEXT"},
        {-35, "CL_INVALID_QUEUE_PROPERTIES"},
        {-36, "CL_INVALID_COMMAND_QUEUE"},
        {-37, "CL_INVALID_HOST_PTR"},
        {-38, "CL_INVALID_MEM_OBJECT"},
        {-39, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"},
        {-40, "CL_INVALID_IMAGE_SIZE"},
        {-41, "CL_INVALID_SAMPLER"},
        {-42, "CL_INVALID_BINARY"},
        {-43, "CL_INVALID_BUILD_OPTIONS"},
        {-44, "CL_INVALID_PROGRAM"},
        {-45, "CL_INVALID_PROGRAM_EXECUTABLE"},
        {-46, "CL_INVALID_KERNEL_NAME"},
        {-47, "CL_INVALID_KERNEL_DEFINITION"},
        {-48, "CL_INVALID_KERNEL"},
        {-49, "CL_INVALID_ARG_INDEX"},
        {-50, "CL_INVALID_ARG_VALUE"},
        {-51, "CL_INVALID_ARG_SIZE"},
        {-52, "CL_INVALID_KERNEL_ARGS"},
        {-53, "CL_INVALID_WORK_DIMENSION"},
        {-54, "CL_INVALID_WORK_GROUP_SIZE"},
        {-55, "CL_INVALID_WORK_ITEM_SIZE"},
        {-56, "CL_INVALID_GLOBAL_OFFSET"},
        {-57, "CL_INVALID_EVENT_WAIT_LIST"},
        {-58, "CL_INVALID_EVENT"},
        {-59, "CL_INVALID_OPERATION"},
        {-60, "CL_INVALID_GL_OBJECT"},
        {-61, "CL_INVALID_BUFFER_SIZE"},
        {-62, "CL_INVALID_MIP_LEVEL"},
        {-63, "CL_INVALID_GLOBAL_WORK_SIZE"},
        {-64, "CL_INVALID_PROPERTY"},
        {-65, "CL_INVALID_IMAGE_DESCRIPTOR"},
        {-66, "CL_INVALID_COMPILER_OPTIONS"},
        {-67, "CL_INVALID_LINKER_OPTIONS"},
        {-68, "CL_INVALID_DEVICE_PARTITION_COUNT"},
        /* extension errors */
        {-1000, "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"},
        {-1001, "CL_PLATFORM_NOT_FOUND_KHR"},
        {-1002, "CL_INVALID_D3D10_DEVICE_KHR"},
        {-1003, "CL_INVALID_D3D10_RESOURCE_KHR"},
        {-1004, "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"},
        {-1005, "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"},
    };
    const size_t entry_count = sizeof known_codes / sizeof known_codes[0];

    for (size_t k = 0; k < entry_count; ++k) {
        if (known_codes[k].code == error) {
            return known_codes[k].name;
        }
    }
    return "Unknown OpenCL error";
}
/*
 * Terminate the process if an OpenCL call reported a failure.
 *
 * x:    status code to check.
 * file: source file name to report (the err() macro passes __FILE__).
 * line: source line to report (the err() macro passes __LINE__).
 *
 * Does nothing when x is CL_SUCCESS. Otherwise prints the symbolic error
 * name together with file and line to stderr and exits with x as the
 * exit status.
 */
void cpu_assert(cl_int x, const char *file, int line) {
    if (x == CL_SUCCESS) {
        return;
    }
    fprintf(stderr, "cpu_assert: %s %s %d\n", getErrorString(x), file, line);
    exit(x);
}
//main program
/*
 * Host program for the clBuildProgram() crash reproducer.
 *
 * Reads ./kernel.cl, selects the first platform whose vendor string contains
 * "Intel", builds the program for that platform's CPU device, launches
 * loopy_kernel as a single work-group of four work-items and prints the
 * MEM_SIZE doubles read back from the device buffer.
 *
 * Returns 0 on success. A failing OpenCL call terminates the process through
 * err()/cpu_assert(); file, allocation and platform-selection failures print
 * a message to stderr and return EXIT_FAILURE.
 */
int main(void)
{
    enum { MAX_PLATFORMS = 10 };

    double mem[MEM_SIZE] = {0};
    cl_platform_id platform_id[MAX_PLATFORMS];
    cl_platform_id pid = NULL;
    cl_device_id device_id = NULL;
    cl_uint ret_num_devices = 0;
    cl_uint ret_num_platforms = 0;
    cl_int ret;
    const char fileName[] = "./kernel.cl";

    /* Load kernel source code */
    FILE *fp = fopen(fileName, "r");
    if (!fp) {
        perror(fileName);
        return EXIT_FAILURE;
    }
    char *source_str = malloc(MAX_SOURCE_SIZE);
    if (!source_str) {
        fprintf(stderr, "out of memory while reading %s\n", fileName);
        fclose(fp);
        return EXIT_FAILURE;
    }
    size_t source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose(fp);
    if (source_size == 0) {
        /* A length of 0 tells OpenCL the string is NUL-terminated, which this buffer is not. */
        fprintf(stderr, "%s is empty or could not be read\n", fileName);
        free(source_str);
        return EXIT_FAILURE;
    }

    /* Get platform/device information */
    err(clGetPlatformIDs(MAX_PLATFORMS, platform_id, &ret_num_platforms));
    /* The reported count is the number of available platforms and may exceed the array size. */
    if (ret_num_platforms > MAX_PLATFORMS) {
        ret_num_platforms = MAX_PLATFORMS;
    }
    /* Choose the first Intel platform */
    for (cl_uint p = 0; p < ret_num_platforms; ++p) {
        char pvendor[500];
        err(clGetPlatformInfo(platform_id[p], CL_PLATFORM_VENDOR, sizeof pvendor, pvendor, NULL));
        if (strstr(pvendor, "Intel") != NULL) {
            pid = platform_id[p];
            break;
        }
    }
    if (pid == NULL) {
        fprintf(stderr, "no Intel OpenCL platform found\n");
        free(source_str);
        return EXIT_FAILURE;
    }

    /* Get the Intel CPU */
    err(clGetDeviceIDs(pid, CL_DEVICE_TYPE_CPU, 1, &device_id, &ret_num_devices));

    /* Create OpenCL context */
    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
    err(ret);

    /* Create command queue */
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    err(ret);

    /* Create memory buffer */
    cl_mem memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(double), NULL, &ret);
    err(ret);

    /* Create kernel program from the source that was read in */
    const char *sources[] = { source_str };
    cl_program program = clCreateProgramWithSource(context, 1, sources, &source_size, &ret);
    err(ret);

    /* Build kernel program (this is the call in which the reported segfault occurs) */
    err(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));

    /* Create OpenCL kernel; the name must match the __kernel function in kernel.cl */
    cl_kernel kernel = clCreateKernel(program, "loopy_kernel", &ret);
    err(ret);

    /* Set OpenCL kernel argument */
    err(clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj));

    /*
     * One work-group of four work-items: the global size has to be a
     * multiple of the local size, and the kernel requires a local size of
     * (4, 1, 1). Only the first entry is used because work_dim is 1.
     */
    size_t global_work_size[3] = {4, 0, 0};
    size_t local_work_size[3] = {4, 0, 0};

    /* Execute OpenCL kernel */
    err(clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL));

    /* Transfer result from the memory buffer (blocking read) */
    err(clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(double), mem, 0, NULL, NULL));

    /* Display result */
    for (int i = 0; i < MEM_SIZE; i++) {
        printf("%e\t", mem[i]);
    }
    printf("\n");

    /* Finalization */
    err(clFlush(command_queue));
    err(clFinish(command_queue));
    err(clReleaseKernel(kernel));
    err(clReleaseProgram(program));
    err(clReleaseMemObject(memobj));
    err(clReleaseCommandQueue(command_queue));
    err(clReleaseContext(context));
    free(source_str);
    return 0;
}
And in kernel.cl:
/* lid(N): the work-item's local id in dimension N, cast to int. */
#define lid(N) ((int) get_local_id(N))
/* gid(N): the work-group id in dimension N, cast to int. */
#define gid(N) ((int) get_group_id(N))
/* For OpenCL C versions below 1.2, explicitly enable the cl_khr_fp64 extension (the code below uses double). */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* Table of 81 double values in the __constant address space; the kernel copies these into its output buffer. */
__constant double const params[81] = { 0.826029585777618, 0.09277771524641742, 0.17882754585611516, 0.5063378202753985, 0.6152053087181504, 0.2880270862072688, 0.3129924612654047, 0.1914031592227472, 0.7102266835529006, 0.48932865515007107, 0.6061282135426413, 0.7596607644431532, 0.14443802597375788, 0.8116209377685191, 0.9594085162032434, 0.13075211361103556, 0.9224608320274585, 0.14604765433036915, 0.2596221225532682, 0.49918545558827154, 0.7450662624171099, 0.2667298203995915, 0.25658809473522426, 0.8326419218342502, 0.4342552237224352, 0.17536887526039147, 0.7307554279935198, 0.16662216310809286, 0.5729980215962235, 0.960525881776112, 0.6405413316234755, 0.3470773744166106, 0.8743972242812091, 0.30552499783741516, 0.3146807714222978, 0.7641117037190533, 0.4956119008256711, 0.9564385601232531, 0.0817308089707498, 0.5851026578901762, 0.09572537604291531, 0.7595279218060109, 0.3370657201439913, 0.09352025664655894, 0.352966288119304, 0.5307300151282943, 0.06732539048031061, 0.11708139095968984, 0.7255317496613602, 0.9816608694307325, 0.8171862183434712, 0.42590052091582375, 0.7227051679396143, 0.8383945203018864, 0.5021108846782305, 0.8536292405267636, 0.863285283964059, 0.18335701117563308, 0.4563413539390173, 0.7652079478016128, 0.431958947047663, 0.49298992135423214, 0.6306613411814528, 0.7182527828252896, 0.2918913305544274, 0.1922131983748544, 0.1473770002195013, 0.05404427061478689, 0.24071986186320615, 0.6771845487513621, 0.05844761644341512, 0.879924425441519, 0.17381661089494238, 0.475292639000336, 0.9467343353557718, 0.8799321075729781, 0.14852416386935496, 0.8957251952598398, 0.8342246883437114, 0.3828325906418706, 0.20051275899280996 };
/*
 * Copies the params table into out.
 *
 * out: global buffer of doubles; indices 0..80 are written.
 *
 * The kernel requires a work-group size of (4, 1, 1). The work-item with
 * local id l copies elements l, l + 4, l + 8, ...: the loop bound evaluates
 * to 20 for l == 0 and to 19 for l == 1..3 (integer division), so the four
 * work-items together cover indices 0..80 exactly once.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) loopy_kernel(__global double *restrict out)
{
/* NOTE: this exact spelling of the bound ("+ -1 * lid(0)" before the division term) is the form reported to crash the compiler; do not simplify it in the reproducer. */
for (int i_outer = 0; i_outer <= 20 + -1 * lid(0) + (3 * lid(0) / 4); ++i_outer)
out[4 * i_outer + lid(0)] = params[4 * i_outer + lid(0)];
}
The program was compiled using:
gcc -std=c99 -c main.c -I/opt/opencl-headers/ -o main.o -O0 -g && gcc main.o -Wl,-rpath,/opt/intel/opencl/lib64/ -lOpenCL -o a.out
(Note that building without debug info and/or with -O3 gives the same result.)
Program output:
Stack dump:
0. Running pass 'PrepareKernelArgs' on module 'main'.
gdb output:
Program received signal SIGSEGV, Segmentation fault.
0x00007ffff315cdec in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
Missing separate debuginfos, use: debuginfo-install glibc-2.17-157.el7.x86_64 libgcc-4.8.5-11.el7.x86_64 libstdc++-4.8.5-11.el7.x86_64 ncurses-libs-5.9-13.20130511.el7.x86_64 numactl-libs-2.0.9-6.el7_2.x86_64 opencl-1.2-base-6.4.0.25-1.x86_64 opencl-1.2-intel-cpu-6.4.0.25-1.x86_64 zlib-1.2.7-17.el7.x86_64
(gdb) bt
#0 0x00007ffff315cdec in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#1 0x00007ffff311d724 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#2 0x00007ffff311f610 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#3 0x00007ffff311f886 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#4 0x00007ffff311fcdb in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#5 0x00007ffff32b4557 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#6 0x00007ffff30404b8 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#7 0x00007ffff30214e7 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#8 0x00007ffff3039909 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#9 0x00007ffff301ead9 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#10 0x00007ffff44755b7 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so
#11 0x00007ffff55ba1d0 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#12 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#13 0x00007ffff4d57691 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:673
#14 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#15 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#16 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#17 0x00007ffff55bc629 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#18 0x00007ffff55dbdfc in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#19 0x00007ffff55dccbd in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#20 0x00007ffff55dcd5a in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#21 0x00007ffff55dbd48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#22 0x00007ffff55bb888 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#23 0x00007ffff55ba7e1 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#24 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#25 0x00007ffff4d57691 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:673
#26 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#27 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#28 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#29 0x00007ffff55bc629 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#30 0x00007ffff55dbdfc in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#31 0x00007ffff55dccbd in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#32 0x00007ffff55dcd5a in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#33 0x00007ffff55dbd48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#34 0x00007ffff55bb888 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#35 0x00007ffff55c096c in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#36 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#37 0x00007ffff4d575d5 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:676
#38 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#39 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#40 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#41 0x00007ffff55bfd95 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#42 0x00007ffff55ab890 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#43 0x00007ffff5587722 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#44 0x00007ffff554b055 in clBuildProgram () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#45 0x0000000000400ffe in main ()
valgrind output:
==10701== Memcheck, a memory error detector
==10701== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==10701== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==10701== Command: ./a.out
==10701== Parent PID: 5874
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x5D3559C: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F086DE: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5DFFD65: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F0AAAC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F0AE9B: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5E01320: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5DD45D1: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5CD34AC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5CD3447: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x4E35811: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701== by 0x4E39CB1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701== by 0x560EBAF: pthread_once (in /usr/lib64/libpthread-2.17.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x5D352B2: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F086EF: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5DFFD65: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F0AAAC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5F0AE9B: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5E01320: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5DD45D1: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5CD34AC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x5CD3447: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701== by 0x4E35811: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701== by 0x4E39CB1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701== by 0x560EBAF: pthread_once (in /usr/lib64/libpthread-2.17.so)
==10701==
==10701== Warning: set address range perms: large range [0x1000000000, 0x2900000000) (noaccess)
==10701== Warning: set address range perms: large range [0x2900000000, 0x3000000000) (noaccess)
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x4084A8F: __intel_sse2_strrchr (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701== by 0x406D541: tbb::internal::init_dl_data() (dynamic_link.cpp:332)
==10701== by 0x406D476: __sti__$E (dynamic_link.cpp:495)
==10701== by 0x408F041: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701== by 0x4068732: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C876: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C888: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C89A: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C8AC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Conditional jump or move depends on uninitialised value(s)
==10701== at 0x8F0C8BE: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==
==10701== Invalid read of size 1
==10701== at 0x8F0ADEC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECB723: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECD60F: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECD885: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECDCDA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x9062556: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DEE4B7: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DCF4E6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE7908: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DCCAD8: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F5B6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x78521CF: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== Address 0x8 is not stack'd, malloc'd or (recently) free'd
==10701==
==10701==
==10701== Process terminating with default action of signal 11 (SIGSEGV)
==10701== Access not within mapped region at address 0x8
==10701== at 0x8F0ADEC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECB723: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECD60F: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECD885: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8ECDCDA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x9062556: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DEE4B7: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DCF4E6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DE7908: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x8DCCAD8: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701== by 0x880F5B6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701== by 0x78521CF: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== If you believe this happened as a result of a stack
==10701== overflow in your program's main thread (unlikely but
==10701== possible), you can try to increase the size of the
==10701== main thread stack using the --main-stacksize= flag.
==10701== The main thread stack size used in this run was 8388608.
==10701==
==10701== HEAP SUMMARY:
==10701== in use at exit: 20,677,964 bytes in 133,880 blocks
==10701== total heap usage: 162,203 allocs, 28,323 frees, 33,802,790 bytes allocated
==10701==
==10701== LEAK SUMMARY:
==10701== definitely lost: 312 bytes in 5 blocks
==10701== indirectly lost: 0 bytes in 0 blocks
==10701== possibly lost: 1,844,672 bytes in 13,733 blocks
==10701== still reachable: 18,832,980 bytes in 120,142 blocks
==10701== of which reachable via heuristic:
==10701== stdstring : 220,711 bytes in 4,022 blocks
==10701== newarray : 37,008 bytes in 12 blocks
==10701== multipleinheritance: 928 bytes in 2 blocks
==10701== suppressed: 0 bytes in 0 blocks
==10701== Rerun with --leak-check=full to see details of leaked memory
==10701==
==10701== For counts of detected and suppressed errors, rerun with: -v
==10701== Use --track-origins=yes to see where uninitialised values come from
==10701== ERROR SUMMARY: 20 errors from 9 contexts (suppressed: 0 from 0)
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Interestingly, modifying the kernel as follows
/*
 * Workaround variant of the kernel: same copy of params into out, with the
 * local-id term written as a plain subtraction at the end of the loop bound.
 * The bound still evaluates to 20 for local id 0 and 19 for local ids 1..3.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) loopy_kernel(__global double *restrict out)
{
for (int i_outer = 0; i_outer <= 20 + (3 * lid(0) / 4) - lid(0); ++i_outer)
out[4 * i_outer + lid(0)] = params[4 * i_outer + lid(0)];
}
(Note that the -1 * lid(0) term has been moved to the end of the for-loop condition and rewritten as a "true" subtraction.)
and fixing a minor bug in my worksize allocation:
/* Global size now equals the local size of 4, i.e. exactly one work-group of four work-items. */
size_t global_work_size[3] = {4, 0, 0};
size_t local_work_size[3] = {4, 0, 0};
resolves this segfault, and results in the correct output.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Any word on this, re: reproducibility? Or even just acknowledging that someone saw it??
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Looks like my earlier reply didn't post. Yes, the issue is reproduced and reported to the dev team. Thanks for letting us know.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page