Community
cancel
Showing results for 
Search instead for 
Did you mean: 
Highlighted
Beginner
82 Views

Segfault on clBuildProgram for simple CPU Kernel

Hi all, I have run into a simple, reproducible segfault that occurs during compilation of a kernel, i.e. in clBuildProgram().  I am using the Intel opencl-1.2-6.4.0.25 runtime on an Intel(R) Xeon(R) CPU X5650, running Red Hat Enterprise Linux Server release 7.3 (Maipo).

In main.c:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <CL/cl.h>

/* Number of doubles in the host result array and in the device buffer. */
#define MEM_SIZE (81)
/* Upper bound, in bytes, on how much kernel source is read from disk. */
#define MAX_SOURCE_SIZE (0x100000)

/*
 * Simple error checking, not strictly necessary: forwards an OpenCL status
 * code to cpu_assert() together with the call site's file and line.
 * Wrapped in do { } while (0) so that `err(x);` is exactly one statement and
 * stays correct inside an unbraced if/else.
 */
#define err(ans) do { cpu_assert((ans), __FILE__, __LINE__); } while (0)
/*
 * Translate an OpenCL status code into the name of its CL_* constant.
 *
 * error: status code as returned by an OpenCL API call.
 * Returns a pointer to a string literal (never NULL, must not be freed);
 * codes that are not in the table yield "Unknown OpenCL error".
 */
const char *getErrorString(cl_int error)
{
    static const struct {
        cl_int code;
        const char *name;
    } known_codes[] = {
        // run-time and JIT compiler errors
        { 0, "CL_SUCCESS" },
        { -1, "CL_DEVICE_NOT_FOUND" },
        { -2, "CL_DEVICE_NOT_AVAILABLE" },
        { -3, "CL_COMPILER_NOT_AVAILABLE" },
        { -4, "CL_MEM_OBJECT_ALLOCATION_FAILURE" },
        { -5, "CL_OUT_OF_RESOURCES" },
        { -6, "CL_OUT_OF_HOST_MEMORY" },
        { -7, "CL_PROFILING_INFO_NOT_AVAILABLE" },
        { -8, "CL_MEM_COPY_OVERLAP" },
        { -9, "CL_IMAGE_FORMAT_MISMATCH" },
        { -10, "CL_IMAGE_FORMAT_NOT_SUPPORTED" },
        { -11, "CL_BUILD_PROGRAM_FAILURE" },
        { -12, "CL_MAP_FAILURE" },
        { -13, "CL_MISALIGNED_SUB_BUFFER_OFFSET" },
        { -14, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST" },
        { -15, "CL_COMPILE_PROGRAM_FAILURE" },
        { -16, "CL_LINKER_NOT_AVAILABLE" },
        { -17, "CL_LINK_PROGRAM_FAILURE" },
        { -18, "CL_DEVICE_PARTITION_FAILED" },
        { -19, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE" },

        // compile-time errors
        { -30, "CL_INVALID_VALUE" },
        { -31, "CL_INVALID_DEVICE_TYPE" },
        { -32, "CL_INVALID_PLATFORM" },
        { -33, "CL_INVALID_DEVICE" },
        { -34, "CL_INVALID_CONTEXT" },
        { -35, "CL_INVALID_QUEUE_PROPERTIES" },
        { -36, "CL_INVALID_COMMAND_QUEUE" },
        { -37, "CL_INVALID_HOST_PTR" },
        { -38, "CL_INVALID_MEM_OBJECT" },
        { -39, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR" },
        { -40, "CL_INVALID_IMAGE_SIZE" },
        { -41, "CL_INVALID_SAMPLER" },
        { -42, "CL_INVALID_BINARY" },
        { -43, "CL_INVALID_BUILD_OPTIONS" },
        { -44, "CL_INVALID_PROGRAM" },
        { -45, "CL_INVALID_PROGRAM_EXECUTABLE" },
        { -46, "CL_INVALID_KERNEL_NAME" },
        { -47, "CL_INVALID_KERNEL_DEFINITION" },
        { -48, "CL_INVALID_KERNEL" },
        { -49, "CL_INVALID_ARG_INDEX" },
        { -50, "CL_INVALID_ARG_VALUE" },
        { -51, "CL_INVALID_ARG_SIZE" },
        { -52, "CL_INVALID_KERNEL_ARGS" },
        { -53, "CL_INVALID_WORK_DIMENSION" },
        { -54, "CL_INVALID_WORK_GROUP_SIZE" },
        { -55, "CL_INVALID_WORK_ITEM_SIZE" },
        { -56, "CL_INVALID_GLOBAL_OFFSET" },
        { -57, "CL_INVALID_EVENT_WAIT_LIST" },
        { -58, "CL_INVALID_EVENT" },
        { -59, "CL_INVALID_OPERATION" },
        { -60, "CL_INVALID_GL_OBJECT" },
        { -61, "CL_INVALID_BUFFER_SIZE" },
        { -62, "CL_INVALID_MIP_LEVEL" },
        { -63, "CL_INVALID_GLOBAL_WORK_SIZE" },
        { -64, "CL_INVALID_PROPERTY" },
        { -65, "CL_INVALID_IMAGE_DESCRIPTOR" },
        { -66, "CL_INVALID_COMPILER_OPTIONS" },
        { -67, "CL_INVALID_LINKER_OPTIONS" },
        { -68, "CL_INVALID_DEVICE_PARTITION_COUNT" },

        // extension errors
        { -1000, "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR" },
        { -1001, "CL_PLATFORM_NOT_FOUND_KHR" },
        { -1002, "CL_INVALID_D3D10_DEVICE_KHR" },
        { -1003, "CL_INVALID_D3D10_RESOURCE_KHR" },
        { -1004, "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR" },
        { -1005, "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR" },
    };
    const size_t entry_count = sizeof known_codes / sizeof known_codes[0];

    /* Linear scan: the table is small and this only runs on the error path. */
    for (size_t idx = 0; idx < entry_count; ++idx) {
        if (known_codes[idx].code == error) {
            return known_codes[idx].name;
        }
    }
    return "Unknown OpenCL error";
}
/*
 * Abort the program if an OpenCL call did not succeed.
 *
 * x:    status code to check.
 * file: source file of the call site (the err() macro passes __FILE__).
 * line: source line of the call site (the err() macro passes __LINE__).
 *
 * When x is CL_SUCCESS this returns normally. Otherwise it prints the
 * symbolic error name and the call site to stderr and calls exit(x).
 */
void cpu_assert(cl_int x, const char *file, int line) {
	if (x == CL_SUCCESS)
		return;

	const char *status_name = getErrorString(x);
	fprintf(stderr,"cpu_assert: %s %s %d\n", status_name, file, line);
	exit(x);
}

//main program
int main()	
{
	double mem[MEM_SIZE] = {0};

	cl_platform_id platform_id[10];
	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
	cl_mem memobj = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_uint ret_num_devices;
	cl_uint ret_num_platforms;
	cl_int ret;
	
	FILE *fp;
	const char fileName[] = "./kernel.cl";
	size_t source_size;
	char *source_str;
	cl_int i;
	
	/* Load kernel source code */
	fp = fopen(fileName, "r");
	if (!fp) {
		exit(-1);
	}
	source_str = (char *)malloc(MAX_SOURCE_SIZE);
	source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
	fclose(fp);
		
	/* Get platform/device information */
	err(clGetPlatformIDs(10, platform_id, &ret_num_platforms));
	cl_platform_id pid = NULL;
	for (int i = 0; i < ret_num_platforms; ++i)
	{
		//check if intel
		char pvendor[500];
		size_t psize = 500 * sizeof(char);
                //choose the first intel platofrm
		char intel_check[10] = "Intel";
		err(clGetPlatformInfo(platform_id, CL_PLATFORM_VENDOR, psize, pvendor, NULL));
		if(strstr(pvendor, intel_check) != NULL)
		{
			pid = platform_id;
		}
	}
        //get the Intel CPU
	err(clGetDeviceIDs(pid, CL_DEVICE_TYPE_CPU, 1, &device_id, &ret_num_devices));
	
	/* Create OpenCL Context */
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
	err(ret);	
	
	/* Create Command Queue */
	command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
	err(ret);
	
	/* Create memory buffer*/
	memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(double), NULL, &ret);
	err(ret);
	
	/* Create Kernel program from the read in source */
	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
	err(ret);

	/* Build Kernel Program */
	err(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));

	/* Create OpenCL Kernel */
	kernel = clCreateKernel(program, "test_kernel", &ret);
	err(ret);

	/* Set OpenCL kernel argument */
	ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
	err(ret);
	
	size_t global_work_size[3] = {1, 0, 0};
	size_t local_work_size[3] = {4, 0, 0};
	
	/* Execute OpenCL kernel */
	ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
	err(ret);

	/* Transfer result from the memory buffer */
	ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(double), mem, 0, NULL, NULL);
	err(ret);
	
	/* Display result */
	for (i=0; i < MEM_SIZE; i++) {
		printf("%e\t", mem);
	}
	
	/* Finalization */
	ret = clFlush(command_queue);
	ret = clFinish(command_queue);
	ret = clReleaseKernel(kernel);
	ret = clReleaseProgram(program);
	ret = clReleaseMemObject(memobj);
	ret = clReleaseCommandQueue(command_queue);
	ret = clReleaseContext(context);
	
	free(source_str);
	
	return 0;
}

 

And in kernel.cl:

/* Shorthands: local work-item id and work-group id in dimension N, cast to int. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/* For OpenCL C versions below 1.2, explicitly enable the double-precision extension. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif

/* Table of 81 double constants in __constant memory; the kernel below copies it into its output buffer. */
__constant double const params[81] = { 0.826029585777618, 0.09277771524641742, 0.17882754585611516, 0.5063378202753985, 0.6152053087181504, 0.2880270862072688, 0.3129924612654047, 0.1914031592227472, 0.7102266835529006, 0.48932865515007107, 0.6061282135426413, 0.7596607644431532, 0.14443802597375788, 0.8116209377685191, 0.9594085162032434, 0.13075211361103556, 0.9224608320274585, 0.14604765433036915, 0.2596221225532682, 0.49918545558827154, 0.7450662624171099, 0.2667298203995915, 0.25658809473522426, 0.8326419218342502, 0.4342552237224352, 0.17536887526039147, 0.7307554279935198, 0.16662216310809286, 0.5729980215962235, 0.960525881776112, 0.6405413316234755, 0.3470773744166106, 0.8743972242812091, 0.30552499783741516, 0.3146807714222978, 0.7641117037190533, 0.4956119008256711, 0.9564385601232531, 0.0817308089707498, 0.5851026578901762, 0.09572537604291531, 0.7595279218060109, 0.3370657201439913, 0.09352025664655894, 0.352966288119304, 0.5307300151282943, 0.06732539048031061, 0.11708139095968984, 0.7255317496613602, 0.9816608694307325, 0.8171862183434712, 0.42590052091582375, 0.7227051679396143, 0.8383945203018864, 0.5021108846782305, 0.8536292405267636, 0.863285283964059, 0.18335701117563308, 0.4563413539390173, 0.7652079478016128, 0.431958947047663, 0.49298992135423214, 0.6306613411814528, 0.7182527828252896, 0.2918913305544274, 0.1922131983748544, 0.1473770002195013, 0.05404427061478689, 0.24071986186320615, 0.6771845487513621, 0.05844761644341512, 0.879924425441519, 0.17381661089494238, 0.475292639000336, 0.9467343353557718, 0.8799321075729781, 0.14852416386935496, 0.8957251952598398, 0.8342246883437114, 0.3828325906418706, 0.20051275899280996 };

/*
 * Copies params[] into out[], striding by the required work-group size of 4:
 * the work-item with local id l handles indices 4*i_outer + l.
 * The loop bound evaluates to 20 for local id 0 and 19 for local ids 1..3,
 * so the four work-items together touch indices 0..80.
 *
 * out: global buffer that receives the copied values.
 *
 * NOTE: this is the reproducer from the report; the bound is deliberately
 * left in the `20 + -1 * lid(0) + ...` spelling that is reported to crash
 * clBuildProgram() on the Intel CPU runtime.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) loopy_kernel(__global double *restrict out)
{
  for (int i_outer = 0; i_outer <= 20 + -1 * lid(0) + (3 * lid(0) / 4); ++i_outer)
    out[4 * i_outer + lid(0)] = params[4 * i_outer + lid(0)];
}

 

The program was compiled using:

gcc -std=c99 -c main.c -I/opt/opencl-headers/ -o main.o -O0 -g && gcc main.o -Wl,-rpath,/opt/intel/opencl/lib64/ -lOpenCL -o a.out

(Note that turning off debug info and/or compiling with -O3 gives the same result.)

Program output:

Stack dump:
0.      Running pass 'PrepareKernelArgs' on module 'main'.

gdb output:

Program received signal SIGSEGV, Segmentation fault.
0x00007ffff315cdec in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
Missing separate debuginfos, use: debuginfo-install glibc-2.17-157.el7.x86_64 libgcc-4.8.5-11.el7.x86_64 libstdc++-4.8.5-11.el7.x86_64 ncurses-libs-5.9-13.20130511.el7.x86_64 numactl-libs-2.0.9-6.el7_2.x86_64 opencl-1.2-base-6.4.0.25-1.x86_64 opencl-1.2-intel-cpu-6.4.0.25-1.x86_64 zlib-1.2.7-17.el7.x86_64
(gdb) bt
#0  0x00007ffff315cdec in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#1  0x00007ffff311d724 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#2  0x00007ffff311f610 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#3  0x00007ffff311f886 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#4  0x00007ffff311fcdb in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#5  0x00007ffff32b4557 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#6  0x00007ffff30404b8 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#7  0x00007ffff30214e7 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#8  0x00007ffff3039909 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#9  0x00007ffff301ead9 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so
#10 0x00007ffff44755b7 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so
#11 0x00007ffff55ba1d0 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#12 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#13 0x00007ffff4d57691 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:673
#14 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#15 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#16 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#17 0x00007ffff55bc629 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#18 0x00007ffff55dbdfc in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#19 0x00007ffff55dccbd in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#20 0x00007ffff55dcd5a in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#21 0x00007ffff55dbd48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#22 0x00007ffff55bb888 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#23 0x00007ffff55ba7e1 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#24 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#25 0x00007ffff4d57691 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:673
#26 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#27 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#28 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#29 0x00007ffff55bc629 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#30 0x00007ffff55dbdfc in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#31 0x00007ffff55dccbd in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#32 0x00007ffff55dcd5a in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#33 0x00007ffff55dbd48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#34 0x00007ffff55bb888 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#35 0x00007ffff55c096c in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#36 0x00007ffff52c9b59 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#37 0x00007ffff4d575d5 in tbb::interface7::internal::task_arena_base::internal_execute (this=0x0, d=...) at ../../src/tbb/arena.cpp:676
#38 0x00007ffff52c0e97 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libtask_executor.so
#39 0x00007ffff5539c48 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#40 0x00007ffff55bc550 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#41 0x00007ffff55bfd95 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#42 0x00007ffff55ab890 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#43 0x00007ffff5587722 in ?? () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#44 0x00007ffff554b055 in clBuildProgram () from /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so
#45 0x0000000000400ffe in main ()

 

valgrind output:

==10701== Memcheck, a memory error detector
==10701== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==10701== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==10701== Command: ./a.out
==10701== Parent PID: 5874
==10701== 
==10701== Conditional jump or move depends on uninitialised value(s)
==10701==    at 0x5D3559C: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5F086DE: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5DFFD65: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5F0AAAC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5F0AE9B: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5E01320: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5DD45D1: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5CD34AC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5CD3447: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x4E35811: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701==    by 0x4E39CB1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701==    by 0x560EBAF: pthread_once (in /usr/lib64/libpthread-2.17.so)
==10701== 
==10701== Conditional jump or move depends on uninitialised value(s)
==10701==    at 0x5D352B2: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5F086EF: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5DFFD65: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5F0AAAC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5F0AE9B: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5E01320: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5DD45D1: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5CD34AC: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x5CD3447: ??? (in /usr/lib64/nvidia/libnvidia-opencl.so.367.48)
==10701==    by 0x4E35811: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701==    by 0x4E39CB1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOpenCL.so.2.0)
==10701==    by 0x560EBAF: pthread_once (in /usr/lib64/libpthread-2.17.so)
==10701== 
==10701== Warning: set address range perms: large range [0x1000000000, 0x2900000000) (noaccess)
==10701== Warning: set address range perms: large range [0x2900000000, 0x3000000000) (noaccess)
==10701== Conditional jump or move depends on uninitialised value(s)
==10701==    at 0x4084A8F: __intel_sse2_strrchr (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701==    by 0x406D541: tbb::internal::init_dl_data() (dynamic_link.cpp:332)
==10701==    by 0x406D476: __sti__$E (dynamic_link.cpp:495)
==10701==    by 0x408F041: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701==    by 0x4068732: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libtbb.so.2)
==10701== 
==10701== Conditional jump or move depends on uninitialised value(s)
==10701==    at 0x8F0C876: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==    by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== 
==10701== Conditional jump or move depends on uninitialised value(s)
==10701==    at 0x8F0C888: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==    by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== 
==10701== Conditional jump or move depends on uninitialised value(s)
==10701==    at 0x8F0C89A: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==    by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== 
==10701== Conditional jump or move depends on uninitialised value(s)
==10701==    at 0x8F0C8AC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==    by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== 
==10701== Conditional jump or move depends on uninitialised value(s)
==10701==    at 0x8F0C8BE: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC198C: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2496: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC2858: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC6D82: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DC1462: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DE8D01: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x880F2DA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x88213E1: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x8821809: clDevCreateDeviceInstance (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x7884FA4: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==    by 0x7846B68: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701== 
==10701== Invalid read of size 1
==10701==    at 0x8F0ADEC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8ECB723: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8ECD60F: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8ECD885: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8ECDCDA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x9062556: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DEE4B7: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DCF4E6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DE7908: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DCCAD8: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x880F5B6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x78521CF: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==  Address 0x8 is not stack'd, malloc'd or (recently) free'd
==10701== 
==10701== 
==10701== Process terminating with default action of signal 11 (SIGSEGV)
==10701==  Access not within mapped region at address 0x8
==10701==    at 0x8F0ADEC: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8ECB723: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8ECD60F: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8ECD885: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8ECDCDA: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x9062556: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DEE4B7: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DCF4E6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DE7908: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x8DCCAD8: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libOclCpuBackEnd.so)
==10701==    by 0x880F5B6: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libcpu_device.so)
==10701==    by 0x78521CF: ??? (in /opt/intel/opencl-1.2-6.4.0.25/lib64/libintelocl.so)
==10701==  If you believe this happened as a result of a stack
==10701==  overflow in your program's main thread (unlikely but
==10701==  possible), you can try to increase the size of the
==10701==  main thread stack using the --main-stacksize= flag.
==10701==  The main thread stack size used in this run was 8388608.
==10701== 
==10701== HEAP SUMMARY:
==10701==     in use at exit: 20,677,964 bytes in 133,880 blocks
==10701==   total heap usage: 162,203 allocs, 28,323 frees, 33,802,790 bytes allocated
==10701== 
==10701== LEAK SUMMARY:
==10701==    definitely lost: 312 bytes in 5 blocks
==10701==    indirectly lost: 0 bytes in 0 blocks
==10701==      possibly lost: 1,844,672 bytes in 13,733 blocks
==10701==    still reachable: 18,832,980 bytes in 120,142 blocks
==10701==                       of which reachable via heuristic:
==10701==                         stdstring          : 220,711 bytes in 4,022 blocks
==10701==                         newarray           : 37,008 bytes in 12 blocks
==10701==                         multipleinheritance: 928 bytes in 2 blocks
==10701==         suppressed: 0 bytes in 0 blocks
==10701== Rerun with --leak-check=full to see details of leaked memory
==10701== 
==10701== For counts of detected and suppressed errors, rerun with: -v
==10701== Use --track-origins=yes to see where uninitialised values come from
==10701== ERROR SUMMARY: 20 errors from 9 contexts (suppressed: 0

 

 

 

0 Kudos
3 Replies
Highlighted
Beginner
82 Views

Interestingly, modifying the kernel as follows 

/*
 * Workaround variant of the kernel: the same copy of params[] into out[],
 * with the loop bound giving the same values (20 for local id 0, 19 for
 * local ids 1..3) but written with lid(0) as a trailing subtraction instead
 * of a leading `-1 * lid(0)` term.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) loopy_kernel(__global double *restrict out)
{
  for (int i_outer = 0; i_outer <= 20  + (3 * lid(0) / 4) - lid(0); ++i_outer)
    out[4 * i_outer + lid(0)] = params[4 * i_outer + lid(0)];
}

Note that the `-1 * lid(0)` term has been moved to the end of the for-loop condition and rewritten as a true subtraction.

and fixing a minor bug in my worksize allocation:

        /* Global size now equals the local size: a single work-group of 4 work-items. */
        size_t global_work_size[3] = {4, 0, 0};
        size_t local_work_size[3] = {4, 0, 0};

resolves this segfault, and results in the correct output.

0 Kudos
Highlighted
Beginner
82 Views

Any word on this, re: reproducibility?  Or even just acknowledging that someone saw it??

0 Kudos
Highlighted
82 Views

Looks like my earlier reply didn't post.  Yes, the issue is reproduced and reported to the dev team.  Thanks for letting us know.

0 Kudos