OpenCL* for CPU
Ask questions and share information on Intel® SDK for OpenCL™ Applications and OpenCL™ implementations for Intel® CPU.
Announcements
This forum covers OpenCL* for CPU only. OpenCL* for GPU questions can be asked in the GPU Compute Software forum. Intel® FPGA SDK for OpenCL™ questions can be asked in the FPGA Intel® High Level Design forum.
1719 Discussions

Incorrect output for simple if statement kernel

Nicholas_C_1
Beginner
527 Views

First, as a preface: I believe this is related to the bug reported here (https://software.intel.com/en-us/forums/opencl/topic/704155).  However, I'm reporting it anyway on the chance that it is in fact distinct.  In any case, it has appeared under circumstances that are new to the user (i.e. me).

main.c program code:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <CL/cl.h>

/* Number of doubles in the device buffer; also used as the global work size. */
#define MEM_SIZE (16)
/* Maximum number of bytes of kernel source read from disk. */
#define MAX_SOURCE_SIZE (0x100000)

/*
 * Simple error checking, not strictly necessary: passes an OpenCL status
 * code to cpu_assert() together with the call site's file and line.
 * Wrapped in do { } while (0) so that `err(x);` behaves as a single
 * statement, including inside an un-braced if/else.
 */
#define err(ans) do { cpu_assert((ans), __FILE__, __LINE__); } while (0)
/*
 * Map an OpenCL status code to the name of its symbolic constant.
 *
 * error: status code as returned by an OpenCL API call.
 * Returns a pointer to a static string; codes that are not in the table
 * yield "Unknown OpenCL error".
 */
const char *getErrorString(cl_int error)
{
    static const struct {
        cl_int code;
        const char *name;
    } known_codes[] = {
        /* success and run-time / JIT compiler errors (0 .. -19) */
        {0, "CL_SUCCESS"},
        {-1, "CL_DEVICE_NOT_FOUND"},
        {-2, "CL_DEVICE_NOT_AVAILABLE"},
        {-3, "CL_COMPILER_NOT_AVAILABLE"},
        {-4, "CL_MEM_OBJECT_ALLOCATION_FAILURE"},
        {-5, "CL_OUT_OF_RESOURCES"},
        {-6, "CL_OUT_OF_HOST_MEMORY"},
        {-7, "CL_PROFILING_INFO_NOT_AVAILABLE"},
        {-8, "CL_MEM_COPY_OVERLAP"},
        {-9, "CL_IMAGE_FORMAT_MISMATCH"},
        {-10, "CL_IMAGE_FORMAT_NOT_SUPPORTED"},
        {-11, "CL_BUILD_PROGRAM_FAILURE"},
        {-12, "CL_MAP_FAILURE"},
        {-13, "CL_MISALIGNED_SUB_BUFFER_OFFSET"},
        {-14, "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST"},
        {-15, "CL_COMPILE_PROGRAM_FAILURE"},
        {-16, "CL_LINKER_NOT_AVAILABLE"},
        {-17, "CL_LINK_PROGRAM_FAILURE"},
        {-18, "CL_DEVICE_PARTITION_FAILED"},
        {-19, "CL_KERNEL_ARG_INFO_NOT_AVAILABLE"},

        /* CL_INVALID_* errors (-30 .. -68) */
        {-30, "CL_INVALID_VALUE"},
        {-31, "CL_INVALID_DEVICE_TYPE"},
        {-32, "CL_INVALID_PLATFORM"},
        {-33, "CL_INVALID_DEVICE"},
        {-34, "CL_INVALID_CONTEXT"},
        {-35, "CL_INVALID_QUEUE_PROPERTIES"},
        {-36, "CL_INVALID_COMMAND_QUEUE"},
        {-37, "CL_INVALID_HOST_PTR"},
        {-38, "CL_INVALID_MEM_OBJECT"},
        {-39, "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR"},
        {-40, "CL_INVALID_IMAGE_SIZE"},
        {-41, "CL_INVALID_SAMPLER"},
        {-42, "CL_INVALID_BINARY"},
        {-43, "CL_INVALID_BUILD_OPTIONS"},
        {-44, "CL_INVALID_PROGRAM"},
        {-45, "CL_INVALID_PROGRAM_EXECUTABLE"},
        {-46, "CL_INVALID_KERNEL_NAME"},
        {-47, "CL_INVALID_KERNEL_DEFINITION"},
        {-48, "CL_INVALID_KERNEL"},
        {-49, "CL_INVALID_ARG_INDEX"},
        {-50, "CL_INVALID_ARG_VALUE"},
        {-51, "CL_INVALID_ARG_SIZE"},
        {-52, "CL_INVALID_KERNEL_ARGS"},
        {-53, "CL_INVALID_WORK_DIMENSION"},
        {-54, "CL_INVALID_WORK_GROUP_SIZE"},
        {-55, "CL_INVALID_WORK_ITEM_SIZE"},
        {-56, "CL_INVALID_GLOBAL_OFFSET"},
        {-57, "CL_INVALID_EVENT_WAIT_LIST"},
        {-58, "CL_INVALID_EVENT"},
        {-59, "CL_INVALID_OPERATION"},
        {-60, "CL_INVALID_GL_OBJECT"},
        {-61, "CL_INVALID_BUFFER_SIZE"},
        {-62, "CL_INVALID_MIP_LEVEL"},
        {-63, "CL_INVALID_GLOBAL_WORK_SIZE"},
        {-64, "CL_INVALID_PROPERTY"},
        {-65, "CL_INVALID_IMAGE_DESCRIPTOR"},
        {-66, "CL_INVALID_COMPILER_OPTIONS"},
        {-67, "CL_INVALID_LINKER_OPTIONS"},
        {-68, "CL_INVALID_DEVICE_PARTITION_COUNT"},

        /* extension errors (-1000 .. -1005) */
        {-1000, "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR"},
        {-1001, "CL_PLATFORM_NOT_FOUND_KHR"},
        {-1002, "CL_INVALID_D3D10_DEVICE_KHR"},
        {-1003, "CL_INVALID_D3D10_RESOURCE_KHR"},
        {-1004, "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR"},
        {-1005, "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR"},
    };
    const size_t count = sizeof known_codes / sizeof known_codes[0];

    /* Linear scan: the table is small and this runs only on error paths. */
    for (size_t idx = 0; idx < count; ++idx) {
        if (known_codes[idx].code == error) {
            return known_codes[idx].name;
        }
    }
    return "Unknown OpenCL error";
}
/*
 * Terminate the program if an OpenCL call did not succeed.
 *
 * x:    status code to check.
 * file: source file name of the call site (printed in the diagnostic).
 * line: source line of the call site (printed in the diagnostic).
 *
 * On any status other than CL_SUCCESS, prints the symbolic error name and
 * the call site to stderr, then exits with the status code as exit value.
 */
void cpu_assert(cl_int x, const char *file, int line) {
	if (x == CL_SUCCESS)
		return;

	fprintf(stderr,"cpu_assert: %s %s %d\n", getErrorString(x), file, line);
	exit(x);
}

//main program
int main()	
{
	double mem[MEM_SIZE] = {0};

	cl_platform_id platform_id[10];
	cl_device_id device_id = NULL;
	cl_context context = NULL;
	cl_command_queue command_queue = NULL;
	cl_mem memobj = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_uint ret_num_devices;
	cl_uint ret_num_platforms;
	cl_int ret;
	
	FILE *fp;
	const char fileName[] = "./kernel.cl";
	size_t source_size;
	char *source_str;
	cl_int i;
	
	/* Load kernel source code */
	fp = fopen(fileName, "r");
	if (!fp) {
		exit(-1);
	}
	source_str = (char *)malloc(MAX_SOURCE_SIZE);
	source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
	fclose(fp);
		
	/* Get platform/device information */
	err(clGetPlatformIDs(10, platform_id, &ret_num_platforms));
	cl_platform_id pid = NULL;
	for (int i = 0; i < ret_num_platforms; ++i)
	{
		//check if intel
		char pvendor[500];
		size_t psize = 500 * sizeof(char);
                //choose the first intel platofrm
		char intel_check[10] = "Intel";
		err(clGetPlatformInfo(platform_id, CL_PLATFORM_VENDOR, psize, pvendor, NULL));
		if(strstr(pvendor, intel_check) != NULL)
		{
			pid = platform_id;
		}
	}
        //get the Intel CPU
	err(clGetDeviceIDs(pid, CL_DEVICE_TYPE_CPU, 1, &device_id, &ret_num_devices));
	
	/* Create OpenCL Context */
	context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
	err(ret);	
	
	/* Create Command Queue */
	command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
	err(ret);
	
	/* Create memory buffer*/
	memobj = clCreateBuffer(context, CL_MEM_READ_WRITE, MEM_SIZE * sizeof(double), NULL, &ret);
	err(ret);
	
	/* Create Kernel program from the read in source */
	program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);
	err(ret);

	/* Build Kernel Program */
	err(clBuildProgram(program, 1, &device_id, NULL, NULL, NULL));

	/* Create OpenCL Kernel */
	kernel = clCreateKernel(program, "test_kernel", &ret);
	err(ret);

	/* Set OpenCL kernel argument */
	ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
	err(ret);
	
	size_t global_work_size[3] = {MEM_SIZE, 0, 0};
	size_t local_work_size[3] = {4, 0, 0};
	
	/* Execute OpenCL kernel */
	ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, global_work_size, local_work_size, 0, NULL, NULL);
	err(ret);

	/* Transfer result from the memory buffer */
	ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0, MEM_SIZE * sizeof(double), mem, 0, NULL, NULL);
	err(ret);
	
	/* Display result */
	for (i=0; i < MEM_SIZE; i++) {
		printf("%e\t", mem);
	}
	
	/* Finalization */
	ret = clFlush(command_queue);
	ret = clFinish(command_queue);
	ret = clReleaseKernel(kernel);
	ret = clReleaseProgram(program);
	ret = clReleaseMemObject(memobj);
	ret = clReleaseCommandQueue(command_queue);
	ret = clReleaseContext(context);
	
	free(source_str);
	
	return 0;
}

kernel.cl code:

/* Shorthand for the local work-item id and the work-group id along
 * dimension N, both cast to int so the arithmetic below is signed. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/* The kernel uses double; for OpenCL C versions below 1.2 the fp64
 * extension is enabled explicitly. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* NOTE(review): "cl_inte_printf" looks like a typo for cl_intel_printf, and
 * the kernel never calls printf -- confirm before changing, since this file
 * is a bug reproducer. */
#pragma OPENCL EXTENSION cl_inte_printf: enable

/*
 * Failing variant of the reproducer. Requires a work-group size of 4x1x1.
 * Each work-item computes the index 4 * gid(0) + lid(0) and, when
 * 14 - index >= 0, stores that index (converted to double) into out[index].
 * The guard is deliberately spelled with "+ -4 *" / "+ -1 *"; do not
 * simplify it, because this spelling is the one reported to misbehave.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
    /* True for indices 0..14; index 15 is left unwritten. */
    if (14 + -4 * gid(0) + -1 * lid(0) >= 0)
    	out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
}

and is compiled with:

gcc -std=c99 -c main.c -I/opt/opencl-headers/ -o main.o -O0 -g && gcc main.o -Wl,-rpath,/opt/intel/opencl/lib64/ -lOpenCL -o a.out

 

This outputs on my system ( intel opencl-1.2-6.4.0.24 runtime, Intel(R) Xeon(R) CPU X5650, on RHEL 7.3 (Maipo)):

0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00    0.000000e+00   0.000000e+00     0.000000e+00    0.000000e+00    0.000000e+00 

Where we would expect the values 0 through 14, with the last (16th) element left unset.

As with the previous bug (https://software.intel.com/en-us/forums/opencl/topic/704155), the error can be resolved by switching to a "pure" minus, e.g.:

/* Shorthand for the local work-item id and the work-group id along
 * dimension N, both cast to int so the arithmetic below is signed. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/* The kernel uses double; for OpenCL C versions below 1.2 the fp64
 * extension is enabled explicitly. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* NOTE(review): "cl_inte_printf" looks like a typo for cl_intel_printf, and
 * the kernel never calls printf -- confirm before changing. */
#pragma OPENCL EXTENSION cl_inte_printf: enable

/*
 * Workaround variant 1: same kernel, but the guard is written with plain
 * subtraction ("14 - 4 * gid(0) - 1 * lid(0)") instead of adding negated
 * terms. Requires a work-group size of 4x1x1. Each work-item stores its
 * index 4 * gid(0) + lid(0) into out[index] when 14 - index >= 0.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
    /* True for indices 0..14; index 15 is left unwritten. */
    if (14 - 4 * gid(0) - 1 * lid(0) >= 0)
        out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
}

OR by adding a corresponding 'else' branch (an 'else if' chain would likely work too):

/* Shorthand for the local work-item id and the work-group id along
 * dimension N, both cast to int so the arithmetic below is signed. */
#define lid(N) ((int) get_local_id(N))
#define gid(N) ((int) get_group_id(N))
/* The kernel uses double; for OpenCL C versions below 1.2 the fp64
 * extension is enabled explicitly. */
#if __OPENCL_C_VERSION__ < 120
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
/* NOTE(review): "cl_inte_printf" looks like a typo for cl_intel_printf, and
 * the kernel never calls printf -- confirm before changing. */
#pragma OPENCL EXTENSION cl_inte_printf: enable

/*
 * Workaround variant 2: keeps the "+ -4 *" / "+ -1 *" guard of the failing
 * kernel but adds an else branch, so every work-item writes its element.
 * Requires a work-group size of 4x1x1. Work-items with 14 - index >= 0
 * store index = 4 * gid(0) + lid(0); all others store -1.
 */
__kernel void __attribute__ ((reqd_work_group_size(4, 1, 1))) test_kernel(__global double *restrict out)
{
    /* True for indices 0..14. */
    if (14 + -4 * gid(0) + -1 * lid(0) >= 0)
    	out[4 * gid(0) + lid(0)] = 4 * gid(0) + lid(0);
    else
        /* Index 15 takes this branch and is marked with -1. */
        out[4 * gid(0) + lid(0)] = -1;
}

 

Best,

Nick

0 Kudos
1 Reply
Jeffrey_M_Intel1
Employee
527 Views

Thanks for this report.  Issue is replicated and filed.

0 Kudos
Reply