- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I suspect there's an aspect of clSetEventCallback that I'm misunderstanding. In the simplified code below, you can see a kernel being enqueued, followed by a call to clSetEventCallback. The program then sits and waits for a Windows event, which is set inside the callback.
If the clSetEventCallback is immediately followed by a clFlush(), then the callback is called and the wait released as expected. However without the clFlush, the kernel is never called, nor its callback, and the wait is eternal. What am I missing here?
Environment: i7-4770, Intel OpenCL SDK 4.6, Windows 7 SP1, Visual Studio 2013, driver 15.36.19
/*
 * Minimal reproduction: enqueue one OpenCL kernel, register a completion
 * callback on its event, and block on a Win32 event that the callback sets.
 *
 * The code is written so that it builds both as C and as C++ (hence the
 * explicit casts on malloc and the declarations hoisted above every goto).
 */
#include "stdafx.h" /* keep first: with MSVC precompiled headers, anything above this line is ignored */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <tchar.h>
#include <windows.h>

#include "CL/cl.h"

int initialiseEnvironment(char *KernelSource,
                          cl_device_id *device_id,
                          cl_context *context,
                          cl_command_queue *commandqueue,
                          cl_program *program);

/*
 * Event callback registered for CL_COMPLETE on the kernel's event.
 *
 * user_data points at the Win32 event HANDLE that the main thread is waiting
 * on. The event is signalled even when the command terminated abnormally
 * (cmd_sts is then a negative error code) so the waiter never hangs.
 */
void CL_CALLBACK kernel_complete_callback(cl_event complete_event, cl_int cmd_sts, void *user_data)
{
    HANDLE *bufferEventHandle = (HANDLE *)user_data;

    (void)complete_event;

    if (cmd_sts != CL_COMPLETE) {
        printf("kernel terminated abnormally, status %d\n", (int)cmd_sts);
    }
    if (!SetEvent(*bufferEventHandle)) {
        printf("callback fail");
    }
}

/*
 * Program entry point.
 *
 * Returns 0 when the kernel ran and the callback released the wait,
 * 1 on any failure. All OpenCL objects and the Win32 event are released on
 * every path.
 */
int _tmain(int argc, _TCHAR *argv[])
{
    /* Everything is declared up front so that `goto cleanup` never jumps
     * over an initialisation (an error when this file is built as C++). */
    int exitCode = 1;
    cl_int errcode_ret = CL_SUCCESS;
    DWORD dwWaitResult;
    size_t threadSize = 1;
    cl_device_id device_id = NULL;
    cl_context context = NULL;
    cl_command_queue commandQueue = NULL;
    cl_program program = NULL;
    cl_kernel debugTest = NULL;
    cl_event kernelCompleteEvent = NULL;
    HANDLE bufferCompleteEvent = NULL;
    char KernelSource[] =
        "\n"
        "__kernel void debugTest() \n"
        "{ \n"
        " printf(\"here\"); \n"
        "} \n"
        "\n";

    (void)argc;
    (void)argv;

    /* set up the opencl basics */
    if (!initialiseEnvironment(KernelSource, &device_id, &context, &commandQueue, &program)) {
        goto cleanup;
    }

    debugTest = clCreateKernel(program, "debugTest", &errcode_ret);
    if (CL_SUCCESS != errcode_ret) {
        goto cleanup;
    }

    /* Manual-reset event, created non-signalled, so no ResetEvent is needed. */
    bufferCompleteEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
    if (bufferCompleteEvent == NULL) {
        goto cleanup;
    }

    errcode_ret = clEnqueueNDRangeKernel(commandQueue, debugTest, 1, NULL, &threadSize, NULL,
                                         0, NULL, &kernelCompleteEvent);
    if (CL_SUCCESS != errcode_ret) {
        goto cleanup;
    }

    errcode_ret = clSetEventCallback(kernelCompleteEvent, CL_COMPLETE,
                                     &kernel_complete_callback, &bufferCompleteEvent);
    if (CL_SUCCESS != errcode_ret) {
        goto cleanup;
    }

    /*
     * Enqueueing a command does not guarantee it is submitted to the device.
     * Without clFlush (or a blocking call such as clFinish / clWaitForEvents)
     * the kernel may never start, the callback never fires, and the wait
     * below never returns.
     */
    errcode_ret = clFlush(commandQueue);
    if (CL_SUCCESS != errcode_ret) {
        goto cleanup;
    }

    dwWaitResult = WaitForSingleObject(bufferCompleteEvent, INFINITE);
    if (dwWaitResult != WAIT_OBJECT_0) {
        goto cleanup;
    }

    exitCode = 0;

cleanup:
    /* Drain the queue first so that no pending command can invoke the
     * callback after the Win32 event handle has been closed. */
    if (commandQueue != NULL) {
        clFinish(commandQueue);
    }
    if (kernelCompleteEvent != NULL) {
        clReleaseEvent(kernelCompleteEvent);
    }
    if (debugTest != NULL) {
        clReleaseKernel(debugTest);
    }
    if (program != NULL) {
        clReleaseProgram(program);
    }
    if (commandQueue != NULL) {
        clReleaseCommandQueue(commandQueue);
    }
    if (context != NULL) {
        clReleaseContext(context);
    }
    if (bufferCompleteEvent != NULL) {
        CloseHandle(bufferCompleteEvent);
    }
    return exitCode;
}

/*
 * Create the basic OpenCL objects for the first GPU device of the
 * "Intel(R) OpenCL" platform and build KernelSource for it.
 *
 * KernelSource  NUL-terminated OpenCL C source text.
 * device_id     out: the selected device.
 * context       out: a context containing that device.
 * commandqueue  out: an in-order command queue on that device.
 * program       out: the program built from KernelSource.
 *
 * Returns 1 on success. Returns 0 on failure, in which case every object
 * created so far has been released and all four outputs are set to NULL.
 */
int initialiseEnvironment(char *KernelSource,
                          cl_device_id *device_id,
                          cl_context *context,
                          cl_command_queue *commandqueue,
                          cl_program *program)
{
    int ok = 0;
    cl_int err;
    cl_uint platformCount = 0;
    cl_uint deviceCount = 0;
    cl_uint i;
    cl_platform_id *platforms = NULL;
    cl_platform_id selectedPlatform = NULL;
    cl_device_id *devices = NULL;
    const char *source = KernelSource;

    *device_id = NULL;
    *context = NULL;
    *commandqueue = NULL;
    *program = NULL;

    err = clGetPlatformIDs(0, NULL, &platformCount);
    if (CL_SUCCESS != err || platformCount == 0) {
        printf("Error: Failed to clGetPlatformIDs\n");
        goto cleanup;
    }

    platforms = (cl_platform_id *)malloc(sizeof(*platforms) * platformCount);
    if (platforms == NULL) {
        printf("Error: out of memory\n");
        goto cleanup;
    }

    err = clGetPlatformIDs(platformCount, platforms, NULL);
    if (CL_SUCCESS != err) {
        printf("Error: Failed to clGetPlatformIDs\n");
        goto cleanup;
    }

    /* Look for the Intel platform by name; query each platform individually. */
    for (i = 0; i < platformCount && selectedPlatform == NULL; i++) {
        size_t size = 0;
        char *value;

        err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 0, NULL, &size);
        if (CL_SUCCESS != err || size == 0) {
            continue;
        }

        value = (char *)malloc(size);
        if (value == NULL) {
            printf("Error: out of memory\n");
            goto cleanup;
        }

        err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, size, value, NULL);
        if (CL_SUCCESS == err && strcmp(value, "Intel(R) OpenCL") == 0) {
            selectedPlatform = platforms[i];
        }
        free(value);
    }

    if (selectedPlatform == NULL) {
        printf("Error: Failed to find the Intel(R) OpenCL platform\n");
        goto cleanup;
    }

    err = clGetDeviceIDs(selectedPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceCount);
    if (CL_SUCCESS != err || deviceCount == 0) {
        printf("Error: Failed to clGetDeviceIDs, returned\n");
        goto cleanup;
    }

    devices = (cl_device_id *)malloc(sizeof(*devices) * deviceCount);
    if (devices == NULL) {
        printf("Error: out of memory\n");
        goto cleanup;
    }

    err = clGetDeviceIDs(selectedPlatform, CL_DEVICE_TYPE_GPU, deviceCount, devices, NULL);
    if (CL_SUCCESS != err) {
        printf("Error: Failed to clGetDeviceIDs\n");
        goto cleanup;
    }

    *device_id = devices[0];

    /* Check the created handle and err, not the (never-NULL) out-pointer. */
    *context = clCreateContext(NULL, 1, device_id, NULL, NULL, &err);
    if (*context == NULL || CL_SUCCESS != err) {
        printf("Error: Failed to create a compute context!\n");
        goto cleanup;
    }

    *commandqueue = clCreateCommandQueue(*context, *device_id, 0, &err);
    if (*commandqueue == NULL || CL_SUCCESS != err) {
        printf("Error: Failed to create a command commands!\n");
        goto cleanup;
    }

    *program = clCreateProgramWithSource(*context, 1, &source, NULL, &err);
    if (*program == NULL || CL_SUCCESS != err) {
        printf("Error: Failed to create compute program!\n");
        goto cleanup;
    }

    err = clBuildProgram(*program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS) {
        /* Determine the reason for the error */
        char buildLog[16384];

        buildLog[0] = '\0'; /* stays a valid string if the query below fails */
        clGetProgramBuildInfo(*program, *device_id, CL_PROGRAM_BUILD_LOG,
                              sizeof(buildLog), buildLog, NULL);
        printf("Error in program: %s", buildLog);
        goto cleanup;
    }

    ok = 1;

cleanup:
    free(devices);
    free(platforms);

    if (!ok) {
        /* Undo partial construction so the caller never sees stale handles. */
        if (*program != NULL) {
            clReleaseProgram(*program);
            *program = NULL;
        }
        if (*commandqueue != NULL) {
            clReleaseCommandQueue(*commandqueue);
            *commandqueue = NULL;
        }
        if (*context != NULL) {
            clReleaseContext(*context);
            *context = NULL;
        }
        *device_id = NULL;
    }
    return ok;
}
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Philip,
You will need either clFlush or clFinish or clWaitForEvents(1, &kernelCompleteEvent) after clEnqueueNDRangeKernel in this case for the kernel to start executing.
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Philip,
You will need either clFlush or clFinish or clWaitForEvents(1, &kernelCompleteEvent) after clEnqueueNDRangeKernel in this case for the kernel to start executing.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for the quick reply Robert: helps a lot.

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page