- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I suspect there's an aspect of clSetEventCallback that I'm misunderstanding. If you have a look at this simplified code below, you can see a kernel call, followed by a clSetEventCallback. The program then sits and waits on a windows event (inside the callback) to be set.
If the clSetEventCallback call is immediately followed by a clFlush(), the callback is invoked and the wait is released as expected. Without the clFlush, however, the kernel never runs, its callback is never invoked, and the wait never returns. What am I missing here?
Environment: i7-4770, Intel OpenCL SDK 4.6, Windows 7 SP1, Visual Studio 2013, driver 15.36.19
#include "stdafx.h" /* must come first when MSVC precompiled headers are enabled */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <windows.h>
#include "CL\cl.h"
/*
 * Sets up the OpenCL objects needed to run a kernel (defined further below).
 *
 * Looks for the platform named "Intel(R) OpenCL", takes the first GPU device
 * it reports, then creates a context and a command queue for that device and
 * builds a program from KernelSource.
 *
 * KernelSource  - NUL-terminated OpenCL C source text.
 * device_id     - out: the selected device.
 * context       - out: the created context.
 * commandqueue  - out: the created command queue.
 * program       - out: the built program.
 *
 * Returns 1 on success and 0 on failure.
 */
int initialiseEnvironment(
char *KernelSource,
cl_device_id *device_id,
cl_context *context,
cl_command_queue *commandqueue,
cl_program *program
);
/*
 * Event callback registered with clSetEventCallback.
 *
 * user_data must point at the HANDLE of the Win32 event to signal; the
 * handle is set so that a thread blocked on it can continue.
 * complete_event and cmd_sts are not inspected.
 */
void __stdcall kernel_complete_callback(cl_event complete_event, cl_int cmd_sts, void *user_data)
{
    HANDLE *signalHandle = (HANDLE *)user_data;
    BOOL signalled = SetEvent(*signalHandle);

    if (signalled)
        return;

    /* SetEvent reported failure: nothing to recover here, just report it. */
    printf("callback fail");
}
/*
 * Test harness: enqueues a trivial kernel, registers a completion callback on
 * the kernel's event and blocks on a Win32 event that the callback signals.
 *
 * The process exit code is always 0, whether or not a step failed.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    /* An array (not a pointer to a literal) so it can be passed as char *. */
    char KernelSource[] = "\n"
        "__kernel void debugTest() \n"
        "{ \n"
        " printf(\"here\"); \n"
        "} \n"
        "\n";

    /* Everything is declared and nulled up front so the cleanup label can be
       reached from any point without jumping over an initialisation. */
    DWORD dwWaitResult;
    cl_int errcode_ret = CL_SUCCESS;
    cl_device_id device_id = NULL;
    cl_context context = NULL;
    cl_command_queue commandQueue = NULL;
    cl_program program = NULL;
    cl_kernel debugTest = NULL;
    cl_event kernelCompleteEvent = NULL;
    HANDLE bufferCompleteEvent = NULL;
    size_t threadSize = 1;

    (void)argc;
    (void)argv;

    /* set up the opencl basics; if this fails the out-parameters cannot be
       trusted, so return without touching them */
    if (!initialiseEnvironment(
            KernelSource,
            &device_id,
            &context,
            &commandQueue,
            &program))
        return 0;

    debugTest = clCreateKernel(program, "debugTest", &errcode_ret);
    if (CL_SUCCESS != errcode_ret)
        goto cleanup;

    /* manual-reset event, created non-signalled */
    bufferCompleteEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
    if (bufferCompleteEvent == NULL)
        goto cleanup;
    if (!ResetEvent(bufferCompleteEvent))
        goto cleanup;

    /* one-dimensional range with a single work-item */
    errcode_ret = clEnqueueNDRangeKernel(
        commandQueue,
        debugTest,
        1,
        NULL,
        &threadSize,
        NULL,
        0,
        NULL,
        &kernelCompleteEvent);
    if (CL_SUCCESS != errcode_ret)
        goto cleanup;

    errcode_ret = clSetEventCallback(kernelCompleteEvent, CL_COMPLETE, &kernel_complete_callback, &bufferCompleteEvent);
    if (CL_SUCCESS != errcode_ret)
        goto cleanup;

    /* Enqueueing only places the command in the queue; without an explicit
       flush (or a blocking call) the kernel may never be submitted, so the
       wait below would never return. */
    errcode_ret = clFlush(commandQueue);
    if (CL_SUCCESS != errcode_ret)
        goto cleanup;

    dwWaitResult = WaitForSingleObject(bufferCompleteEvent, INFINITE);
    if (dwWaitResult != WAIT_OBJECT_0)
        goto cleanup;

cleanup:
    /* Drain the queue before the Win32 handle goes away so that a kernel that
       is still pending cannot complete after the handle has been closed. */
    if (commandQueue != NULL)
        clFinish(commandQueue);
    if (kernelCompleteEvent != NULL)
        clReleaseEvent(kernelCompleteEvent);
    if (debugTest != NULL)
        clReleaseKernel(debugTest);
    if (bufferCompleteEvent != NULL)
        CloseHandle(bufferCompleteEvent);
    if (program != NULL)
        clReleaseProgram(program);
    if (commandQueue != NULL)
        clReleaseCommandQueue(commandQueue);
    if (context != NULL)
        clReleaseContext(context);
    return 0;
}
/*
 * Sets up the OpenCL objects needed to run a kernel.
 *
 * Selects the platform named "Intel(R) OpenCL", takes its first GPU device,
 * creates a context and a command queue for it and builds a program from
 * KernelSource.
 *
 * KernelSource  - NUL-terminated OpenCL C source text.
 * device_id     - out: the selected device.
 * context       - out: the created context (caller releases).
 * commandqueue  - out: the created command queue (caller releases).
 * program       - out: the built program (caller releases).
 *
 * Returns 1 on success. Returns 0 on failure, in which case anything created
 * here has been released again and all four outputs are set to NULL.
 */
int initialiseEnvironment(
char *KernelSource,
cl_device_id *device_id,
cl_context *context,
cl_command_queue *commandqueue,
cl_program *program
)
{
    /* All locals are declared before the first goto so the cleanup label is
       reachable without jumping over an initialisation. */
    int ok = 0;
    int platformFound = 0;
    cl_int err = CL_SUCCESS;
    cl_uint platformCount = 0;
    cl_uint deviceCount = 0;
    cl_uint i;
    cl_platform_id *platforms = NULL;
    cl_platform_id selectedPlatform = NULL;
    cl_device_id *devices = NULL;
    const char *source = KernelSource;

    *device_id = NULL;
    *context = NULL;
    *commandqueue = NULL;
    *program = NULL;

    /* Enumerate the installed platforms. */
    err = clGetPlatformIDs(0, NULL, &platformCount);
    if (CL_SUCCESS != err || platformCount == 0)
    {
        printf("Error: Failed to find any OpenCL platform\n");
        goto cleanup;
    }
    /* The cast is kept so the file also builds as C++. */
    platforms = (cl_platform_id*)malloc(sizeof *platforms * platformCount);
    if (platforms == NULL)
    {
        printf("Error: Out of memory\n");
        goto cleanup;
    }
    err = clGetPlatformIDs(platformCount, platforms, NULL);
    if (CL_SUCCESS != err)
    {
        printf("Error: Failed to clGetPlatformIDs\n");
        goto cleanup;
    }

    /* Pick the platform by name; each name buffer is freed before moving on. */
    for (i = 0; i < platformCount && !platformFound; i++) {
        size_t size = 0;
        char *name;

        err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 0, NULL, &size);
        if (CL_SUCCESS != err || size == 0)
            continue;
        name = (char*)malloc(size);
        if (name == NULL)
        {
            printf("Error: Out of memory\n");
            goto cleanup;
        }
        err = clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, size, name, NULL);
        if (CL_SUCCESS == err && strcmp(name, "Intel(R) OpenCL") == 0) {
            selectedPlatform = platforms[i];
            platformFound = 1;
        }
        free(name);
    }
    if (!platformFound)
    {
        printf("Error: Failed to find the Intel(R) OpenCL platform\n");
        goto cleanup;
    }

    /* Enumerate the GPU devices of the chosen platform. */
    err = clGetDeviceIDs(selectedPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceCount);
    if (CL_SUCCESS != err || deviceCount == 0)
    {
        printf("Error: Failed to clGetDeviceIDs, returned %d\n", (int)err);
        goto cleanup;
    }
    devices = (cl_device_id*)malloc(sizeof *devices * deviceCount);
    if (devices == NULL)
    {
        printf("Error: Out of memory\n");
        goto cleanup;
    }
    err = clGetDeviceIDs(selectedPlatform, CL_DEVICE_TYPE_GPU, deviceCount, devices, NULL);
    if (CL_SUCCESS != err)
    {
        printf("Error: Failed to clGetDeviceIDs\n");
        goto cleanup;
    }
    *device_id = devices[0];

    /* Success is judged from the error code and the created object, not from
       the (never NULL) out-parameter pointer. */
    *context = clCreateContext(NULL, 1, device_id, NULL, NULL, &err);
    if (CL_SUCCESS != err || *context == NULL)
    {
        printf("Error: Failed to create a compute context!\n");
        goto cleanup;
    }
    *commandqueue = clCreateCommandQueue(*context, *device_id, 0, &err);
    if (CL_SUCCESS != err || *commandqueue == NULL)
    {
        printf("Error: Failed to create a command commands!\n");
        goto cleanup;
    }
    *program = clCreateProgramWithSource(*context, 1, &source, NULL, &err);
    if (CL_SUCCESS != err || *program == NULL)
    {
        printf("Error: Failed to create compute program!\n");
        goto cleanup;
    }
    err = clBuildProgram(*program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        // Determine the reason for the error
        char buildLog[16384];
        buildLog[0] = '\0'; /* stays printable if the log query itself fails */
        clGetProgramBuildInfo(*program, *device_id, CL_PROGRAM_BUILD_LOG,
            sizeof(buildLog), buildLog, NULL);
        printf("Error in program: %s", buildLog);
        goto cleanup;
    }
    ok = 1;

cleanup:
    free(devices);
    free(platforms);
    if (!ok)
    {
        /* Undo partial construction so the caller never sees stale handles. */
        if (*program != NULL)
        {
            clReleaseProgram(*program);
            *program = NULL;
        }
        if (*commandqueue != NULL)
        {
            clReleaseCommandQueue(*commandqueue);
            *commandqueue = NULL;
        }
        if (*context != NULL)
        {
            clReleaseContext(*context);
            *context = NULL;
        }
        *device_id = NULL;
    }
    return ok;
}
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Philip,
You will need either clFlush or clFinish or clWaitForEvents(1, &kernelCompleteEvent) after clEnqueueNDRangeKernel in this case for the kernel to start executing.
Robert
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi Philip,
You will need either clFlush or clFinish or clWaitForEvents(1, &kernelCompleteEvent) after clEnqueueNDRangeKernel in this case for the kernel to start executing.
Robert
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for the quick reply Robert: helps a lot.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page