Community
cancel
Showing results for 
Search instead for 
Did you mean: 
Beginner
64 Views

clSetEventCallback Oddity

Jump to solution

I suspect there's an aspect of clSetEventCallback that I'm misunderstanding. In the simplified code below, a kernel is enqueued and clSetEventCallback is then called on the kernel's event. The program then waits on a Windows event, which the callback is supposed to set.
If the clSetEventCallback call is immediately followed by a clFlush(), then the callback is called and the wait is released as expected. However, without the clFlush, the kernel never runs, its callback is never called, and the wait never returns. What am I missing here?

Environment: i7-4770, Intel OpenCL SDK 4.6, Windows 7 SP1, Visual Studio 2013, driver 15.36.19

#include "stdafx.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <windows.h>

#include "CL\cl.h"

/*
 * Forward declaration; the definition appears after _tmain.
 *
 * Looks for the platform named "Intel(R) OpenCL", takes its first GPU device,
 * and creates a context, a command queue and a program built from
 * KernelSource.
 *
 * KernelSource  - OpenCL C source text handed to clCreateProgramWithSource.
 * device_id     - out: the selected device.
 * context       - out: context created for that device.
 * commandqueue  - out: command queue created on that context/device.
 * program       - out: program created from KernelSource and built.
 *
 * Returns 1 on success, or 0 after printing a message when a failure is
 * detected.
 */
int initialiseEnvironment(
    char *KernelSource,
    cl_device_id *device_id,
    cl_context *context,
    cl_command_queue *commandqueue,
    cl_program *program
    );

/*
 * OpenCL event callback: signals the Windows event whose HANDLE is pointed to
 * by user_data so that a thread blocked in WaitForSingleObject can continue.
 *
 * complete_event - the OpenCL event the callback was registered on (unused).
 * cmd_sts        - execution status of the command; a negative value is an
 *                  error code for a command that terminated abnormally.
 * user_data      - must point to the HANDLE of the Windows event to set.
 *
 * CL_CALLBACK is the calling-convention macro the OpenCL headers define for
 * callbacks, used here instead of a hard-coded __stdcall.
 */
void CL_CALLBACK kernel_complete_callback(cl_event complete_event, cl_int cmd_sts, void *user_data)
{
    HANDLE* bufferEventHandle = (HANDLE*)user_data;

    (void)complete_event;

    /* Report abnormal termination, but still release the waiter below so the
       host thread does not block forever on a failed command. */
    if (cmd_sts < 0)
        printf("kernel terminated abnormally, status %d\n", (int)cmd_sts);

    if (bufferEventHandle == NULL || !SetEvent(*bufferEventHandle))
        printf("callback fail");
}


/*
 * Enqueues a trivial kernel, registers a completion callback on its event and
 * blocks on a Windows event that the callback signals.
 *
 * Returns 0 when the kernel completed and the wait was released, 1 on any
 * failure. Every OpenCL object and the Windows event created here are
 * released before returning.
 */
int _tmain(int argc, _TCHAR* argv[])
{
    int exitCode = 1;
    DWORD dwWaitResult;
    cl_int errcode_ret = CL_SUCCESS;
    size_t threadSize = 1;

    /* Writable array rather than a pointer to a string literal, because
       initialiseEnvironment takes a non-const char*. */
    char KernelSource[] = "\n"
        "__kernel void debugTest() \n"
        "{  \n"
        "  printf(\"here\"); \n"
        "}  \n"
        "\n";

    cl_device_id device_id;
    cl_context context = NULL;
    cl_command_queue commandQueue = NULL;
    cl_program program = NULL;
    cl_kernel debugTest = NULL;
    cl_event kernelCompleteEvent = NULL;
    HANDLE bufferCompleteEvent = NULL;

    (void)argc;
    (void)argv;

    // set up the opencl basics

    if (!initialiseEnvironment(
        KernelSource,
        &device_id,
        &context,
        &commandQueue,
        &program))
    {
        /* The handles are not known to be valid here, so nothing is released. */
        return exitCode;
    }

    debugTest = clCreateKernel(program, "debugTest", &errcode_ret);
    if (CL_SUCCESS != errcode_ret)
    {
        printf("Error: clCreateKernel failed (%d)\n", (int)errcode_ret);
        goto cleanup;
    }

    /* Manual-reset event, created non-signalled; no ResetEvent is needed. */
    bufferCompleteEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
    if (bufferCompleteEvent == NULL)
    {
        printf("Error: CreateEvent failed\n");
        goto cleanup;
    }

    errcode_ret = clEnqueueNDRangeKernel(
        commandQueue,
        debugTest,
        1,
        NULL,
        &threadSize,
        NULL,
        0,
        NULL,
        &kernelCompleteEvent);

    if (CL_SUCCESS != errcode_ret)
    {
        printf("Error: clEnqueueNDRangeKernel failed (%d)\n", (int)errcode_ret);
        goto cleanup;
    }

    errcode_ret = clSetEventCallback(kernelCompleteEvent, CL_COMPLETE, &kernel_complete_callback, &bufferCompleteEvent);

    if (CL_SUCCESS != errcode_ret)
    {
        printf("Error: clSetEventCallback failed (%d)\n", (int)errcode_ret);
        goto cleanup;
    }

    /* Enqueueing alone does not guarantee the command is submitted to the
       device. Without this flush (or clFinish / clWaitForEvents) the kernel
       may never start, the callback never fires, and the wait below never
       returns. */
    errcode_ret = clFlush(commandQueue);

    if (CL_SUCCESS != errcode_ret)
    {
        printf("Error: clFlush failed (%d)\n", (int)errcode_ret);
        goto cleanup;
    }

    dwWaitResult = WaitForSingleObject(bufferCompleteEvent, INFINITE);

    if (dwWaitResult == WAIT_OBJECT_0)
        exitCode = 0;
    else
        printf("Error: WaitForSingleObject failed\n");

cleanup:
    if (kernelCompleteEvent != NULL)
    {
        /* Best effort: let the enqueued kernel finish before the objects it
           uses are torn down. */
        clFinish(commandQueue);
        clReleaseEvent(kernelCompleteEvent);
    }
    if (bufferCompleteEvent != NULL)
        CloseHandle(bufferCompleteEvent);
    if (debugTest != NULL)
        clReleaseKernel(debugTest);
    if (program != NULL)
        clReleaseProgram(program);
    if (commandQueue != NULL)
        clReleaseCommandQueue(commandQueue);
    if (context != NULL)
        clReleaseContext(context);

    return exitCode;
}


int initialiseEnvironment(
    char *KernelSource,
    cl_device_id *device_id,
    cl_context *context,
    cl_command_queue *commandqueue,
    cl_program *program
    )
{

    int err;
    cl_uint platformCount;

    clGetPlatformIDs(0, NULL, &platformCount);
    cl_platform_id *platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * platformCount);
    clGetPlatformIDs(platformCount, platforms, NULL);

    unsigned int selectedPlatform = -1;
    for (unsigned int i = 0; i < platformCount; i++) {

        char* value;
        size_t size = 0;
        clGetPlatformInfo(platforms, CL_PLATFORM_NAME, size, NULL, &size);
        value = (char*)malloc(sizeof(char) * size);
        clGetPlatformInfo(platforms, CL_PLATFORM_NAME, size, value, NULL);

        if (strcmp(value, "Intel(R) OpenCL") == 0) {
            selectedPlatform = i;
            break;

        }
    }

    unsigned int deviceCount;

    err = clGetDeviceIDs(platforms[selectedPlatform], CL_DEVICE_TYPE_GPU, 0, NULL, &deviceCount);

    if (CL_SUCCESS != err)
    {
        printf("Error: Failed to clGetDeviceIDs, returned\n");
        return 0;
    }

    cl_device_id *devices = (cl_device_id*)malloc(sizeof(cl_device_id) * deviceCount);

    clGetDeviceIDs(platforms[selectedPlatform], CL_DEVICE_TYPE_GPU, deviceCount, devices, NULL);

    if (CL_SUCCESS != err)
    {
        printf("Error: Failed to clGetDeviceIDs\n");
        return 0;
    }

    *device_id = devices[0];

    *context = clCreateContext(0, 1, device_id, NULL, NULL, &err);

    if (!context)

    {
        printf("Error: Failed to create a compute context!\n");
        return 0;
    }

    *commandqueue = clCreateCommandQueue(*context, *device_id, 0, &err);

    if (!commandqueue)

    {
        printf("Error: Failed to create a command commands!\n");
        return 0;
    }

    *program = clCreateProgramWithSource(*context, 1, (const char **)& KernelSource, NULL, &err);

    if (!program)

    {
        printf("Error: Failed to create compute program!\n");
        return 0;
    }

    err = clBuildProgram(*program, 0, NULL, NULL, NULL, NULL);

    if (err != CL_SUCCESS)
    {
        // Determine the reason for the error
        char buildLog[16384];
        clGetProgramBuildInfo(*program, *device_id, CL_PROGRAM_BUILD_LOG,
            sizeof(buildLog), buildLog, NULL);
        printf("Error in program: %s", buildLog);
        clReleaseProgram(*program);
        return 0;
    }
    return 1;
}

 

 

0 Kudos

Accepted Solutions
Employee
64 Views

Hi Philip,

You will need to call clFlush, clFinish, or clWaitForEvents(1, &kernelCompleteEvent) after clEnqueueNDRangeKernel in this case for the kernel to start executing; enqueueing a command does not by itself guarantee that it is submitted to the device.

Robert

View solution in original post

0 Kudos
2 Replies
Employee
65 Views

Hi Philip,

You will need to call clFlush, clFinish, or clWaitForEvents(1, &kernelCompleteEvent) after clEnqueueNDRangeKernel in this case for the kernel to start executing; enqueueing a command does not by itself guarantee that it is submitted to the device.

Robert

View solution in original post

0 Kudos
Beginner
64 Views

Thanks for the quick reply Robert: helps a lot.

0 Kudos