- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The OpenCL spec says that clReleaseMemObject() does not delete the specified memory object while there are still queued commands that use it.
https://www.khronos.org/registry/OpenCL/sdk/1.0/docs/man/xhtml/clReleaseMemObject.html
> After the memobj reference count becomes zero and commands queued for execution on a command-queue(s) that use memobj have finished, the memory object is deleted.
However, the following example code causes a segmentation fault in my environment (emulation with Intel FPGA SDK for OpenCL 18.1).
- kernel code
__kernel void sample(__global char * restrict s) /* Stores the NUL-terminated string "Hello" into s[0]..s[5]. */
{
s[0] = 'H';
s[1] = 'e';
s[2] = 'l';
s[3] = 'l';
s[4] = 'o';
s[5] = '\0'; /* terminator; six bytes of s are written in total */
}
- host code
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <CL/cl.h>
#define KERNEL_FILE "sample.aocx"
#define KERNEL_NAME "sample"
#define ARRAY_SIZE (1024 * 1024)
/*
 * Abort the program if an OpenCL call did not succeed.
 *
 * err: status code produced by the OpenCL call.
 * api: name of the OpenCL function that produced err; printed in the
 *      diagnostic so the failing call can be identified.
 *
 * Returns normally only when err == CL_SUCCESS. Otherwise a message is
 * written to stderr and the process is terminated with abort().
 */
static void check_status(cl_int err, const char *api)
{
    if (err == CL_SUCCESS) {
        return;
    }
    fprintf(stderr, "%s failed with error code %d.\n", api, (int)err);
    abort();
}
/*
 * Read the entire file at `path` into a newly allocated buffer.
 *
 * path:     file to load; opened in binary mode.
 * size_out: receives the number of bytes read.
 *
 * Returns the malloc'd buffer, which the caller must free(). Any failure
 * prints a message to stderr and terminates the process with exit(1).
 */
static unsigned char *load_binary(const char *path, size_t *size_out)
{
    /* "rb": the file is a device binary, so no text-mode translation. */
    FILE *fp = fopen(path, "rb");
    if (fp == NULL) {
        fprintf(stderr, "Could not open file %s.\n", path);
        exit(1);
    }

    /* Determine the file length by seeking to the end and back. */
    long file_size = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        file_size = ftell(fp);
    }
    if (file_size <= 0 || fseek(fp, 0, SEEK_SET) != 0) {
        fprintf(stderr, "Could not read file %s.\n", path);
        fclose(fp);
        exit(1);
    }

    unsigned char *binary = malloc((size_t)file_size);
    if (binary == NULL) {
        fprintf(stderr, "Could not allocate memory.\n");
        fclose(fp);
        exit(1);
    }
    if (fread(binary, (size_t)file_size, 1, fp) != 1) {
        fprintf(stderr, "Could not read file %s.\n", path);
        free(binary);
        fclose(fp);
        exit(1);
    }
    fclose(fp);

    *size_out = (size_t)file_size;
    return binary;
}

/*
 * Reproduction program: load the kernel binary, enqueue the kernel with a
 * buffer argument, release the buffer while the task may still be pending,
 * then wait for the queue to drain and tear everything down.
 *
 * Every OpenCL call is verified with check_status(), which aborts on error.
 * Returns 0 when all calls succeed.
 */
int main(void)
{
    size_t binary_size = 0;
    unsigned char *binary = load_binary(KERNEL_FILE, &binary_size);

    cl_int status;

    /* Use the first platform and its first accelerator device. */
    cl_platform_id platform_id;
    cl_uint num_platforms;
    status = clGetPlatformIDs(1, &platform_id, &num_platforms);
    check_status(status, "clGetPlatformIDs");

    cl_device_id device_id;
    cl_uint num_devices;
    status = clGetDeviceIDs(platform_id,
                            CL_DEVICE_TYPE_ACCELERATOR,
                            1,
                            &device_id,
                            &num_devices);
    check_status(status, "clGetDeviceIDs");

    cl_context context = clCreateContext(NULL,
                                         1,
                                         &device_id,
                                         NULL,
                                         NULL,
                                         &status);
    check_status(status, "clCreateContext");

    cl_command_queue command_queue = clCreateCommandQueue(context,
                                                          device_id,
                                                          0,
                                                          &status);
    check_status(status, "clCreateCommandQueue");

    /* Create the program from the pre-compiled binary. */
    cl_int binary_status;
    cl_program program = clCreateProgramWithBinary(context,
                                                   1,
                                                   &device_id,
                                                   &binary_size,
                                                   (const unsigned char **)&binary,
                                                   &binary_status,
                                                   &status);
    check_status(status, "clCreateProgramWithBinary");
    /* Per-device load result; reported separately from the call status. */
    check_status(binary_status, "clCreateProgramWithBinary (binary status)");

    /*
     * A program must have a built executable before kernels can be created
     * from it, even when it was created from a binary.
     */
    status = clBuildProgram(program, 1, &device_id, "", NULL, NULL);
    check_status(status, "clBuildProgram");

    cl_kernel kernel = clCreateKernel(program, KERNEL_NAME, &status);
    check_status(status, "clCreateKernel");

    cl_mem mem_a = clCreateBuffer(context,
                                  CL_MEM_READ_WRITE,
                                  ARRAY_SIZE,
                                  NULL,
                                  &status);
    check_status(status, "clCreateBuffer");

    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_a);
    check_status(status, "clSetKernelArg");

    status = clEnqueueTask(command_queue, kernel, 0, NULL, NULL);
    check_status(status, "clEnqueueTask");

    /*
     * Intentionally release the buffer before the queue is flushed/finished.
     * The clReleaseMemObject documentation states the object is deleted only
     * after queued commands that use it have finished, so this ordering is
     * the behavior being exercised. Do not move it after clFinish().
     */
    status = clReleaseMemObject(mem_a);
    check_status(status, "clReleaseMemObject");

    status = clFlush(command_queue);
    check_status(status, "clFlush");
    status = clFinish(command_queue);
    check_status(status, "clFinish");

    /* Tear down the remaining OpenCL objects. */
    status = clReleaseKernel(kernel);
    check_status(status, "clReleaseKernel");
    status = clReleaseProgram(program);
    check_status(status, "clReleaseProgram");
    status = clReleaseCommandQueue(command_queue);
    check_status(status, "clReleaseCommandQueue");
    status = clReleaseContext(context);
    check_status(status, "clReleaseContext");

    free(binary);
    return 0;
}
I tried valgrind, and it looks like clReleaseMemObject() deletes the memory object even if there is a running kernel which uses the memory.
==55813== Invalid write of size 1
==55813== at 0xBC304CA: sample (sample.cl:3)
==55813== Address 0xb8a5000 is 912 bytes inside a block of size 1,049,600 free'd
==55813== at 0x4C2EDEB: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==55813== by 0x5C42D19: acl_mem_aligned_free (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x5C47307: clReleaseMemObjectIntelFPGA (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x4E3CFFE: clReleaseMemObject (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libOpenCL.so.1)
==55813== by 0x4011BE: main (main.c:96)
==55813== Block was alloc'd at
==55813== at 0x4C2DB8F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==55813== by 0x5C41A87: acl_mem_aligned_malloc (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x5C49A12: clCreateBufferIntelFPGA (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x4E3CF17: clCreateBuffer (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libOpenCL.so.1)
==55813== by 0x40113E: main (main.c:83)
==55813==
==55813== Invalid write of size 1
==55813== at 0xBC304D2: sample (sample.cl:4)
==55813== Address 0xb8a5001 is 913 bytes inside a block of size 1,049,600 free'd
==55813== at 0x4C2EDEB: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==55813== by 0x5C42D19: acl_mem_aligned_free (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x5C47307: clReleaseMemObjectIntelFPGA (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x4E3CFFE: clReleaseMemObject (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libOpenCL.so.1)
==55813== by 0x4011BE: main (main.c:96)
==55813== Block was alloc'd at
==55813== at 0x4C2DB8F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==55813== by 0x5C41A87: acl_mem_aligned_malloc (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x5C49A12: clCreateBufferIntelFPGA (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x4E3CF17: clCreateBuffer (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libOpenCL.so.1)
==55813== by 0x40113E: main (main.c:83)
==55813==
==55813== Invalid write of size 1
==55813== at 0xBC304DB: sample (sample.cl:5)
==55813== Address 0xb8a5002 is 914 bytes inside a block of size 1,049,600 free'd
==55813== at 0x4C2EDEB: free (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==55813== by 0x5C42D19: acl_mem_aligned_free (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x5C47307: clReleaseMemObjectIntelFPGA (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x4E3CFFE: clReleaseMemObject (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libOpenCL.so.1)
==55813== by 0x4011BE: main (main.c:96)
==55813== Block was alloc'd at
==55813== at 0x4C2DB8F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==55813== by 0x5C41A87: acl_mem_aligned_malloc (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x5C49A12: clCreateBufferIntelFPGA (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libalteracl.so)
==55813== by 0x4E3CF17: clCreateBuffer (in /opt/intelFPGA/18.1/hld/host/linux64/lib/libOpenCL.so.1)
==55813== by 0x40113E: main (main.c:83)
Is this a bug, or am I missing something?
- Tags:
- OpenCL™
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Try compiling your kernel against another OpenCL SDK, e.g. AMD's or NVIDIA's, and see whether your code segfaults in the same place. If it doesn't, then this is a bug in the Intel FPGA SDK. That said, I am very surprised to see the specification claim that the buffer will be freed after the commands depending on it finish; the command queue is NOT passed to clReleaseMemObject as an argument, so I fail to see how the function can determine when the buffer is safe to delete.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks for your reply. I tried NVIDIA's SDK and Xilinx SDAccel, and those frameworks worked correctly. It looks like a bug in the Intel FPGA SDK.

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page