- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
For research purposes regarding the portability of OpenCL applications, I set up a test application that measures the latency of memory enqueueing/dequeueing operations for the various possible API calls OpenCL offers. While it generates correct results over a broad variety of platforms, operating systems, etc., it fails when the buffer is constructed via CL_MEM_USE_HOST_PTR and the application is built for a Cyclone V SoC, where the host application runs on the ARM. The error that can be observed is that the results simply contain invalid values.
I read through some docs; however, I found no mention that this should not work when targeting Intel FPGA SoCs. Did I overlook something, is the code below incorrect in some way, or have I run into a bug?
This is the relevant part of the code where the error happens (note that CL_MEM_ALLOC_HOST_PTR works fine).
I hope the code snippet below is understandable without the context:
/**
 * Runs the map/unmap test once per entry of testSizes, using OpenCL buffers that are
 * created with CL_MEM_USE_HOST_PTR on top of host memory owned by this function.
 *
 * With OPEN_CL_INTEL_FPGA defined the host memory is allocated through posix_memalign
 * (64 byte alignment), otherwise it is backed by std::vector<float>.
 *
 * A size for which the host allocation or the buffer creation fails is reported on
 * std::cerr and skipped, so it gets no entry in the result.
 *
 * NOTE(review): Intel's FPGA SDK for OpenCL Programming Guide ("Allocating Shared Memory
 * for OpenCL Kernels Targeting SoCs") appears to state that CL_MEM_USE_HOST_PTR does not
 * work with the shared memory of SoC devices and that CL_MEM_ALLOC_HOST_PTR has to be
 * used instead - confirm against the guide for the SDK version in use.
 *
 * @return JSON object mapping std::to_string (size) to the value of calculateTimes().
 */
nlohmann::json performTestUseHostPtr ()
{
    nlohmann::json results;

    for (auto size : testSizes)
    {
        const auto numBytes = size * sizeof (float);

#ifdef OPEN_CL_INTEL_FPGA
        // posix_memalign expects the alignment in bytes, so this requests 64 byte alignment.
        // The pointers start as nullptr so that they are never read uninitialised.
        void* inputBufferHostPtr  = nullptr;
        void* outputBufferHostPtr = nullptr;

        const int inputAllocResult  = posix_memalign (&inputBufferHostPtr,  64, numBytes);
        const int outputAllocResult = posix_memalign (&outputBufferHostPtr, 64, numBytes);

        if (inputAllocResult != 0 || outputAllocResult != 0)
        {
            std::cerr << "Error allocating aligned host memory for test size " << size << std::endl;

            // Only release what was successfully allocated
            if (inputAllocResult == 0)
                free (inputBufferHostPtr);
            if (outputAllocResult == 0)
                free (outputBufferHostPtr);

            continue;
        }
#else
        std::vector<float> inputBufferHost (size);
        std::vector<float> outputBufferHost (size);

        void* inputBufferHostPtr  = inputBufferHost.data();
        void* outputBufferHostPtr = outputBufferHost.data();
#endif

        bool testWasRun = false;

        // The buffers live in their own scope: a CL_MEM_USE_HOST_PTR buffer keeps referring to the
        // host memory until it is released, so both buffers must be destroyed before the memory is freed.
        {
            cl_int err = CL_SUCCESS;

            cl::Buffer inputBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, numBytes, inputBufferHostPtr, &err);
            const bool inputBufferValid = (err == CL_SUCCESS);
            if (! inputBufferValid)
                std::cerr << "Error creating input buffer: " << ntlab::OpenCLHelpers::getErrorString (err) << std::endl;

            cl::Buffer outputBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, numBytes, outputBufferHostPtr, &err);
            const bool outputBufferValid = (err == CL_SUCCESS);
            if (! outputBufferValid)
                std::cerr << "Error creating output buffer: " << ntlab::OpenCLHelpers::getErrorString (err) << std::endl;

            // Running the test on a buffer that could not be created would only produce follow-up errors
            if (inputBufferValid && outputBufferValid)
            {
                mapUnmapTestsInternal (size, inputBuffer, outputBuffer);
                testWasRun = true;
            }
        }

#ifdef OPEN_CL_INTEL_FPGA
        free (inputBufferHostPtr);
        free (outputBufferHostPtr);
#endif

        if (testWasRun)
            results[std::to_string (size)] = calculateTimes();
    }

    return results;
}
/**
 * Runs the map/unmap test once per entry of testSizes, using OpenCL buffers created
 * with CL_MEM_ALLOC_HOST_PTR (no host pointer is passed to the buffer constructor).
 *
 * @return JSON object mapping std::to_string (size) to the value of calculateTimes().
 */
nlohmann::json performTestAllocHostPtr()
{
    nlohmann::json timingsPerSize;

    const auto readOnlyFlags  = CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY;
    const auto writeOnlyFlags = CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY;

    for (auto numElements : testSizes)
    {
        const auto numBytes = numElements * sizeof (float);

        // The kernel input is read-only and its output write-only from the device's point of view
        cl::Buffer source (context, readOnlyFlags, numBytes);
        cl::Buffer destination (context, writeOnlyFlags, numBytes);

        mapUnmapTestsInternal (numElements, source, destination);

        // The measured times are stored under the test size as key
        timingsPerSize[std::to_string (numElements)] = calculateTimes();
    }

    return timingsPerSize;
}
/**
 * Performs numTests rounds of "unmap input -> run kernel -> map output" on the given buffers
 * and afterwards compares the content of the output buffer with testData.
 *
 * For every round i the start time is written to startTimes[i]; setTimestampCallback is
 * registered for completion of the input unmap, the kernel and the output map with
 * writeBufferCompletedTimes[i], kernelCompletedTimes[i] and readBufferCompletedTimes[i]
 * as user data.
 *
 * A failed mapping (nullptr returned by enqueueMapBuffer) is reported on std::cerr and aborts
 * the test instead of passing the null pointer on to std::memcpy.
 *
 * @param size          Number of float elements processed per round. testData is read for
 *                      this many elements - presumably the caller guarantees that it is
 *                      large enough; verify against the code filling testData.
 * @param inputBuffer   Buffer set as kernel argument 0.
 * @param outputBuffer  Buffer set as kernel argument 1.
 */
void mapUnmapTestsInternal (size_t size, cl::Buffer& inputBuffer, cl::Buffer& outputBuffer)
{
    const size_t numBytes = size * sizeof (float);

    // NOTE(review): the Intel FPGA build uses CL_MAP_WRITE instead of CL_MAP_WRITE_INVALIDATE_REGION,
    // presumably because the latter is not available on that platform - confirm.
#ifdef OPEN_CL_INTEL_FPGA
    const auto inputMapFlags = CL_MAP_WRITE;
#else
    const auto inputMapFlags = CL_MAP_WRITE_INVALIDATE_REGION;
#endif

    std::vector<cl::Event> inputBufferEvent (1);
    std::vector<cl::Event> outputBufferEvent (1);
    std::vector<cl::Event> kernelEvent (1);

    // Blocking map, fill the input buffer with the test data
    void* inputBufferMapped = queue.enqueueMapBuffer (inputBuffer, CL_TRUE, inputMapFlags, 0, numBytes);
    if (inputBufferMapped == nullptr)
    {
        std::cerr << "Error: Mapping the input buffer failed" << std::endl;
        return;
    }

    std::memcpy (inputBufferMapped, testData.data(), numBytes);

    kernel.setArg (0, inputBuffer);
    kernel.setArg (1, outputBuffer);

    for (int i = 0; i < numTests; ++i)
    {
        // Timed section: the order of these calls is part of the measurement and must not be changed
        startTimes[i] = ntlab::HighResolutionTimer::now();

        queue.enqueueUnmapMemObject (inputBuffer, inputBufferMapped, nullptr, &inputBufferEvent[0]);
        inputBufferEvent[0].setCallback (CL_COMPLETE, setTimestampCallback, &writeBufferCompletedTimes[i]);

        // The kernel waits for the unmap of the input buffer
        queue.enqueueNDRangeKernel (kernel, cl::NullRange, cl::NDRange (size), cl::NullRange, &inputBufferEvent, &kernelEvent[0]);
        kernelEvent[0].setCallback (CL_COMPLETE, setTimestampCallback, &kernelCompletedTimes[i]);

        // Non-blocking map of the output buffer that waits for the kernel
        void* outputBufferMapped = queue.enqueueMapBuffer (outputBuffer, CL_FALSE, CL_MAP_READ, 0, numBytes, &kernelEvent, &outputBufferEvent[0]);
        outputBufferEvent[0].setCallback (CL_COMPLETE, setTimestampCallback, &readBufferCompletedTimes[i]);

        // Blocking re-map of the input buffer for the next round, also waits for the kernel
        inputBufferMapped = queue.enqueueMapBuffer (inputBuffer, CL_TRUE, inputMapFlags, 0, numBytes, &kernelEvent);

        queue.finish();

        if (inputBufferMapped == nullptr || outputBufferMapped == nullptr)
        {
            std::cerr << "Error: Mapping a buffer failed in test run " << i << std::endl;

            // Release whichever mapping succeeded before giving up
            if (outputBufferMapped != nullptr)
                queue.enqueueUnmapMemObject (outputBuffer, outputBufferMapped);
            if (inputBufferMapped != nullptr)
                queue.enqueueUnmapMemObject (inputBuffer, inputBufferMapped);

            queue.finish();
            return;
        }

        // Feed the output of this round back as input of the next round
        std::memcpy (inputBufferMapped, outputBufferMapped, numBytes);

        queue.enqueueUnmapMemObject (outputBuffer, outputBufferMapped);
        queue.finish();
    }

    // make sure the results are valid
    auto* outputBufferMapped = static_cast<float*> (queue.enqueueMapBuffer (outputBuffer, CL_TRUE, CL_MAP_READ, 0, numBytes));

    if (outputBufferMapped != nullptr)
    {
        // Exact comparison is intended here: only the first mismatch is reported
        for (size_t i = 0; i < size; ++i)
        {
            if (testData[i] != outputBufferMapped[i])
            {
                std::cerr << "Error: Unexpected value at position " << i << ", expected " << testData[i] << ", got " << outputBufferMapped[i] << std::endl;
                break;
            }
        }

        queue.enqueueUnmapMemObject (outputBuffer, outputBufferMapped);
    }
    else
    {
        std::cerr << "Error: Mapping the output buffer for validation failed" << std::endl;
    }

    queue.enqueueUnmapMemObject (inputBuffer, inputBufferMapped);
    queue.finish ();
}
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page