- Marcar como nuevo
- Favorito
- Suscribir
- Silenciar
- Suscribirse a un feed RSS
- Resaltar
- Imprimir
- Informe de contenido inapropiado
For research purposes regarding the portability of OpenCL applications, I set up a test application that measures the latency of memory enqueueing/dequeueing operations for the various API calls OpenCL offers. While it generates correct results over a broad variety of platforms, operating systems, etc., it fails when the buffer is constructed with CL_MEM_USE_HOST_PTR and the application is built for a Cyclone V SoC, where the host application runs on the ARM. The observed error is that the values read back are simply invalid.
I read through some docs; however, I found no mention that this should not work when targeting Intel FPGA SoCs. Did I overlook something, is the code below incorrect in some way, or have I run into a bug?
This is the relevant part of the code where the error happens (note that CL_MEM_ALLOC_HOST_PTR works fine).
I hope the code snippet below is understandable without the context:
/**
 * Runs the map/unmap test with buffers created via CL_MEM_USE_HOST_PTR.
 *
 * For every entry of testSizes, host memory for `size` floats is provided for an input and an
 * output buffer, both are wrapped in cl::Buffer objects and handed to mapUnmapTestsInternal.
 * The value returned by calculateTimes() is stored under the size converted to a string.
 *
 * Sizes for which the host allocation or the buffer creation fails are reported on std::cerr
 * and get no entry in the result, since no test was run for them.
 *
 * @return JSON object mapping each successfully tested size to its calculateTimes() result.
 */
nlohmann::json performTestUseHostPtr ()
{
    nlohmann::json results;

    for (auto size : testSizes)
    {
        const auto numBytes = size * sizeof (float);

        void* inputBufferHostPtr = nullptr;
        void* outputBufferHostPtr = nullptr;

        // Read somewhere in some docs that 64 byte alignment is required, not sure if this applies to SoCs but just in case...
#ifdef OPEN_CL_INTEL_FPGA
        if (posix_memalign (&inputBufferHostPtr, 64, numBytes) != 0)
        {
            std::cerr << "Error allocating aligned host memory for input buffer" << std::endl;
            continue;
        }

        if (posix_memalign (&outputBufferHostPtr, 64, numBytes) != 0)
        {
            std::cerr << "Error allocating aligned host memory for output buffer" << std::endl;
            free (inputBufferHostPtr);
            continue;
        }
#else
        std::vector<float> inputBufferHost (size);
        std::vector<float> outputBufferHost (size);
        inputBufferHostPtr = inputBufferHost.data();
        outputBufferHostPtr = outputBufferHost.data();
#endif

        {
            // Inner scope: with CL_MEM_USE_HOST_PTR the host memory has to outlive the buffer objects,
            // so the cl::Buffer instances must be destroyed before the host memory is released below.
            cl_int err = CL_SUCCESS;

            cl::Buffer inputBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, numBytes, inputBufferHostPtr, &err);
            const bool inputBufferCreated = (err == CL_SUCCESS);
            if (! inputBufferCreated)
                std::cerr << "Error creating input buffer: " << ntlab::OpenCLHelpers::getErrorString (err) << std::endl;

            cl::Buffer outputBuffer (context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, numBytes, outputBufferHostPtr, &err);
            const bool outputBufferCreated = (err == CL_SUCCESS);
            if (! outputBufferCreated)
                std::cerr << "Error creating output buffer: " << ntlab::OpenCLHelpers::getErrorString (err) << std::endl;

            // Only run the test and record timings when both buffers are usable
            if (inputBufferCreated && outputBufferCreated)
            {
                mapUnmapTestsInternal (size, inputBuffer, outputBuffer);
                results[std::to_string (size)] = calculateTimes();
            }
        }

#ifdef OPEN_CL_INTEL_FPGA
        free (inputBufferHostPtr);
        free (outputBufferHostPtr);
#endif
    }

    return results;
}
/**
 * Runs the map/unmap test with buffers created via CL_MEM_ALLOC_HOST_PTR.
 *
 * For every entry of testSizes an input and an output buffer holding that many floats are
 * created and passed to mapUnmapTestsInternal; the value returned by calculateTimes() is then
 * stored under the size converted to a string.
 *
 * @return JSON object mapping each tested size to its calculateTimes() result.
 */
nlohmann::json performTestAllocHostPtr()
{
    nlohmann::json timingsBySize;

    for (auto numElements : testSizes)
    {
        cl::Buffer inputBuffer  (context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY,  numElements * sizeof (float));
        cl::Buffer outputBuffer (context, CL_MEM_ALLOC_HOST_PTR | CL_MEM_WRITE_ONLY, numElements * sizeof (float));

        mapUnmapTestsInternal (numElements, inputBuffer, outputBuffer);

        timingsBySize[std::to_string (numElements)] = calculateTimes();
    }

    return timingsBySize;
}
/**
 * Runs numTests unmap -> kernel -> map round trips on the given buffers and checks the result.
 *
 * The input buffer is mapped and filled with the first `size` floats of testData. Each
 * iteration then
 *   1. stores the start time in startTimes[i],
 *   2. unmaps the input buffer,
 *   3. enqueues the kernel, waiting on the unmap event,
 *   4. maps the output buffer (non-blocking), waiting on the kernel event,
 *   5. maps the input buffer again (blocking), waiting on the kernel event,
 *   6. copies the mapped output back into the mapped input and unmaps the output.
 * setTimestampCallback is registered on the unmap, kernel and output-map events with a pointer
 * into writeBufferCompletedTimes, kernelCompletedTimes and readBufferCompletedTimes
 * respectively (presumably it stores the completion time there - confirm against its definition).
 *
 * After the loop the output buffer is mapped once more and compared element-wise against
 * testData; the first mismatch is reported on std::cerr.
 *
 * @param size          Number of floats held by each buffer.
 * @param inputBuffer   Buffer set as kernel argument 0.
 * @param outputBuffer  Buffer set as kernel argument 1.
 */
void mapUnmapTestsInternal (size_t size, cl::Buffer& inputBuffer, cl::Buffer& outputBuffer)
{
    const size_t numBytes = size * sizeof (float);

    // The Intel FPGA build uses plain CL_MAP_WRITE instead of CL_MAP_WRITE_INVALIDATE_REGION
#ifdef OPEN_CL_INTEL_FPGA
    const auto inputMapFlags = CL_MAP_WRITE;
#else
    const auto inputMapFlags = CL_MAP_WRITE_INVALIDATE_REGION;
#endif

    std::vector<cl::Event> inputBufferEvent (1);
    std::vector<cl::Event> outputBufferEvent (1);
    std::vector<cl::Event> kernelEvent (1);

    void* inputBufferMapped = queue.enqueueMapBuffer (inputBuffer, CL_TRUE, inputMapFlags, 0, numBytes);
    std::memcpy (inputBufferMapped, testData.data(), numBytes);

    kernel.setArg (0, inputBuffer);
    kernel.setArg (1, outputBuffer);

    for (int i = 0; i < numTests; ++i)
    {
        startTimes[i] = ntlab::HighResolutionTimer::now();

        queue.enqueueUnmapMemObject (inputBuffer, inputBufferMapped, nullptr, &inputBufferEvent[0]);
        inputBufferEvent[0].setCallback (CL_COMPLETE, setTimestampCallback, &writeBufferCompletedTimes[i]);

        queue.enqueueNDRangeKernel (kernel, cl::NullRange, cl::NDRange (size), cl::NullRange, &inputBufferEvent, &kernelEvent[0]);
        kernelEvent[0].setCallback (CL_COMPLETE, setTimestampCallback, &kernelCompletedTimes[i]);

        void* outputBufferMapped = queue.enqueueMapBuffer (outputBuffer, CL_FALSE, CL_MAP_READ, 0, numBytes, &kernelEvent, &outputBufferEvent[0]);
        outputBufferEvent[0].setCallback (CL_COMPLETE, setTimestampCallback, &readBufferCompletedTimes[i]);

        // Re-map the input so it can be refilled for the next iteration
        inputBufferMapped = queue.enqueueMapBuffer (inputBuffer, CL_TRUE, inputMapFlags, 0, numBytes, &kernelEvent);

        // The output map above was non-blocking, so wait before touching its pointer
        queue.finish();

        std::memcpy (inputBufferMapped, outputBufferMapped, numBytes);

        queue.enqueueUnmapMemObject (outputBuffer, outputBufferMapped);
        queue.finish();
    }

    // make sure the results are valid
    auto* outputBufferMapped = static_cast<float*> (queue.enqueueMapBuffer (outputBuffer, CL_TRUE, CL_MAP_READ, 0, numBytes));

    // size_t index: matches the type of size and avoids a signed/unsigned comparison
    for (size_t i = 0; i < size; ++i)
    {
        if (testData[i] != outputBufferMapped[i])
        {
            std::cerr << "Error: Unexpected value at position " << i << ", expected " << testData[i] << ", got " << outputBufferMapped[i] << std::endl;
            break;
        }
    }

    queue.enqueueUnmapMemObject (outputBuffer, outputBufferMapped);
    queue.enqueueUnmapMemObject (inputBuffer, inputBufferMapped);
    queue.finish ();
}
Enlace copiado
- Marcar como nuevo
- Favorito
- Suscribir
- Silenciar
- Suscribirse a un feed RSS
- Resaltar
- Imprimir
- Informe de contenido inapropiado
- Marcar como nuevo
- Favorito
- Suscribir
- Silenciar
- Suscribirse a un feed RSS
- Resaltar
- Imprimir
- Informe de contenido inapropiado

- Suscribirse a un feed RSS
- Marcar tema como nuevo
- Marcar tema como leído
- Flotar este Tema para el usuario actual
- Favorito
- Suscribir
- Página de impresión sencilla