- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I wrote a very simple program to test shared resources. My environment: i7-3770K, Windows 7 64-bit, Intel OpenCL SDK 2013. My project is attached. When the OpenCL kernel runs on the CPU device, the results are correct. However, when it runs on the GPU device, the results are wrong. Can anyone help me explain this problem?
Thanks in advance.
By the way, when I try to debug the OpenCL source code, the debugger doesn't work on this machine, although it works on another PC. I don't know why. Could a Windows update cause this issue? I also noticed that someone else has run into the same problem: http://redfort-software.intel.com/en-us/forums/showthread.php?t=101929
Sorry, I cannot attach my project, so I have posted the source code directly.
Main.cpp
#define __CL_ENABLE_EXCEPTIONS
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#include <CL/cl.hpp>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <vector>
#define DATASIZE 512
#define WORKSIZE 32
#define LOCALSIZE 16
//#define CPU
using namespace std;
int main()
{
vector<:PLATFORM> platforms;
vector<:DEVICE> cpuDevices, gpuDevices, allDevices;
cl_uint minAlign ;
try {
cl::Platform::get(&platforms);
cout << "Platform number: " << platforms.size() << endl;
cout << "Platform name: " << platforms[0].getInfo
platforms[0].getDevices(CL_DEVICE_TYPE_CPU, &cpuDevices);
cout << "CPU device number: " << cpuDevices.size() << endl;
cout << "Device CPU name: " << cpuDevices[0].getInfo
cout << "Compute Units: " << cpuDevices[0].getInfo
cout << "Preferred Float Vector Width: " << cpuDevices[0].getInfo
platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &gpuDevices);
cout << "GPU device number: " << gpuDevices.size() << endl;
cout << "Device GPU name: " << gpuDevices[0].getInfo
cout << "Compute Units: " << gpuDevices[0].getInfo
cout << "Preferred Float Vector Width: " << gpuDevices[0].getInfo
platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &allDevices);
size_t time_resolution = cpuDevices[0].getInfo
cout << "cpu device profiling resolution: " << time_resolution << endl;
time_resolution = gpuDevices[0].getInfo
cout << "gpu device profiling resolution: " << time_resolution << endl;
minAlign = cpuDevices[0].getInfo
cout << "CPU device memory align: " << minAlign << endl;
minAlign = gpuDevices[0].getInfo
cout << "GPU device memory align: " << minAlign << endl;
cl_float* g_pfInput = (cl_float*) _aligned_malloc(DATASIZE * sizeof(cl_float), minAlign);
cl_float* g_pfOutput = (cl_float*) _aligned_malloc(DATASIZE * sizeof(cl_float), minAlign);
for(int i = 0; i < DATASIZE; i++)
{
g_pfInput = i;
g_pfOutput = -1;
}
#ifdef CPU
cl::Context context(cpuDevices);
#else
cl::Context context(gpuDevices);
#endif
std::ifstream programFile("oclWriteBuffer.cl");
std::string programString(std::istreambuf_iterator
cl::Program::Sources source(1, std::make_pair(programString.c_str(), programString.length()+ 1));
cl::Program program(context, source);
try {
#ifdef CPU
program.build(cpuDevices,"-g -s \\"D:\\\\Nick\\OpenCL\\\\Tutorial1\\\\oclShareMemoryTest\\\\oclShareMemoryTest\\\\oclWriteBuffer.cl\\"");
#else
program.build(gpuDevices,"-g -s \\"D:\\\\Nick\\OpenCL\\\\Tutorial1\\\\oclShareMemoryTest\\\\oclShareMemoryTest\\\\oclWriteBuffer.cl\\"");
#endif
}
catch(cl::Error e)
{
cout << "WriteBuffer: Build Error" << endl;
cout << e.what() << ": Error code " << e.err() << endl << endl;
string log;
#ifdef CPU
log = program.getBuildInfo
#else
log = program.getBuildInfo
#endif
cout << log << endl << endl;
}
cl::Kernel writeKernel(program, "WriteBuffer");
cl::Buffer inputBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, DATASIZE, g_pfInput);
cl::Buffer outputBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR, DATASIZE, g_pfOutput);
writeKernel.setArg(0, inputBuffer);
writeKernel.setArg(1, outputBuffer);
writeKernel.setArg(2, DATASIZE);
#ifdef CPU
cl::CommandQueue cmdQueue(context, cpuDevices[0]);
#else
cl::CommandQueue cmdQueue(context, gpuDevices[0]);
#endif
cmdQueue.enqueueNDRangeKernel(writeKernel, 0, WORKSIZE, LOCALSIZE);
cmdQueue.finish();
}
catch (cl::Error e) {
cout << e.what() << ": Error code " << e.err() << endl;
}
return 0;
}
oclWriteBuffer.cl file
/**
 * Copies the first nLength floats of pfInput into pfOutput.
 *
 * Each work-item starts at its global id (dimension 0) and advances by the
 * global size, so the whole array is covered for any 1-D launch size,
 * including the tail when nLength is not a multiple of the global size.
 *
 * pfInput  source array, at least nLength elements
 * pfOutput destination array, at least nLength elements
 * nLength  number of elements to copy; values <= 0 copy nothing
 */
__kernel
void WriteBuffer(__global float* pfInput, __global float* pfOutput, int nLength)
{
    const size_t nStride = get_global_size(0);
    /* Clamp before widening: a negative int would become a huge size_t. */
    const size_t nCount = (nLength > 0) ? (size_t)nLength : 0;
    for (size_t nIndex = get_global_id(0); nIndex < nCount; nIndex += nStride)
    {
        pfOutput[nIndex] = pfInput[nIndex];
    }
}
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
Let me take a look and get back to you.
Thanks,
Raghu
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks,
Raghu
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page