- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I have been using the HD4600.
After upgrading the driver,
the results of my OpenCL kernel changed.
Is there a way to avoid this?
(attached the resources)
· OpenCL SDK
Intel SDK for OpenCL Applications 2016
· GPU driver version
before: 10.18.10.3496 (2014/03/11)
after: 10.18.14.4264 (2015/08/04)
・clCreateContext()
clGetDeviceIDs() -
cl_device_type device_type = CL_DEVICE_TYPE_GPU; // (Intel(R) HD Graphics 4600)
・clBuildProgram()
const char *options = NULL
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
1. Have you tried https://downloadcenter.intel.com/download/25588/Intel-Graphics-Driver-for-Windows-7-8-1-15-36- driver?
2. Could you please provide a complete example and steps to reproduce?
3. What did you get before? What are you getting now?
4. What are the sizes of your input/output buffers? What is the size of your enqueue? global/local sizes? Complete example would help a lot!
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
BTW, on the latest and greatest production driver (4380) on Windows 10, I am getting the results I expect from your kernel. The assembly for your kernel looks correct as well. So it could be that what you got previously is in error.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Dear Robert,
Thank you for your kind reply.
1. Have you tried https://downloadcenter.intel.com/download/25588/Intel-Graphics-Driver-for-Windows-7-8-1-15-36- driver?
I tried it, but the results did not change.
2. Could you please provide a complete example and steps to reproduce?
3. What did you get before? What are you getting now?
4. What are the sizes of your input/output buffers? What is the size of your enqueue? global/local sizes? Complete example would help a lot!
Sorry, I should have included these.
I have now uploaded all of the data.
Please check the readme.txt.
BTW, on the latest and greatest production driver (4380) on Windows 10, I am getting the results I expect from your kernel. The assembly for your kernel looks correct as well. So it could be that what you got previously is in error.
I cannot update to Windows 10 right now. However, there are plans to update.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
Please let me ask again.
Below are a new program and its run results.
The problem appears to be caused by local_work_size.
OS:Windows7
GPU:Intel(R) HD Graphics 4600
Driver:10.18.14.4332
OpenCL SDK: 2016
OpenCL Code Builder:6.0.0.1049
#include "stdafx.h"
#include <Windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include "CL/cl.h"
#endif
// Global OpenCL state. All handles start as 0 and are filled in by Init();
// End() releases the buffers, kernel, program, queue and context.
cl_device_id g_DeviceID = 0;            // device whose name contains DRIVER_NAME
cl_context g_Context = 0;               // context created for g_DeviceID
cl_command_queue g_CommandQueue = 0;    // queue created on g_DeviceID
cl_program clProgram = 0;               // program created from kernel_code
cl_kernel clKernel = 0;                 // the "TestKernel" kernel object
cl_mem g_InputBinBuf = 0;               // device buffer that receives the input data
cl_mem g_OutputBuf = 0;                 // device buffer that the kernel writes to
// Substring searched for (via strstr) in each candidate device's CL_DEVICE_NAME.
const char* DRIVER_NAME = "Intel(R) HD Graphics 4600";
// Dimensions of the test image, in elements.
#define INPUT_BIN_WIDTH (8)
#define INPUT_BIN_HEIGHT (4)
// Number of elements in one buffer.
#define DATA_NUM (INPUT_BIN_WIDTH * INPUT_BIN_HEIGHT)
// Size of one buffer in bytes; derived from the element type instead of a
// hard-coded 4 so it stays in sync with InputData's declaration.
#define DATA_SIZE (DATA_NUM * sizeof(unsigned int))
// Test data: every element is zero except element (x=0, y=1), which is
// 0x00000102 (second byte = 0x01, low byte = 0x02).
unsigned int InputData[DATA_NUM] = {
    0, 0, 0, 0, 0, 0, 0, 0,
    0x00000102, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
};
// OpenCL C source of TestKernel, compiled at run time by Init().
// Each work-item (gx, gy) reads iRgbaImage[gy * iImageWidth + gx] and writes
// to the same index of oBuffer:
//   - the low byte of the input, or
//   - (second byte + low byte) when the second byte is non-zero.
// For the input value 0x00000102 this yields 0x01 + 0x02 = 3.
const char* kernel_code = ""
"__kernel void TestKernel( __global unsigned int* oBuffer, \n"
" const __global unsigned int* iRgbaImage, \n"
" const int iImageWidth \n"
") \n"
"{ \n"
" int gx = get_global_id(0); \n"
" int gy = get_global_id(1); \n"
" \n"
" unsigned int Input; \n"
" unsigned int result; \n"
" int pos; \n"
" \n"
" pos = gy * iImageWidth + gx; // ReadAddress \n"
" Input = iRgbaImage[pos]; // TargetData \n"
" \n"
" result = Input & 0xff; \n"
" if ( ((Input>>8)&0xff) != 0){ \n"
" result = ((Input>>8)&0xff) + (Input&0xff); \n"
" } \n"
" \n"
" oBuffer[pos] = result; \n"
"} \n";
//-----------------------------------
// Init Resource
//-----------------------------------
void Init(void)
{
cl_uint numPlatforms;
cl_int ret;
// CreateOpenCL
ret = clGetPlatformIDs(0, NULL, &numPlatforms);
cl_platform_id *platform_id = new cl_platform_id[numPlatforms];
cl_device_id device_id;
ret = clGetPlatformIDs(numPlatforms, platform_id, NULL);
static const int INFO_MAX = 128;
char device_name[INFO_MAX];
for(cl_uint i = 0; i < numPlatforms; ++i){
clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
clGetDeviceInfo( device_id, CL_DEVICE_NAME, INFO_MAX, device_name, NULL);
if(NULL != strstr(device_name, DRIVER_NAME)){
g_DeviceID = device_id;
break;
}
}
if(g_DeviceID == 0){
printf("no match driver");
exit(0);
}
g_Context = clCreateContext( NULL, 1, &g_DeviceID, NULL, NULL, NULL);
g_CommandQueue = clCreateCommandQueue(g_Context, g_DeviceID, 0, NULL);
delete[] platform_id;
size_t source_size = strlen(kernel_code);
clProgram = clCreateProgramWithSource(g_Context, 1, (const char **)&kernel_code, (const size_t *)&source_size, &ret);
ret = clBuildProgram(clProgram, 1, &g_DeviceID, NULL, NULL, NULL);
clKernel = clCreateKernel(clProgram, "TestKernel", &ret);
// Create Buffer
g_InputBinBuf = clCreateBuffer(g_Context, CL_MEM_READ_WRITE, DATA_SIZE, NULL, &ret);
g_OutputBuf = clCreateBuffer(g_Context, CL_MEM_READ_WRITE, DATA_SIZE, NULL, &ret);
}
//-----------------------------------
// Call Kernel
//-----------------------------------
void NDRangeKernel(const UINT32* image, size_t* local_work_size)
{
// Copy InputBin To g_InputBinBuf
clEnqueueWriteBuffer(g_CommandQueue, g_InputBinBuf, CL_TRUE, 0, DATA_SIZE, image, 0, NULL, NULL);
// Kernl
int data_width = INPUT_BIN_WIDTH;
clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void*)&g_OutputBuf); // output
clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void*)&g_InputBinBuf); // input
clSetKernelArg(clKernel, 2, sizeof(int), (void *)&data_width);
size_t global_item_size[] = {INPUT_BIN_WIDTH, INPUT_BIN_HEIGHT};
clEnqueueNDRangeKernel( g_CommandQueue, //
clKernel, //
2, // dim
NULL, // global_work_offset
global_item_size,
local_work_size,
0, NULL, NULL
);
}
//-----------------------------------
// Clear Resource
//-----------------------------------
// Releases every OpenCL object created by Init().
// The queue is drained first so that no enqueued command can still be using
// the kernel, program or buffers when they are released; clFinish also
// submits pending commands, so a separate clFlush is not needed.
void End(void)
{
    clFinish(g_CommandQueue);

    clReleaseMemObject(g_InputBinBuf);
    clReleaseMemObject(g_OutputBuf);
    clReleaseKernel(clKernel);
    clReleaseProgram(clProgram);
    clReleaseCommandQueue(g_CommandQueue);
    clReleaseContext(g_Context);
}
//================================================
// main
//================================================
int _tmain(int argc, _TCHAR* argv[])
{
//-------------
// Init Resource
Init();
//-------------
// Test Main
UINT32 ResultBuf[2][DATA_NUM];
// NDRangeKernel LocalWork=NULL
NDRangeKernel(InputData, NULL);
clEnqueueReadBuffer(g_CommandQueue, g_OutputBuf, CL_TRUE, 0, DATA_SIZE, ResultBuf[0], 0, NULL, NULL);
#if 0 // another error case
NDRangeKernel(InputData, NULL);
size_t full_item_size[] = {INPUT_BIN_WIDTH, INPUT_BIN_HEIGHT};
NDRangeKernel(InputData, full_item_size);
clEnqueueReadBuffer(g_CommandQueue, g_OutputBuf, CL_TRUE, 0, DATA_SIZE, ResultBuf[0], 0, NULL, NULL);
#endif
// NDRangeKernel LocalWork={1,1}
size_t local_item_size[] = {1,1};
NDRangeKernel(InputData, local_item_size);
clEnqueueReadBuffer(g_CommandQueue, g_OutputBuf, CL_TRUE, 0, DATA_SIZE, ResultBuf[1], 0, NULL, NULL);
//-------------
// Print Result
printf("LocalWork=NULL\n");
for(int i = 0; i < INPUT_BIN_HEIGHT; i++){
for(int j = 0; j < INPUT_BIN_WIDTH; j++) printf("%d ",ResultBuf[0][i*INPUT_BIN_WIDTH + j]);
printf("\n");
}
printf("\n");
printf("LocalWork={1,1}\n");
for(int i = 0; i < INPUT_BIN_HEIGHT; i++){
for(int j = 0; j < INPUT_BIN_WIDTH; j++) printf("%d ",ResultBuf[1][i*INPUT_BIN_WIDTH + j]);
printf("\n");
}
printf("\n");
//-------------
// Clear Resource
End();
system("pause");
return 0;
}
result
LocalWork=NULL
0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
LocalWork={1,1}
0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page