- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I have been using the HD4600.
After updating the driver,
the results of my OpenCL kernel changed.
Is there a way to avoid this?
(attached the resources)
· OpenCL SDK
Intel SDK for OpenCL Applications 2016
· GPU driver version
before: 10.18.10.3496 (2014/03/11)
after: 10.18.14.4264 (2015/08/04)
・clCreateContext()
clGetDeviceIDs() -
cl_device_type device_type = CL_DEVICE_TYPE_GPU; // (Intel(R) HD Graphics 4600)
・clBuildProgram()
const char *options = NULL
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
1. Have you tried https://downloadcenter.intel.com/download/25588/Intel-Graphics-Driver-for-Windows-7-8-1-15-36- driver?
2. Could you please provide a complete example and steps to reproduce?
3. What did you get before? What are you getting now?
4. What are the sizes of your input/output buffers? What is the size of your enqueue? global/local sizes? Complete example would help a lot!
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
BTW, on the latest and greatest production driver (4380) on Windows 10, I am getting the results I expect from your kernel. The assembly for your kernel looks correct as well. So it could be that what you got previously is in error.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Dear Robert,
Thank you for your kind reply.
1. Have you tried https://downloadcenter.intel.com/download/25588/Intel-Graphics-Driver-for-Windows-7-8-1-15-36- driver?
I tried it, but the results did not change.
2. Could you please provide a complete example and steps to reproduce?
3. What did you get before? What are you getting now?
4. What are the sizes of your input/output buffers? What is the size of your enqueue? global/local sizes? Complete example would help a lot!
Sorry.
I have uploaded all of the data.
Please check the readme.txt.
BTW, on the latest and greatest production driver (4380) on Windows 10, I am getting the results I expect from your kernel. The assembly for your kernel looks correct as well. So it could be that what you got previously is in error.
I cannot update to Windows 10 right now. However, I plan to update later.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
Please let me ask my question again.
Below are a new program and its run results.
The problem occurs depending on local_work_size.
OS:Windows7
GPU:Intel(R) HD Graphics 4600
Driver:10.18.14.4332
OpenCL SDK: 2016
OpenCL Code Builder:6.0.0.1049
// Minimal OpenCL host program that runs the same kernel twice on an 8x4 buffer,
// once with local_work_size = NULL and once with local_work_size = {1,1},
// and prints both result buffers so they can be compared.
#include "stdafx.h"
#include <Windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include "CL/cl.h"
#endif

cl_device_id     g_DeviceID     = 0;
cl_context       g_Context      = 0;
cl_command_queue g_CommandQueue = 0;
cl_program       clProgram      = 0;
cl_kernel        clKernel       = 0;
cl_mem           g_InputBinBuf  = 0;
cl_mem           g_OutputBuf    = 0;

// Substring that must appear in CL_DEVICE_NAME of the device to use.
const char* DRIVER_NAME = "Intel(R) HD Graphics 4600";

#define INPUT_BIN_WIDTH  (8)
#define INPUT_BIN_HEIGHT (4)
#define DATA_NUM  (INPUT_BIN_WIDTH*INPUT_BIN_HEIGHT)
#define DATA_SIZE (DATA_NUM*4)   // bytes: DATA_NUM 32-bit elements

// Test Data: all zero except element 8 (row 1, column 0) = 0x00000102.
unsigned int InputData[DATA_NUM] = {
    0,          0, 0, 0, 0, 0, 0, 0,
    0x00000102, 0, 0, 0, 0, 0, 0, 0,
    0,          0, 0, 0, 0, 0, 0, 0,
    0,          0, 0, 0, 0, 0, 0, 0
};

// Kernel Code.
// For each element: result = low byte; if the second byte is non-zero,
// result = second byte + low byte. For 0x00000102 that is 0x01 + 0x02 = 3.
const char* kernel_code = ""
    "__kernel void TestKernel( __global unsigned int* oBuffer, \n"
    " const __global unsigned int* iRgbaImage, \n"
    " const int iImageWidth \n"
    ") \n"
    "{ \n"
    " int gx = get_global_id(0); \n"
    " int gy = get_global_id(1); \n"
    " \n"
    " unsigned int Input; \n"
    " unsigned int result; \n"
    " int pos; \n"
    " \n"
    " pos = gy * iImageWidth + gx; // ReadAddress \n"
    " Input = iRgbaImage[pos]; // TargetData \n"
    " \n"
    " result = Input & 0xff; \n"
    " if ( ((Input>>8)&0xff) != 0){ \n"
    " result = ((Input>>8)&0xff) + (Input&0xff); \n"
    " } \n"
    " \n"
    " oBuffer[pos] = result; \n"
    "} \n";

// Aborts the program with a diagnostic when an OpenCL call did not succeed.
// `what` names the failing call for the error message.
static void CheckCL(cl_int err, const char* what)
{
    if (err != CL_SUCCESS) {
        fprintf(stderr, "%s failed (OpenCL error %d)\n", what, (int)err);
        exit(EXIT_FAILURE);
    }
}

// Prints one DATA_NUM-element result buffer as INPUT_BIN_HEIGHT rows of
// INPUT_BIN_WIDTH values, preceded by `title` and followed by a blank line.
static void PrintResult(const char* title, const UINT32* data)
{
    printf("%s\n", title);
    for (int i = 0; i < INPUT_BIN_HEIGHT; i++) {
        for (int j = 0; j < INPUT_BIN_WIDTH; j++) {
            // %u: the elements are unsigned 32-bit values.
            printf("%u ", (unsigned int)data[i*INPUT_BIN_WIDTH + j]);
        }
        printf("\n");
    }
    printf("\n");
}

//-----------------------------------
// Init Resource
//
// Selects the first GPU device whose name contains DRIVER_NAME, then creates
// the context, command queue, program, kernel and the input/output buffers.
// Prints "no match driver" and exits if no such device is found; any failing
// OpenCL creation/build call aborts via CheckCL.
//-----------------------------------
void Init(void)
{
    cl_uint numPlatforms = 0;
    cl_int ret;

    // CreateOpenCL
    ret = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (ret != CL_SUCCESS) {
        // Treat a failed query like "no platforms" so we fall through to
        // the "no match driver" path below.
        numPlatforms = 0;
    }

    cl_platform_id *platform_id = new cl_platform_id[numPlatforms];
    if (numPlatforms > 0) {
        // num_entries == 0 with a non-NULL array is an invalid call, hence the guard.
        ret = clGetPlatformIDs(numPlatforms, platform_id, NULL);
        CheckCL(ret, "clGetPlatformIDs");
    }

    static const int INFO_MAX = 128;
    for (cl_uint i = 0; i < numPlatforms; ++i) {
        cl_device_id device_id = 0;
        char device_name[INFO_MAX] = {0};

        // Query the i-th platform (not the array pointer itself). A platform
        // without a GPU device is simply skipped.
        if (clGetDeviceIDs(platform_id[i], CL_DEVICE_TYPE_GPU, 1, &device_id, NULL) != CL_SUCCESS) {
            continue;
        }
        if (clGetDeviceInfo(device_id, CL_DEVICE_NAME, INFO_MAX, device_name, NULL) != CL_SUCCESS) {
            continue;
        }
        if (NULL != strstr(device_name, DRIVER_NAME)) {
            g_DeviceID = device_id;
            break;
        }
    }
    delete[] platform_id;

    if (g_DeviceID == 0) {
        printf("no match driver");
        exit(0);
    }

    g_Context = clCreateContext(NULL, 1, &g_DeviceID, NULL, NULL, &ret);
    CheckCL(ret, "clCreateContext");
    g_CommandQueue = clCreateCommandQueue(g_Context, g_DeviceID, 0, &ret);
    CheckCL(ret, "clCreateCommandQueue");

    size_t source_size = strlen(kernel_code);
    clProgram = clCreateProgramWithSource(g_Context, 1, &kernel_code, &source_size, &ret);
    CheckCL(ret, "clCreateProgramWithSource");
    ret = clBuildProgram(clProgram, 1, &g_DeviceID, NULL, NULL, NULL);
    CheckCL(ret, "clBuildProgram");
    clKernel = clCreateKernel(clProgram, "TestKernel", &ret);
    CheckCL(ret, "clCreateKernel");

    // Create Buffer
    g_InputBinBuf = clCreateBuffer(g_Context, CL_MEM_READ_WRITE, DATA_SIZE, NULL, &ret);
    CheckCL(ret, "clCreateBuffer(input)");
    g_OutputBuf = clCreateBuffer(g_Context, CL_MEM_READ_WRITE, DATA_SIZE, NULL, &ret);
    CheckCL(ret, "clCreateBuffer(output)");
}

//-----------------------------------
// Call Kernel
//
// Uploads DATA_SIZE bytes from `image` to the input buffer (blocking write),
// sets the kernel arguments and enqueues TestKernel over a
// INPUT_BIN_WIDTH x INPUT_BIN_HEIGHT global range.
// `local_work_size` is passed straight to clEnqueueNDRangeKernel; it may be
// NULL or point to two size_t values.
//-----------------------------------
void NDRangeKernel(const UINT32* image, size_t* local_work_size)
{
    // Copy InputBin To g_InputBinBuf
    CheckCL(clEnqueueWriteBuffer(g_CommandQueue, g_InputBinBuf, CL_TRUE, 0,
                                 DATA_SIZE, image, 0, NULL, NULL),
            "clEnqueueWriteBuffer");

    // Kernel
    int data_width = INPUT_BIN_WIDTH;
    CheckCL(clSetKernelArg(clKernel, 0, sizeof(cl_mem), (void*)&g_OutputBuf),   // output
            "clSetKernelArg(0)");
    CheckCL(clSetKernelArg(clKernel, 1, sizeof(cl_mem), (void*)&g_InputBinBuf), // input
            "clSetKernelArg(1)");
    CheckCL(clSetKernelArg(clKernel, 2, sizeof(int), (void*)&data_width),
            "clSetKernelArg(2)");

    size_t global_item_size[] = {INPUT_BIN_WIDTH, INPUT_BIN_HEIGHT};
    CheckCL(clEnqueueNDRangeKernel(
                g_CommandQueue,
                clKernel,
                2,                  // dim
                NULL,               // global_work_offset
                global_item_size,
                local_work_size,
                0, NULL, NULL),
            "clEnqueueNDRangeKernel");
}

//-----------------------------------
// Clear Resource
//
// Drains the command queue, then releases every OpenCL object created by Init.
//-----------------------------------
void End(void)
{
    // Make sure all enqueued work has completed before objects are released.
    clFlush(g_CommandQueue);
    clFinish(g_CommandQueue);

    clReleaseMemObject(g_InputBinBuf);
    clReleaseMemObject(g_OutputBuf);
    clReleaseKernel(clKernel);
    clReleaseProgram(clProgram);
    clReleaseCommandQueue(g_CommandQueue);
    clReleaseContext(g_Context);
}

//================================================
// main
//
// Runs the kernel with local_work_size = NULL and with {1,1}, reads both
// outputs back with blocking reads and prints them.
//================================================
int _tmain(int argc, _TCHAR* argv[])
{
    //-------------
    // Init Resource
    Init();

    //-------------
    // Test Main
    UINT32 ResultBuf[2][DATA_NUM];

    // NDRangeKernel LocalWork=NULL
    NDRangeKernel(InputData, NULL);
    CheckCL(clEnqueueReadBuffer(g_CommandQueue, g_OutputBuf, CL_TRUE, 0,
                                DATA_SIZE, ResultBuf[0], 0, NULL, NULL),
            "clEnqueueReadBuffer(LocalWork=NULL)");

#if 0
    // another error case
    NDRangeKernel(InputData, NULL);
    size_t full_item_size[] = {INPUT_BIN_WIDTH, INPUT_BIN_HEIGHT};
    NDRangeKernel(InputData, full_item_size);
    clEnqueueReadBuffer(g_CommandQueue, g_OutputBuf, CL_TRUE, 0, DATA_SIZE, ResultBuf[0], 0, NULL, NULL);
#endif

    // NDRangeKernel LocalWork={1,1}
    size_t local_item_size[] = {1,1};
    NDRangeKernel(InputData, local_item_size);
    CheckCL(clEnqueueReadBuffer(g_CommandQueue, g_OutputBuf, CL_TRUE, 0,
                                DATA_SIZE, ResultBuf[1], 0, NULL, NULL),
            "clEnqueueReadBuffer(LocalWork={1,1})");

    //-------------
    // Print Result
    PrintResult("LocalWork=NULL", ResultBuf[0]);
    PrintResult("LocalWork={1,1}", ResultBuf[1]);

    //-------------
    // Clear Resource
    End();

    system("pause");
    return 0;
}
result
LocalWork=NULL
0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
LocalWork={1,1}
0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page