Showing results for 
Search instead for 
Did you mean: 
Honored Contributor I

Wrong read from global memory

I have a problem with my implementation of a CNN on an FPGA using OpenCL.


There are 6 arrays of input data (32-bit floating point): img, weight, mean, scale, variance, and bias.


One of the kernels (the one that reads the input data from memory) is run several times with different parameters, and it works correctly for all of them

except when the arrays are mean[64], scale[64], variance[64], and bias[64].


When run with these array sizes, the kernel produces wrong results.


I print the values of these arrays immediately after reading them from global memory.


The printed values do not match the values that I wrote to the global-memory buffers. (All arrays are aligned to 64 before being written to the buffers.)


I also checked these arrays by reading them back on the host after the FPGA finished, and all of them match the arrays that I wrote.


/* Group of VEC_SIZE floats. VEC_SIZE is defined outside this snippet. */
typedef struct {
    float data[VEC_SIZE];
} lane;

/* Group of LANE_NUM floats. LANE_NUM is defined outside this snippet.
 * Used below as the element type of the bias buffer and of the
 * bias/mean/variance/scale channels. */
typedef struct {
    float lanes[LANE_NUM];
} scalar;


/* Channel declarations; every channel is declared with depth(0).
 * bias_ch, mean_ch, variance_ch and scale_ch are written by Read_Buf below
 * via write_channel_altera().
 * NOTE(review): the type `vector` used by data_ch and weight_ch is not
 * defined in the visible snippet (only `lane` and `scalar` are) - confirm
 * it is typedef'd elsewhere or whether `lane` was intended. */
channel vector data_ch __attribute__((depth(0))); 

channel vector weight_ch __attribute__((depth(0))); 

/* Per-group normalisation parameters, one `scalar` per channel write. */
channel scalar bias_ch __attribute__((depth(0))); 

channel scalar mean_ch __attribute__((depth(0))); 

channel scalar variance_ch __attribute__((depth(0))); 

channel scalar scale_ch __attribute__((depth(0))); 



void Read_Buf( uchar sizeX, 

uchar sizeY, 

uchar filterX, 

uchar filterY, 

uchar stride, 

uchar pad, 

__global lane *restrict img, 

__global vector *restrict weight, 

__global vector *restrict mean, 

__global vector *restrict variance, 

__global vector *restrict scale, 

__global scalar *restrict bias ) 




scalar bias_ch_in; 

scalar mean_ch_in; 

scalar variance_ch_in; 

scalar scale_ch_in; 


ushort global_x = get_global_id(0); 

ushort global_y = get_global_id(1); 

uint global_z = get_global_id(2); 


ushort loc_x = get_local_id(0); 

ushort loc_y = get_local_id(1);  

ushort loc_z = get_local_id(2); 


ushort block_x = get_group_id(0);  

ushort block_y = get_group_id(1);  

ushort block_z = get_group_id(2); 


(read "img" buffer) (correct for all params) 


(read "weight" buffer) (correct for all params) 


(write to channel "img") (correct for all params) 


(write to channel "weight") (correct for all params) 


(correct for all params except when the arrays have 64 elements)

if(loc_z==0 && loc_y==0 && loc_x==0){ bias_ch_in = bias[block_z]; 

mean_ch_in = mean[block_z]; 

variance_ch_in = variance[block_z]; 

scale_ch_in = scale[block_z]; 



write_channel_altera(bias_ch, bias_ch_in); 

write_channel_altera(mean_ch, mean_ch_in); 

write_channel_altera(variance_ch, variance_ch_in); 

write_channel_altera(scale_ch, scale_ch_in); 




printf("work-item x=%d, y=%d, z=%d, channel =0, write bias=%f\n", global_x, global_y, global_z, bias_ch_in.lanes[0]); 

printf("work-item x=%d, y=%d, z=%d, channel =0, write mean=%f\n", global_x, global_y, global_z, mean_ch_in.lanes[0]); 

printf("work-item x=%d, y=%d, z=%d, channel =0, write variance=%f\n", global_x, global_y, global_z, variance_ch_in.lanes[0]); 

printf("work-item x=%d, y=%d, z=%d, channel =0, write scale=%f\n", global_x, global_y, global_z, scale_ch_in.lanes[0]); 





Thank you in advance.










0 Kudos
0 Replies