OpenCL* for CPU
Ask questions and share information on Intel® SDK for OpenCL™ Applications and OpenCL™ implementations for Intel® CPU.

Crash in Intel(R) OpenCL Offline Compiler (x64)

Alex_Tar
Beginner

The following code crashes the Offline Compiler:

[bash]
void sum_reduce_and_store(__local float *sdata, __global float *store_arr, float value, int store_off)
{
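	/* tree-reduce each work-item's value in local memory, then have
	   work-item 0 store the work-group sum; assumes a power-of-two
	   work-group size of at most 512 */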
	unsigned int lsz = get_local_size(0);
	unsigned int lid = get_local_id(0);
	sdata[lid] = value;
	barrier(CLK_LOCAL_MEM_FENCE);
	if (lsz != 1) {
		if (lsz >= 512) { if (lid < 256) { sdata[lid] += sdata[lid + 256]; } barrier(CLK_LOCAL_MEM_FENCE); }
		if (lsz >= 256) { if (lid < 128) { sdata[lid] += sdata[lid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
		if (lsz >= 128) { if (lid <  64) { sdata[lid] += sdata[lid +  64]; } barrier(CLK_LOCAL_MEM_FENCE); }
		if (lid <  32) { sdata[lid] += sdata[lid +  32]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid <  16) { sdata[lid] += sdata[lid +  16]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid <  8) { sdata[lid] += sdata[lid +  8]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid <  4) { sdata[lid] += sdata[lid +  4]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid <  2) { sdata[lid] += sdata[lid +  2]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid <  1) { sdata[lid] += sdata[lid +  1]; } barrier(CLK_LOCAL_MEM_FENCE);
	}
	if (lid == 0) store_arr[store_off] = sdata[0];
	barrier(CLK_LOCAL_MEM_FENCE);
}
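/* derivative of the activation function selected by act_func; the
   nested ternaries inline clamping of outputs (cases 3-6) and the
   absolute value of sums (cases 10-11) */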
float activation_derived(float steepness, int act_func, __global float *outputs, __global float *sums, int o_i)
{
	switch (act_func)
	{
		case 0:
		case 12:
		case 13:
			return (steepness);
		case 3:
		case 4:
			return (2.0f * steepness
				* (((outputs[o_i]) < (0.01f)) ? (0.01f) : (((outputs[o_i]) > (0.99f)) ? (0.99f) : (outputs[o_i])))
				* (1.0f - (((outputs[o_i]) < (0.01f)) ? (0.01f) : (((outputs[o_i]) > (0.99f)) ? (0.99f) : (outputs[o_i])))));
		case 5:
		case 6:
			return steepness * (1.0f
				- ((((outputs[o_i]) < (-0.98f)) ? (-0.98f) : (((outputs[o_i]) > (0.98f)) ? (0.98f) : (outputs[o_i])))
					* (((outputs[o_i]) < (-0.98f)) ? (-0.98f) : (((outputs[o_i]) > (0.98f)) ? (0.98f) : (outputs[o_i])))));
		case 7:
			return (-2.0f * sums[o_i] * outputs[o_i] * steepness * steepness);
		case 8:
			return (-2.0f * sums[o_i] * (outputs[o_i]+1.0f) * steepness * steepness);
		case 10:
			return (steepness * 1.0f / (2.0f
				* (1.0f + (((sums[o_i]) > 0) ? (sums[o_i]) : -(sums[o_i])))
				* (1.0f + (((sums[o_i]) > 0) ? (sums[o_i]) : -(sums[o_i])))));
		case 11:
			return (steepness * 1.0f / ((1.0f + (((sums[o_i]) > 0) ? (sums[o_i]) : -(sums[o_i])))
				* (1.0f + (((sums[o_i]) > 0) ? (sums[o_i]) : -(sums[o_i])))));
		case 14:
			return (steepness*cos(steepness*sums[o_i]));
		case 15:
			return (steepness*-sin(steepness*sums[o_i]));
		case 16:
			return (steepness*cos(steepness*sums[o_i])/2.0f);
		case 17:
			return (steepness*-sin(steepness*sums[o_i])/2.0f);
		case 2: //This should be an error
		case 1: //This should be an error
		case 9: //FIXME
			return -99.0f;
		default: return 0;
	}
}
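/* back-propagates the training error through the layers; sizes[]
   apparently packs the network geometry (per-layer strides/counts),
   with sizes[5] the number of inputs and sizes[7] the work-group size */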
void backpropagate_MSE(__constant unsigned int *sizes,
			__global unsigned int *num_layers,
			__global unsigned int *num_neurons, 
			__global unsigned int *num_inputs,
			__global unsigned int *num_outputs,
			__global float *steepness,
			__global int *activation,
			__global float *weights,
			__global float *inputs,
			__global float *sums,
			__global float *outputs,
			__global float *train_errors,
			__global float *weights_deltas,
			__local float *steep_s,
			__local int *act_s,
			__local float *weights_s,
			__local float *reduce_s )
{
	unsigned int input_id = get_global_id(0);
	unsigned int lid = get_local_id(0);
	unsigned int lsz = get_local_size(0);
	unsigned int gnum;
	unsigned int gid = get_group_id(0);
	int l;
	if (sizes[5] % sizes[7])
		gnum = 1 + (sizes[5] / sizes[7]);
	else
		gnum = sizes[5] / sizes[7];
	for(l = num_layers[get_global_id(1)]-1; l >= 0; --l) {
		unsigned int part_layer_off = get_global_id(1)*sizes[1]+l;
		unsigned int num_neurons_l = num_neurons[part_layer_off];
		unsigned int n_layer_off = sizes[2]*part_layer_off;
		unsigned int o_layer_off = sizes[4]*part_layer_off;
		unsigned int output_off = o_layer_off-sizes[4];
		unsigned int n;
		barrier(CLK_LOCAL_MEM_FENCE);
		for(n = 0; n < num_neurons_l; n += lsz) {
			unsigned int neuron_num = n+lid;
			if (neuron_num < num_neurons[part_layer_off]){
				steep_s[neuron_num] = steepness[n_layer_off+neuron_num];
				act_s[neuron_num] = activation[n_layer_off+neuron_num];
			}
		}
		barrier(CLK_LOCAL_MEM_FENCE);
		for(n = 0; n < num_neurons_l && l != 0; ++n) {
			unsigned int num_outputs_l = num_outputs[n_layer_off+n];
			unsigned int o;
			for(o = 0; o < num_outputs_l; ++o) {
				if (sizes[5] > input_id)
					train_errors[output_off*sizes[5]+input_id] = 0.0f;
				++output_off;
			}
		}
		output_off = o_layer_off;
		for(n = 0; n < num_neurons[part_layer_off]; ++n) {
			unsigned int num_outputs_l = num_outputs[n_layer_off+n];
			unsigned int num_inputs_l  = num_inputs[n_layer_off+n];
			unsigned int o;
			for(o = 0; o < num_outputs_l; ++o) {
				unsigned int i;
				unsigned int o_i = output_off*sizes[5]+input_id;
				float error;
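				/* note: steep_s and act_s are __local pointers, but
				   activation_derived declares scalar float/int parameters;
				   steep_s[n] and act_s[n] were probably intended */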
				if (sizes[5] > input_id)
					train_errors[o_i] = error =
					train_errors[o_i]*activation_derived(steep_s, act_s, outputs, sums, o_i);
				for(i = 0; i < num_inputs_l; ++i) {
					unsigned int weights_i = 0;
					unsigned int prev_output_i = 0;
					float delta = 0.0f;
					if (l != 0) {
						weights_i = (sizes[3]*o+i) % lsz;
						if (weights_i == 0) {
							barrier(CLK_LOCAL_MEM_FENCE);
							if (sizes[3]*o+i+lid < sizes[3]*num_outputs_l)
								weights_s[lid] = weights[output_off*sizes[3]+i+lid];
							barrier(CLK_LOCAL_MEM_FENCE);
						}
					}
					if (sizes[5] > input_id) {
						if(i == num_inputs_l-1){
							prev_output_i = (o_layer_off-sizes[4]+i)*sizes[5]+input_id;
							delta = error;
						} else if(l == 0) {
							delta = inputs[i*sizes[5]+input_id] * error;
						} else {
							prev_output_i = (o_layer_off-sizes[4]+i)*sizes[5]+input_id;
							delta = outputs[prev_output_i] * error;
						}
					}
					sum_reduce_and_store(reduce_s, weights_deltas, delta,
						(output_off*sizes[3]+i)*gnum+gid);
					if(l != 0 && sizes[5] > input_id)
						train_errors[prev_output_i] += error * weights_s[weights_i];
				}
				++output_off;
			}
		}
	}
}

__kernel void train_batch(
		__constant unsigned int *sizes,
		__global float *f_params,
		__global unsigned int *num_layers,
		__global unsigned int *num_neurons,
		__global unsigned int *num_inputs,
		__global unsigned int *num_outputs,
		__global float *steepness,
		__global int *activation,
		__global float *weights,
		__global float *inputs,
		__global float *sums,
		__global float *outputs,
		__global float *train_errors,
		__global float *actual_outputs,
		__global float *MSE_values,
		__global float *num_bit_fail,
		__global float *weights_deltas,
		__local float *steep_s,
		__local int *act_s,
		__local float *weights_s,
		__local float *reduce_s)
{
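	/* kernel entry point; currently it runs only the back-propagation pass */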
	backpropagate_MSE(sizes, num_layers, num_neurons, num_inputs, num_outputs,
		steepness, activation, weights, inputs, sums,
		outputs, train_errors, weights_deltas,
		steep_s, act_s, weights_s, reduce_s);
}

[/bash]
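One more observation that may help with triage: the call to activation_derived inside backpropagate_MSE passes the __local pointers steep_s and act_s where the function declares scalar float/int parameters. That probably should be steep_s[n] and act_s[n]; either way, I'd expect the compiler to report a diagnostic rather than crash. A minimal sketch of the pattern I believe was intended (scale_by_steepness and demo are hypothetical stand-ins, not my original code):

[cpp]
/* Sketch only: pass the dereferenced per-neuron elements, not the
   raw __local pointers. scale_by_steepness is a hypothetical
   stand-in for activation_derived. */
float scale_by_steepness(float steepness, int act_func, float x)
{
	return (act_func == 0) ? steepness : steepness * x;
}

__kernel void demo(__global const float *steepness,
		__global const int *activation,
		__global float *out,
		__local float *steep_s,
		__local int *act_s)
{
	unsigned int gid = get_global_id(0);
	unsigned int lid = get_local_id(0);
	/* stage per-neuron scalars into local memory, as in the code above */
	steep_s[lid] = steepness[gid];
	act_s[lid] = activation[gid];
	barrier(CLK_LOCAL_MEM_FENCE);
	/* scalar elements steep_s[lid] / act_s[lid], not the pointers */
	out[gid] = scale_by_steepness(steep_s[lid], act_s[lid], out[gid]);
}
[/cpp]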
Michael_Downey
Beginner
I expect you are running into the same bug I am: http://software.intel.com/en-us/forums/showthread.php?t=83665&o=a&s=lr

Did you run your code with the latest release? The release notes don't list my bug as fixed, and I haven't installed the latest release yet to check, so you'll save me some time if you have.
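If you want to check outside the offline tool, here's a minimal host harness (a sketch; it assumes the kernel source is saved as crash.cl) that feeds the same source to the runtime compiler via clBuildProgram and prints the build log, so you can see whether it trips over the same source:

[cpp]
/* Minimal sketch: build crash.cl with the OpenCL runtime compiler
   and dump the build log. crash.cl is an assumed file name. */
#include <stdio.h>
#include <stdlib.h>
#include <CL/cl.h>

int main(void)
{
	FILE *f = fopen("crash.cl", "rb");
	long len;
	char *src, *log;
	cl_platform_id platform;
	cl_device_id device;
	cl_context ctx;
	cl_program prog;
	cl_int err;
	size_t log_len = 0;

	if (!f) { fprintf(stderr, "cannot open crash.cl\n"); return 1; }
	fseek(f, 0, SEEK_END);
	len = ftell(f);
	fseek(f, 0, SEEK_SET);
	src = malloc(len + 1);
	if (fread(src, 1, len, f) != (size_t)len) { fclose(f); return 1; }
	src[len] = '\0';
	fclose(f);

	clGetPlatformIDs(1, &platform, NULL);
	clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, NULL);
	ctx = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
	prog = clCreateProgramWithSource(ctx, 1, (const char **)&src, NULL, NULL);
	err = clBuildProgram(prog, 1, &device, "", NULL, NULL);

	clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_len);
	log = malloc(log_len + 1);
	clGetProgramBuildInfo(prog, device, CL_PROGRAM_BUILD_LOG, log_len, log, NULL);
	log[log_len] = '\0';
	printf("clBuildProgram returned %d\nbuild log:\n%s\n", (int)err, log);
	free(log);
	free(src);
	return 0;
}
[/cpp]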
Alex_Tar
Beginner

Yes, I'm using the latest release. What makes it stranger is that this code compiled with the beta version.

Uri_L_Intel
Employee

Hello Alex,

We've managed to reproduce the failure, and the issue was submitted to the compiler team to find the root cause.

We'll work to fix it in our next release.

Thanks,

Uri Levy
