<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Crash Intel(R) OpenCL Offline Compiler (x64) in OpenCL* for CPU</title>
    <link>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792366#M631</link>
    <description>I would expect you are running into the same bug I am in: &lt;A href="http://software.intel.com/en-us/forums/showthread.php?t=83665&amp;amp;o=a&amp;amp;s=lr" target="_blank"&gt;http://software.intel.com/en-us/forums/showthread.php?t=83665&amp;amp;o=a&amp;amp;s=lr&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;Did you run your code using the latest release? I didn't see that my bug was fixed in the release notes and I haven't installed the latest release yet to check if it's fixed. So you'll save me some time trying if you are.</description>
    <pubDate>Fri, 08 Jul 2011 17:02:00 GMT</pubDate>
    <dc:creator>Michael_Downey</dc:creator>
    <dc:date>2011-07-08T17:02:00Z</dc:date>
    <item>
      <title>Crash Intel(R) OpenCL Offline Compiler (x64)</title>
      <link>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792365#M630</link>
      <description>&lt;P&gt;This code causes the Offline Compiler to crash:&lt;/P&gt;&lt;PRE&gt;[bash]void sum_reduce_and_store(__local float *sdata, __global float *store_arr, float value, int store_off)
{
	unsigned int lsz = get_local_size(0);
	unsigned int lid = get_local_id(0);
	sdata[lid] = value;
	barrier(CLK_LOCAL_MEM_FENCE);
	if (lsz != 1) {
		if (lsz &amp;gt;= 512) { if (lid &amp;lt; 256) { sdata[lid] += sdata[lid + 256]; } barrier(CLK_LOCAL_MEM_FENCE); }
		if (lsz &amp;gt;= 256) { if (lid &amp;lt; 128) { sdata[lid] += sdata[lid + 128]; } barrier(CLK_LOCAL_MEM_FENCE); }
		if (lsz &amp;gt;= 128) { if (lid &amp;lt;  64) { sdata[lid] += sdata[lid +  64]; } barrier(CLK_LOCAL_MEM_FENCE); }
		if (lid &amp;lt;  32) { sdata[lid] += sdata[lid +  32]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid &amp;lt;  16) { sdata[lid] += sdata[lid +  16]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid &amp;lt;  8) { sdata[lid] += sdata[lid +  8]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid &amp;lt;  4) { sdata[lid] += sdata[lid +  4]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid &amp;lt;  2) { sdata[lid] += sdata[lid +  2]; } barrier(CLK_LOCAL_MEM_FENCE);
		if (lid &amp;lt;  1) { sdata[lid] += sdata[lid +  1]; } barrier(CLK_LOCAL_MEM_FENCE);
	}
	if (lid == 0) store_arr[store_off] = sdata[0];
	barrier(CLK_LOCAL_MEM_FENCE);
}
float activation_derived(float steepness, int act_func, __global float *outputs, __global float *sums, int o_i)
{
	switch (act_func)
	{
		case 0:
		case 12:
		case 13:
			return (steepness);
		case 3:
		case 4:
			return (2.0f * steepness * (((outputs[o_i]) &amp;lt; (0.01f)) ? (0.01f) : (((outputs[o_i]) &amp;gt; (0.99f)) ? (0.99f) : (outputs[o_i]))) * (1.0f - (((outputs[o_i]) &amp;lt; (0.01f)) ? (0.01f) : (((outputs[o_i]) &amp;gt; (0.99f)) ? (0.99f) : (outputs[o_i])))));
		case 5:
		case 6:
			return steepness * (1.0f - ((((outputs[o_i]) &amp;lt; (-0.98f)) ? (-0.98f) : (((outputs[o_i]) &amp;gt; (0.98f)) ? (0.98f) : (outputs[o_i])))*(((outputs[o_i]) &amp;lt; (-0.98f)) ? (-0.98f) : (((outputs[o_i]) &amp;gt; (0.98f)) ? (0.98f) : (outputs[o_i])))));
		case 7:
			return (-2.0f * sums[o_i] * outputs[o_i] * steepness * steepness);
		case 8:
			return (-2.0f * sums[o_i] * (outputs[o_i]+1.0f) * steepness * steepness);
		case 10:
			return (steepness * 1.0f / (2.0f * (1.0f + (((sums[o_i]) &amp;gt; 0) ? (sums[o_i]) : -(sums[o_i]))) * (1.0f + (((sums[o_i]) &amp;gt; 0) ? (sums[o_i]) : -(sums[o_i])))));
		case 11:
			return (steepness * 1.0f / ((1.0f + (((sums[o_i]) &amp;gt; 0) ? (sums[o_i]) : -(sums[o_i]))) * (1.0f + (((sums[o_i]) &amp;gt; 0) ? (sums[o_i]) : -(sums[o_i])))));
		case 14:
			return (steepness*cos(steepness*sums[o_i]));
		case 15:
			return (steepness*-sin(steepness*sums[o_i]));
		case 16:
			return (steepness*cos(steepness*sums[o_i])/2.0f);
		case 17:
			return (steepness*-sin(steepness*sums[o_i])/2.0f);
		case 2: //This should be an error
		case 1: //This should be an error
		case 9: //FIXME
			return -99.0;
		default: return 0;
	}
}
void backpropagate_MSE(__constant unsigned int *sizes,
			__global unsigned int *num_layers,
			__global unsigned int *num_neurons, 
			__global unsigned int *num_inputs,
			__global unsigned int *num_outputs,
			__global float *steepness,
			__global int *activation,
			__global float *weights,
			__global float *inputs,
			__global float *sums,
			__global float *outputs,
			__global float *train_errors,
			__global float *weights_deltas,
			__local float *steep_s,
			__local int *act_s,
			__local float *weights_s,
			__local float *reduce_s )
{
	unsigned int input_id = get_global_id(0);
	unsigned int lid = get_local_id(0);
	unsigned int lsz = get_local_size(0);
	unsigned int gnum;
	unsigned int gid = get_group_id(0);
	int l;
	if (sizes[5] % sizes[7])
		gnum = 1 + (sizes[5] / sizes[7]);
	else
		gnum = sizes[5] / sizes[7];
	for(l = num_layers[get_global_id(1)]-1; l &amp;gt;= 0; --l) {
		unsigned int part_layer_off = get_global_id(1)*sizes[1]+l;
		unsigned int num_neurons_l = num_neurons[part_layer_off];
		unsigned int n_layer_off = sizes[2]*part_layer_off;
		unsigned int o_layer_off = sizes[4]*part_layer_off;
		unsigned int output_off = o_layer_off-sizes[4];
		unsigned int n;
		barrier(CLK_LOCAL_MEM_FENCE);
		for(n = 0; n &amp;lt; num_neurons_l; n += lsz) {
			unsigned int neuron_num = n+lid;
			if (neuron_num &amp;lt; num_neurons[part_layer_off]){
				steep_s[neuron_num] = steepness[n_layer_off+neuron_num];
				act_s[neuron_num] = activation[n_layer_off+neuron_num];
			}
		}
		barrier(CLK_LOCAL_MEM_FENCE);
		for(n = 0; n &amp;lt; num_neurons_l &amp;amp;&amp;amp; l != 0; ++n) {
			unsigned int num_outputs_l = num_outputs[n_layer_off+n];
			unsigned int o;
			for(o = 0; o &amp;lt; num_outputs_l; ++o) {
				if (sizes[5] &amp;gt; input_id)
					train_errors[output_off*sizes[5]+input_id] = 0.0f;
				++output_off;
			}
		}
		output_off = o_layer_off;
		for(n = 0; n &amp;lt; num_neurons[part_layer_off]; ++n) {
			unsigned int num_outputs_l = num_outputs[n_layer_off+n];
			unsigned int num_inputs_l  = num_inputs[n_layer_off+n];
			unsigned int o;
			for(o = 0; o &amp;lt; num_outputs_l; ++o) {
				unsigned int i;
				unsigned int o_i = output_off*sizes[5]+input_id;
				float error;
				if (sizes[5] &amp;gt; input_id)
					train_errors[o_i] = error =
					train_errors[o_i]*activation_derived(steep_s[n], act_s[n], outputs, sums, o_i);
				for(i = 0; i &amp;lt; num_inputs_l; ++i) {
					unsigned int weights_i = 0;
					unsigned int prev_output_i = 0;
					float delta = 0.0f;
					if (l != 0) {
						weights_i = (sizes[3]*o+i) % lsz;
						if (weights_i == 0) {
							barrier(CLK_LOCAL_MEM_FENCE);
							if (sizes[3]*o+i+lid &amp;lt; sizes[3]*num_outputs_l)
								weights_s[lid] = weights[output_off*sizes[3]+i+lid];
							barrier(CLK_LOCAL_MEM_FENCE);
						}
					}
					if (sizes[5] &amp;gt; input_id) {
						if(i == num_inputs_l-1){
							prev_output_i = (o_layer_off-sizes[4]+i)*sizes[5]+input_id;
							delta = error;
						} else if(l == 0) {
							delta = inputs[i*sizes[5]+input_id] * error;
						} else {
							prev_output_i = (o_layer_off-sizes[4]+i)*sizes[5]+input_id;
							delta = outputs[prev_output_i] * error;
						}
					}
					sum_reduce_and_store(reduce_s, weights_deltas, delta,
						(output_off*sizes[3]+i)*gnum+gid);
					if(l != 0 &amp;amp;&amp;amp; sizes[5] &amp;gt; input_id)
						train_errors[prev_output_i] += error * weights_s[weights_i];
				}
				++output_off;
			}
		}
	}
}

__kernel void train_batch(
		__constant unsigned int *sizes,
		__global float *f_params,
		__global unsigned int *num_layers,
		__global unsigned int *num_neurons,
		__global unsigned int *num_inputs,
		__global unsigned int *num_outputs,
		__global float *steepness,
		__global int *activation,
		__global float *weights,
		__global float *inputs,
		__global float *sums,
		__global float *outputs,
		__global float *train_errors,
		__global float *actual_outputs,
		__global float *MSE_values,
		__global float *num_bit_fail,
		__global float *weights_deltas,
		__local float *steep_s,
		__local int *act_s,
		__local float *weights_s,
		__local float *reduce_s)
{
	backpropagate_MSE(sizes, num_layers, num_neurons, num_inputs, num_outputs,
		steepness, activation, weights, inputs, sums,
		outputs, train_errors, weights_deltas,
		steep_s, act_s, weights_s, reduce_s);
}
&lt;BR /&gt;[/bash]&lt;/PRE&gt;</description>
      <pubDate>Fri, 08 Jul 2011 06:16:36 GMT</pubDate>
      <guid>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792365#M630</guid>
      <dc:creator>Alex_Tar</dc:creator>
      <dc:date>2011-07-08T06:16:36Z</dc:date>
    </item>
    <item>
      <title>Crash Intel(R) OpenCL Offline Compiler (x64)</title>
      <link>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792366#M631</link>
      <description>I would expect you are running into the same bug I am in: &lt;A href="http://software.intel.com/en-us/forums/showthread.php?t=83665&amp;amp;o=a&amp;amp;s=lr" target="_blank"&gt;http://software.intel.com/en-us/forums/showthread.php?t=83665&amp;amp;o=a&amp;amp;s=lr&lt;/A&gt;&lt;BR /&gt;&lt;BR /&gt;Did you run your code using the latest release? I didn't see that my bug was fixed in the release notes and I haven't installed the latest release yet to check if it's fixed. So you'll save me some time trying if you are.</description>
      <pubDate>Fri, 08 Jul 2011 17:02:00 GMT</pubDate>
      <guid>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792366#M631</guid>
      <dc:creator>Michael_Downey</dc:creator>
      <dc:date>2011-07-08T17:02:00Z</dc:date>
    </item>
    <item>
      <title>Crash Intel(R) OpenCL Offline Compiler (x64)</title>
      <link>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792367#M632</link>
      <description>&lt;P&gt;Yes, I am using the latest release. What is even stranger is that this code compiled in the beta version.&lt;/P&gt;</description>
      <pubDate>Fri, 08 Jul 2011 18:21:52 GMT</pubDate>
      <guid>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792367#M632</guid>
      <dc:creator>Alex_Tar</dc:creator>
      <dc:date>2011-07-08T18:21:52Z</dc:date>
    </item>
    <item>
      <title>Crash Intel(R) OpenCL Offline Compiler (x64)</title>
      <link>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792368#M633</link>
      <description>&lt;P&gt;Hello Alex,&lt;/P&gt;&lt;P&gt;We've managed to reproduce the failure and the issue was submitted to the compiler team to find the root cause.&lt;/P&gt;&lt;P&gt;We'll work to fix it in our next release.&lt;/P&gt;&lt;P&gt;Thanks,&lt;/P&gt;&lt;P&gt;Uri Levy&lt;/P&gt;</description>
      <pubDate>Sun, 10 Jul 2011 07:22:46 GMT</pubDate>
      <guid>https://community.intel.com/t5/OpenCL-for-CPU/Crash-Intel-R-OpenCL-Offline-Compiler-x64/m-p/792368#M633</guid>
      <dc:creator>Uri_L_Intel</dc:creator>
      <dc:date>2011-07-10T07:22:46Z</dc:date>
    </item>
  </channel>
</rss>

