Intel® High Level Design
Support for Intel® High Level Synthesis Compiler, DSP Builder, OneAPI for Intel® FPGAs, Intel® FPGA SDK for OpenCL™
688 Discussions

Receiving an error when trying to use channels in OpenCL

offreitas
Beginner
1,275 Views

Hello,

 

I'm trying to use a multikernel design and sending data through channels. However, I keep getting this error when trying to emulate my design:

 

host: acl_emulator.cpp:309: void* __acl_emulator_channel_dequeue(size_t): Assertion `!__acl_emulator_channel_empty(ch)' failed.

 

I can't find this "acl_emulator.cpp" archive anywhere. What am I probably doing wrong?

 

Thanks

0 Kudos
3 Replies
HRZ
Valued Contributor III
1,255 Views

That is an internal compiler error. If you can post your kernel code, it would be easier to find the issue.

0 Kudos
offreitas
Beginner
1,218 Views
#include "../header/stencil.h"

#define CHANNEL_SIZE 16

// Enable channels
#pragma OPENCL EXTENSION cl_intel_channels : enable

// Floating point optimizations
#pragma clang fp contract(fast)
#pragma clang fp reassoc(on)

/***********************************************************
*********************** DATA TYPES *************************
***********************************************************/
typedef struct {
	float data[ACCESS_SIZE];
} channelData;

/***********************************************************
************************ CHANNELS **************************
***********************************************************/
/*
 * Arguments
 */
channel float3 fargs[TIME + 1] __attribute__((depth(0)));

/*
 * Matrices
 */
channel channelData prev[TIME + 1] __attribute__((depth(CHANNEL_SIZE)));
channel channelData next[TIME + 1] __attribute__((depth(CHANNEL_SIZE)));
channel channelData vel[TIME + 1]  __attribute__((depth(CHANNEL_SIZE)));

/***********************************************************
********************* CONSTANTS KERNEL *********************
***********************************************************/
__attribute__((max_global_work_dim(0)))
__kernel void constants(const float div_dxSquared, const float div_dzSquared, const float dtSquared) {
	/*
	 * Transform constants into OpenCL's data struct
	 */
	// Output constants
	float3 fconstants_out = (float3)(div_dxSquared, div_dzSquared, dtSquared);

	// Inputs contants
	float3 fconstants_in;

	/*
	 * Communication with channels
	 */
	// Writes on channels
	write_channel_intel(fargs[0], fconstants_out);
	mem_fence(CLK_CHANNEL_MEM_FENCE);

	// Reads from channels
	/*
	 * In order to prevent compiler from inferring depth channels with constant's channels,
	 * the code needs to create a false cycle of channels
	 */
	fconstants_in = read_channel_intel(fargs[TIME]);
	mem_fence(CLK_CHANNEL_MEM_FENCE);
}

/***********************************************************
********************** READER KERNEL ***********************
***********************************************************/
__kernel void reader(__global volatile float* restrict prev_base,
		     __global volatile float* restrict next_base,
		     __constant float* restrict vel_squared) {
	/*
	 * Variables to calculate index in grid
	 */
	uint group_offset = get_local_id(0) * ACCESS_SIZE;
	uint x_offset = group_offset - 1;
	uint y = get_global_id(1);
	uint x_limit = ACCESS_SIZE + ORDER;

	/*
	 * Variable to send to PE
	 */
	channelData input;

	for (uint i = 0; i < ACCESS_SIZE; i++) input.data[i] = 0.0f;

	/*
	 * Sends data to PE
	 */
	for (uint i = 0; i < x_limit; i++) {
		uint real_x = x_offset + i;
		uint index = y * NX + real_x;

		if (real_x >= 0 && real_x < NX) {
			input.data[i] = prev_base[index];
		}
	}

	write_channel_intel(prev[0], input);
}

/***********************************************************
********************** WRITER KERNEL ***********************
***********************************************************/
__kernel void writer(__global volatile float* restrict next_base) {
	/*
	 * Variables to calculate index in grid
	 */
	uint group_offset = get_local_id(0) * ACCESS_SIZE;
	uint x_offset = group_offset - 1;
	uint y = get_global_id(1);
	uint x_limit = ACCESS_SIZE + ORDER;

	/*
	 * Reads from PE
	 */
	channelData output = read_channel_intel(prev[TIME]);

	/*
	 * Writes matrix
	 */
	for (uint i = 0; i < x_limit; i++) {
		uint real_x = x_offset + i;
		uint index = y * NX + real_x;

		if (real_x >= 0 && real_x < NX) {
			next_base[index] = output.data[i];
		}
	}
}

/***********************************************************
******************* PROCESSING ELEMENTS ********************
***********************************************************/
__attribute__((max_global_work_dim(0)))
__attribute__((autorun))
__attribute__((num_compute_units(TIME, 1, 1)))
__kernel void PE() {
	/*
	 * Gets current and next PE
	 */
	const uint id = get_compute_id(0);
	const uint next_id = id + 1;

	/*
	 * Reads arguments
	 */
	const float3 fconstants = read_channel_intel(fargs[id]);
	mem_fence(CLK_CHANNEL_MEM_FENCE);

	/*
	 * Writes arguments on next channel
	 */
	write_channel_intel(fargs[next_id], fconstants);

	/*
	 * Memory Allocation
	 */
	// Shift registers
	__local float __attribute__((doublepump,
				     memory("MLAB"),
				     bankwidth(4))) sr_prev[SR_SIZE],
		      				    sr_next[SR_SIZE],
		      				    sr_vel[SR_SIZE];

	const float div_dxSquared = fconstants.s0;
	const float div_dzSquared = fconstants.s1;
	const float dtSquared = fconstants.s2;

	// Channel communication
	channelData input;
	channelData output;

	// Times channel will be read
	const uint count = 2 * GROUPS + 1;

	/*
	 * Computation
	 */
	// Index of shift registers
	uint index = 0;

	for (uint i = 0; i < count; i++) {
		input = read_channel_intel(prev[id]);

		for (uint pos = 0; pos < ACCESS_SIZE; pos++)
			sr_prev[index++] = input.data[pos];
	}

	for (uint i = 0; i < GROUPS * NZ - count; i++) {
		for (uint group = 0; group < GROUPS; group++) {}
		// Writes future discarded values in channels
		for (uint pos = 0; pos < ACCESS_SIZE; pos++)
			output.data[pos] = sr_prev[pos];
		
		write_channel_intel(prev[next_id], output);

		// Shifts registers
		#pragma unroll
		for (uint pos = 0; pos < SR_LIMIT; pos++) {
			uint shift_pos = ACCESS_SIZE + pos;

			sr_prev[pos] = sr_prev[shift_pos];
		}

		// Read from channels
		input = read_channel_intel(prev[id]);

		for (uint pos = SR_LIMIT; pos < SR_SIZE; pos++)
			sr_prev[pos] = input.data[pos - SR_LIMIT];
	}
}
0 Kudos
AnilErinch_A_Intel
1,160 Views

Hi ,

Thank you for contacting Intel Support.


Can you try the example mentioned in the below link and check whether same error happens.


https://www.intel.com/content/www/us/en/programmable/customertraining/OLT/OpenCLChannels/lab.pdf


Thanks and Regards

Anil


0 Kudos
Reply