Intel® High Level Design
Support for Intel® High Level Synthesis Compiler, DSP Builder, OneAPI for Intel® FPGAs, Intel® FPGA SDK for OpenCL™
Announcements
Intel Support hours are Monday-Fridays, 8am-5pm PST, except Holidays. Thanks to our community members who provide support during our down time or before we get to your questions. We appreciate you!

Need Forum Guidance? Click here
Search our FPGA Knowledge Articles here.
528 Discussions

Receiving an error when trying to use channels in OpenCL

offreitas
Beginner
468 Views

Hello,

 

I'm trying to use a multikernel design and sending data through channels. However, I keep getting this error when trying to emulate my design:

 

host: acl_emulator.cpp:309: void* __acl_emulator_channel_dequeue(size_t): Assertion `!__acl_emulator_channel_empty(ch)' failed.

 

I can't find this "acl_emulator.cpp" archive anywhere. What am I probably doing wrong?

 

Thanks

0 Kudos
3 Replies
HRZ
Valued Contributor II
448 Views

That is an internal compiler error. If you can post your kernel code, it would be easier to find the issue.

offreitas
Beginner
411 Views
#include "../header/stencil.h"

#define CHANNEL_SIZE 16

// Enable channels
#pragma OPENCL EXTENSION cl_intel_channels : enable

// Floating point optimizations
#pragma clang fp contract(fast)
#pragma clang fp reassoc(on)

/***********************************************************
*********************** DATA TYPES *************************
***********************************************************/
typedef struct {
	float data[ACCESS_SIZE];
} channelData;

/***********************************************************
************************ CHANNELS **************************
***********************************************************/
/*
 * Arguments
 */
channel float3 fargs[TIME + 1] __attribute__((depth(0)));

/*
 * Matrices
 */
channel channelData prev[TIME + 1] __attribute__((depth(CHANNEL_SIZE)));
channel channelData next[TIME + 1] __attribute__((depth(CHANNEL_SIZE)));
channel channelData vel[TIME + 1]  __attribute__((depth(CHANNEL_SIZE)));

/***********************************************************
********************* CONSTANTS KERNEL *********************
***********************************************************/
__attribute__((max_global_work_dim(0)))
__kernel void constants(const float div_dxSquared, const float div_dzSquared, const float dtSquared) {
	/*
	 * Transform constants into OpenCL's data struct
	 */
	// Output constants
	float3 fconstants_out = (float3)(div_dxSquared, div_dzSquared, dtSquared);

	// Inputs contants
	float3 fconstants_in;

	/*
	 * Communication with channels
	 */
	// Writes on channels
	write_channel_intel(fargs[0], fconstants_out);
	mem_fence(CLK_CHANNEL_MEM_FENCE);

	// Reads from channels
	/*
	 * In order to prevent compiler from inferring depth channels with constant's channels,
	 * the code needs to create a false cycle of channels
	 */
	fconstants_in = read_channel_intel(fargs[TIME]);
	mem_fence(CLK_CHANNEL_MEM_FENCE);
}

/***********************************************************
********************** READER KERNEL ***********************
***********************************************************/
__kernel void reader(__global volatile float* restrict prev_base,
		     __global volatile float* restrict next_base,
		     __constant float* restrict vel_squared) {
	/*
	 * Variables to calculate index in grid
	 */
	uint group_offset = get_local_id(0) * ACCESS_SIZE;
	uint x_offset = group_offset - 1;
	uint y = get_global_id(1);
	uint x_limit = ACCESS_SIZE + ORDER;

	/*
	 * Variable to send to PE
	 */
	channelData input;

	for (uint i = 0; i < ACCESS_SIZE; i++) input.data[i] = 0.0f;

	/*
	 * Sends data to PE
	 */
	for (uint i = 0; i < x_limit; i++) {
		uint real_x = x_offset + i;
		uint index = y * NX + real_x;

		if (real_x >= 0 && real_x < NX) {
			input.data[i] = prev_base[index];
		}
	}

	write_channel_intel(prev[0], input);
}

/***********************************************************
********************** WRITER KERNEL ***********************
***********************************************************/
__kernel void writer(__global volatile float* restrict next_base) {
	/*
	 * Variables to calculate index in grid
	 */
	uint group_offset = get_local_id(0) * ACCESS_SIZE;
	uint x_offset = group_offset - 1;
	uint y = get_global_id(1);
	uint x_limit = ACCESS_SIZE + ORDER;

	/*
	 * Reads from PE
	 */
	channelData output = read_channel_intel(prev[TIME]);

	/*
	 * Writes matrix
	 */
	for (uint i = 0; i < x_limit; i++) {
		uint real_x = x_offset + i;
		uint index = y * NX + real_x;

		if (real_x >= 0 && real_x < NX) {
			next_base[index] = output.data[i];
		}
	}
}

/***********************************************************
******************* PROCESSING ELEMENTS ********************
***********************************************************/
__attribute__((max_global_work_dim(0)))
__attribute__((autorun))
__attribute__((num_compute_units(TIME, 1, 1)))
__kernel void PE() {
	/*
	 * Gets current and next PE
	 */
	const uint id = get_compute_id(0);
	const uint next_id = id + 1;

	/*
	 * Reads arguments
	 */
	const float3 fconstants = read_channel_intel(fargs[id]);
	mem_fence(CLK_CHANNEL_MEM_FENCE);

	/*
	 * Writes arguments on next channel
	 */
	write_channel_intel(fargs[next_id], fconstants);

	/*
	 * Memory Allocation
	 */
	// Shift registers
	__local float __attribute__((doublepump,
				     memory("MLAB"),
				     bankwidth(4))) sr_prev[SR_SIZE],
		      				    sr_next[SR_SIZE],
		      				    sr_vel[SR_SIZE];

	const float div_dxSquared = fconstants.s0;
	const float div_dzSquared = fconstants.s1;
	const float dtSquared = fconstants.s2;

	// Channel communication
	channelData input;
	channelData output;

	// Times channel will be read
	const uint count = 2 * GROUPS + 1;

	/*
	 * Computation
	 */
	// Index of shift registers
	uint index = 0;

	for (uint i = 0; i < count; i++) {
		input = read_channel_intel(prev[id]);

		for (uint pos = 0; pos < ACCESS_SIZE; pos++)
			sr_prev[index++] = input.data[pos];
	}

	for (uint i = 0; i < GROUPS * NZ - count; i++) {
		for (uint group = 0; group < GROUPS; group++) {}
		// Writes future discarded values in channels
		for (uint pos = 0; pos < ACCESS_SIZE; pos++)
			output.data[pos] = sr_prev[pos];
		
		write_channel_intel(prev[next_id], output);

		// Shifts registers
		#pragma unroll
		for (uint pos = 0; pos < SR_LIMIT; pos++) {
			uint shift_pos = ACCESS_SIZE + pos;

			sr_prev[pos] = sr_prev[shift_pos];
		}

		// Read from channels
		input = read_channel_intel(prev[id]);

		for (uint pos = SR_LIMIT; pos < SR_SIZE; pos++)
			sr_prev[pos] = input.data[pos - SR_LIMIT];
	}
}
AnilErinch_A_Intel
353 Views

Hi ,

Thank you for contacting Intel Support.


Can you try the example mentioned in the below link and check whether same error happens.


https://www.intel.com/content/www/us/en/programmable/customertraining/OLT/OpenCLChannels/lab.pdf


Thanks and Regards

Anil


Reply