- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I have a question about compiling several kernels that have been appended into a single CL file:
1. In the CL file below, when it contains only clip_8b_kernel, ITransformOne_kernel and GetSSE_kernel, placing ITransformOne_kernel before GetSSE_kernel produces the error shown below; if the kernels are placed in the order clip_8b_kernel, GetSSE_kernel, ITransformOne_kernel, the file compiles successfully. 2. This CL file includes multiple kernels, and I find that compiling any one of these kernels separately gives no error, but ITransform_C_kernel and ITransformOne_kernel cannot exist in the file simultaneously; if both are present, the same error shown below occurs. Error content: aoc: Linking with IP library ... /build/swbuild/SJ/nightly/17.0/290/l64/p4/acl/llvm/include/acl/Analysis/LocalMemSizing.h(83) ******* Requested memsize for unavailable aspace m_local_mem_size_per_workgroup.count(aspace) FAILED 0 libLLVM-3.0.so 0x00007f348ee2846f 1 libLLVM-3.0.so 0x00007f348ee2a3e2 2 libpthread.so.0 0x00007f348da905e0 3 libLLVM-3.0.so 0x00007f348f617ef9 acl::get_bits(llvm::Value const*, llvm::TargetData*, acl::LocalMemSizing*, bool, bool, bool) + 7289 4 libLLVM-3.0.so 0x00007f348e37a880 grif::Pass_AreaReport::add_private_var_resources() + 8800 5 libLLVM-3.0.so 0x00007f348e37c7e1 grif::Pass_AreaReport::runPass(grif::XNode*) + 913 6 libLLVM-3.0.so 0x00007f348e34c58a acl::DSDKGenerate::runOnModule(llvm::Module&) + 8010 7 libLLVM-3.0.so 0x00007f348f049fc1 llvm::MPPassManager::runOnModule(llvm::Module&) + 577 8 libLLVM-3.0.so 0x00007f348f04a16b llvm::PassManagerImpl::run(llvm::Module&) + 187 9 aocl-llc 0x000000000040be40 main + 5360 10 libc.so.6 0x00007f348ca9ec05 __libc_start_main + 245 11 aocl-llc 0x00000000004098e9 Stack dump: 0. Program arguments: /home/****/inteldevstack/intelFPGA_pro/hld/linux64/bin/aocl-llc -march=griffin -board /home/****/inteldevstack/a10_gx_pac_ias_1_0_prq/opencl/opencl_bsp/hardware/pac_a10/board_spec.xml -dbg-info-enabled test.bc -o test.v 1. Running pass 'Generate DSDK netlist and HDL' on module 'test.bc'. Error: Verilog generator FAILED. Refer to test/test.log for details. 
cl file: typedef uchar uint8_t; typedef short uint16_t; typedef int uint32_t;# define clip_8b_const_v_LEN 1# define clip_8b_out_clip_8b_return_LEN 1# define ITransformOne_in_ref_LEN 200# define ITransformOne_in_in_LEN 200# define ITransformOne_inout_dst_LEN 200# define GetSSE_in_a_LEN 200# define GetSSE_in_b_LEN 200# define GetSSE_const_w_LEN 1# define GetSSE_const_h_LEN 1# define GetSSE_out_GetSSE_return_LEN 1# define ITransform_C_in_ref_LEN 200# define ITransform_C_in_in_LEN 200# define ITransform_C_inout_dst_LEN 200# define ITransform_C_const_do_two_LEN 1 /****** VP8Calc -- VP8Transform *************************************************/# define WEBP_INLINE inline# define BPS 32 static WEBP_INLINE uchar clip_8b(int v) { return (!(v & ~0xff)) ? v : (v < 0) ? 0 : 255; } # define STORE(x, y, v) dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))# define kC1 (20091 + (1 << 16)) // DF: NEW# define kC2 35468 // DF: NEW# define MUL(a, b) (((a) * (b)) >> 16) static WEBP_INLINE void ITransformOne(const uint8_t * ref, const uint16_t* in, uint8_t* dst) { int C[4 * 4], *tmp; int i; tmp = C; for (i = 0; i < 4; ++i) { // vertical pass const int a = in[0] + in[8]; const int b = in[0] - in[8]; const int c = MUL(in[4], kC2) - MUL(in[12], kC1); const int d = MUL(in[4], kC1) + MUL(in[12], kC2); tmp[0] = a + d; tmp[1] = b + c; tmp[2] = b - c; tmp[3] = a - d; tmp += 4; in++; } tmp = C; for (i = 0; i < 4; ++i) { // horizontal pass const int dc = tmp[0] + 4; const int a = dc + tmp[8]; const int b = dc - tmp[8]; const int c = MUL(tmp[4], kC2) - MUL(tmp[12], kC1); const int d = MUL(tmp[4], kC1) + MUL(tmp[12], kC2); STORE(0, i, a + d); STORE(1, i, b + c); STORE(2, i, b - c); STORE(3, i, a - d); tmp++; } } void ITransform_C(const uint8_t * ref,const uint16_t * in,uint8_t * dst, int do_two) { ITransformOne(ref, in, dst); if (do_two) { ITransformOne(ref+ 4, in + 16, dst+ 4); } } static WEBP_INLINE int GetSSE(const uint8_t* a, const uint8_t* b, int w, int h) { int count = 0; int 
y, x; for (y = 0; y < h; ++y) { // DF: TBC, may need to unroll this for (x = 0; x < w; ++x) { const int diff = (int)a[x] - b[x]; count += diff * diff; } a += BPS; b += BPS; } return count; } __kernel void clip_8b_kernel( int v,__global uchar* restrict clip_8b_return){ printf("start func:clip_8b\n"); uchar clip_8b_return_[clip_8b_out_clip_8b_return_LEN]; clip_8b_return[0] = clip_8b(v); for(int i = 0;i<clip_8b_out_clip_8b_return_LEN;i++){ clip_8b_return = clip_8b_return_; } } __kernel void ITransformOne_kernel(__global uint8_t * restrict ref,__global uint16_t* restrict in,__global uint8_t* restrict dst){ printf("start func:ITransformOne\n"); uint8_t ref_[ITransformOne_in_ref_LEN]; uint16_t in_[ITransformOne_in_in_LEN]; uint8_t dst_[ITransformOne_inout_dst_LEN]; for(int i = 0;i<ITransformOne_in_ref_LEN;i++){ ref_ = ref; } for(int i = 0;i<ITransformOne_in_in_LEN;i++){ in_ = in; } for(int i = 0;i<ITransformOne_inout_dst_LEN;i++){ dst_ = dst; } ITransformOne(ref_,in_,dst_); for(int i = 0;i<ITransformOne_inout_dst_LEN;i++){ dst = dst_; } } __kernel void GetSSE_kernel(__global uint8_t* restrict a,__global uint8_t* restrict b, int w, int h,__global int* restrict GetSSE_return){ printf("start func:GetSSE\n"); uint8_t a_[GetSSE_in_a_LEN]; uint8_t b_[GetSSE_in_b_LEN]; int GetSSE_return_[GetSSE_out_GetSSE_return_LEN]; for(int i = 0;i<GetSSE_in_a_LEN;i++){ a_ = a; } for(int i = 0;i<GetSSE_in_b_LEN;i++){ b_ = b; } GetSSE_return[0] = GetSSE(a_,b_,w,h); for(int i = 0;i<GetSSE_out_GetSSE_return_LEN;i++){ GetSSE_return = getsse_return_; } } __kernel void ITransform_C_kernel(__global uint8_t * restrict ref,__global uint16_t * restrict in,__global uint8_t * restrict dst, int do_two){ printf("start func:ITransform_C\n"); uint8_t ref_[ITransform_C_in_ref_LEN]; uint16_t in_[ITransform_C_in_in_LEN]; uint8_t dst_[ITransform_C_inout_dst_LEN]; for(int i = 0;i<ITransform_C_in_ref_LEN;i++){ ref_ = ref; } for(int i = 0;i<ITransform_C_in_in_LEN;i++){ in_ = in; } for(int i = 
0;i<ITransform_C_inout_dst_LEN;i++){ dst_ = dst; } ITransform_C(ref_,in_,dst_,do_two); for(int i = 0;i<ITransform_C_inout_dst_LEN;i++){ dst = dst_; } } Who can help me? Thanks in advance.
Link Copied
1 Reply
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Well, yes, your code crashes the compiler from v17.0 all the way to v18.0. However, it compiles correctly with v16.1.2. You have a lot of issues in your code reported by the v16.1.2 compiler. If you fix these, it will likely also compile with the newer versions:
Compiler Warning: Auto-unrolled loop at /nfshome/hrz/test.cl:40
Compiler Warning: Auto-unrolled loop at /nfshome/hrz/test.cl:40
Compiler Warning: Auto-unrolled loop at /nfshome/hrz/test.cl:40
/nfshome/hrz/test.cl:130: Compiler Warning: Aggressive compiler optimization: removing unnecessary storage to local memory
/nfshome/hrz/test.cl:133: Compiler Warning: Aggressive compiler optimization: removing unnecessary storage to local memory
Warning: Kernel 'clip_8b_kernel' has unused argument 'v'
Warning: Kernel 'clip_8b_kernel' has unused argument 'clip_8b_return'
Warning: Kernel 'GetSSE_kernel' has unused argument 'a'
Warning: Kernel 'GetSSE_kernel' has unused argument 'b'
Warning: Kernel 'GetSSE_kernel' has unused argument 'w'
Warning: Kernel 'GetSSE_kernel' has unused argument 'h'
Warning: Kernel 'GetSSE_kernel' has unused argument 'GetSSE_return'
Specifically, the unnecessary usage of local memory in lines 130 and 133 is very likely the cause of the crash with the new versions of the compiler. If you remove the local memory usage in those lines, it will likely allow the kernel to be compiled correctly.

Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page