- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Greetings,
I have a library with SSE4 code, but that optimized code contains inline assembler with a jump label and a conditional jump instruction to that label. Sadly, I have to enable only SSE1, which is slower than the code would be if it were compiled (via the preprocessor) to use SSE4 instructions.
But when ICC XE update 3 (and earlier compilers) reaches a function containing the inline assembler optimized for the mentioned SSE4, it fails with the error:
error : Labels are currently unsupported - 'MULT_M_sse4:'
This also happens if I enable SSE2, SSE3 or SSE4 usage, as this library has a configuration header driven by the preprocessor.
The worst thing is that I need to specify in that header file "do not use SSE2, 3 and 4", so it uses SSE1, which is indeed wasting the performance of the CPU (Core 2 Quad); it would perform far better if I could enable the SSE4 code.
My question is: is there any plan for Intel compiler to support jump labels in inline assembler? When can I expect this feature?
FYI: I post the critical code (out of thousands of such code), as an example, to make things clear:
--- snip ---
#ifdef LIBFV3_DOUBLE
#ifdef ENABLE_SSE4
// Inline-asm kernel (AT&T syntax, SSE2 plus the SSE4.1 DPPD instruction) that
// multiplies packed complex doubles from iL and fL and accumulates the products
// into oL.
//
// Operands (all declared as inputs, the output list is empty):
//   %0 = oL   accumulator buffer, read and written, advanced 0x20 bytes per pass
//   %1 = iL   first factor, advanced 0x20 bytes per pass
//   %2 = fL   second factor, advanced 0x20 bytes per pass
//   %3 = n/2  loop counter, decremented once per pass
//   %4 = soL  overwritten before it is ever read, so it only acts as a scratch
//             register (first for the constant 0x80, then for the original oL)
//
// Each pass handles two (re,im) pairs. The first 16 bytes of oL are special:
// xmm7 is computed up front as oL[0..1] + iL[0..1] * fL[0..1] (element-wise, not
// complex) and stored over whatever the loop wrote there once the loop is done.
//
// Every memory access uses movapd or an SSE memory operand, so all three
// pointers have to be 16-byte aligned.
//
// NOTE(review): the asm modifies %0-%4 although they are input operands, and it
// writes xmm0-xmm7 while only "memory" is listed as clobbered. GCC's extended-asm
// rules require outputs/clobbers for anything modified -- confirm and fix upstream.
// NOTE(review): the loop tests the counter only after the first pass; if n/2 is 0
// on entry the dec wraps instead of terminating -- confirm callers guarantee n >= 2.
// NOTE(review): named labels are emitted verbatim into the assembly output, so a
// second copy of this statement (e.g. through inlining) would define them twice.
__asm__ __volatile__
("MULT_M_sse4: \n\t"
// double tL0 = oL[0] + iL[0] * fL[0];
// double tL1 = oL[1] + iL[1] * fL[1];
// xmm7 keeps (tL0, tL1) for the whole loop and is stored at MULT_M_sse4_save.
"movapd (%1), %%xmm7 \n\t"
"mulpd (%2), %%xmm7 \n\t"
"addpd (%0), %%xmm7 \n\t"
// Build the sign mask in xmm6: put 0x80 into the lowest byte ...
"mov $0x80, %4 \n\t"
"pxor %%xmm6, %%xmm6 \n\t"
"movd %4, %%xmm6 \n\t"
// SSE has no whole-register bit shift, only the byte shift pslldq (SSE2).
// Example:
// pcmpeqd xmm0, xmm0 XMM0 = 0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFh
// pslldq xmm0, 3 XMM0 = 0FFFFFFFFFFFFFFFFFFFFFFFFFF000000h pslldq:sse2
"pslldq $0xf, %%xmm6 \n\t" // ... and move it up 15 bytes: only the sign bit of the upper double is set
// XMM6 = 8000000000000000 0000000000000000h
"mov %0, %4 \n\t" // remember the original oL; %0 is advanced inside the loop
// Main loop: one pass = two complex multiply-accumulates.
"MULT_M_sse4_loop: \n\t" // invariant: xmm6 = sign mask, xmm7 = (tL0, tL1)
"prefetchnta 0x80(%1) \n\t"
"movapd (%1), %%xmm0 \n\t" // first pair uses xmm0,1,2: xmm0 = (a,b)
"movapd 0x10(%1), %%xmm3 \n\t" // second pair uses xmm3,4,5: xmm3 = (a,b)
"add $0x20, %1 \n\t"
"prefetchnta 0x80(%2) \n\t"
"movapd (%2), %%xmm1 \n\t" // xmm1 = (c,d)
"movapd 0x10(%2), %%xmm4 \n\t" // xmm4 = (c,d) of the second pair
"add $0x20, %2 \n\t"
"movapd %%xmm1, %%xmm2 \n\t" // keep an unmodified copy of (c,d) for the imaginary part
"movapd %%xmm4, %%xmm5 \n\t"
"add $0x20, %0 \n\t" // advance first; the output is addressed below as -0x20(%0) / -0x10(%0)
"xorpd %%xmm6, %%xmm1 \n\t" // flip the sign of the upper element: (c,d)->(c,-d)
"xorpd %%xmm6, %%xmm4 \n\t"
"dppd $0x31, %%xmm0, %%xmm1 \n\t" // (a,b)#(c,-d)->(ac-bd,0): dot product written to the low element only
"dppd $0x31, %%xmm3, %%xmm4 \n\t"
// NOTE(review): by the SHUFPD definition imm8 = 3 picks the upper element for BOTH
// lanes, i.e. (c,d)->(d,d); the swap the comment describes would be imm8 = 1.
// Verify against the library's current source / a reference result.
"shufpd $0x3, %%xmm2, %%xmm2 \n\t" // intended: (c,d)->(d,c)
"shufpd $0x3, %%xmm5, %%xmm5 \n\t"
"dppd $0x32, %%xmm0, %%xmm2 \n\t" // (a,b)#(d,c)->(0,ad+bc): dot product written to the high element only
"dppd $0x32, %%xmm3, %%xmm5 \n\t"
"xorpd %%xmm1, %%xmm2 \n\t" // merge: each lane is zero in one operand, so xor yields (ac-bd,ad+bc)
"xorpd %%xmm4, %%xmm5 \n\t"
"prefetcht2 0x80(%0) \n\t"
"addpd -0x20(%0), %%xmm2 \n\t" // accumulate onto the existing output values
"addpd -0x10(%0), %%xmm5 \n\t"
"dec %3 \n\t" // sets ZF for the jne below; the movapd stores in between leave flags untouched
"movapd %%xmm2, -0x20(%0) \n\t"
"movapd %%xmm5, -0x10(%0) \n\t"
"jne MULT_M_sse4_loop \n\t"
"MULT_M_sse4_save: \n\t"
// oL[0] = tL0; oL[1] = tL1;
"movapd %%xmm7, (%4) \n\t" // %4 still holds the original oL saved before the loop
:
: "q"(oL), "q"(iL), "q"(fL), "r"(n/2), "r"(soL)
: "memory");
return;
#endif
--- snip ---
and it dies on error: error : Labels are currently unsupported - 'MULT_M_sse4:'
When can I expect this support: in ICC 13.x update 4, or in the (planned) version 14.x?
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Using 2011.9.300, this snip works here:
[cpp]
__asm {
; Call thunk (32-bit x86, Intel syntax): pushes argument dwords taken from the
; table at pSubLoc, loads ecx from [table+4] and the target from [table+0],
; then calls the target. ecx is preserved around the whole sequence.
; Numeric-looking labels ($1, $11) are used for the two branch targets.
; NOTE(review): nothing here removes the pushed arguments after the call, so the
; callee presumably cleans the stack (stdcall/thiscall style) - confirm.
push ecx ;; may be able to remove save/restore ecx
mov ecx, nArgs ; -1:-9
not ecx ; 0:8
; NOTE(review): NOT does not modify any flags, so this jz tests whatever flags
; were live before the block rather than the result of the NOT - confirm.
; NOTE(review): when the branch is taken, eax has not been loaded from pSubLoc,
; yet the code at $11 dereferences eax - confirm.
jz $11
mov eax, dword ptr pSubLoc
; Push the dwords at [eax+ecx*4+4] while ecx counts down to 1, highest index first.
$1:
push dword ptr [eax+ecx*4+4]
dec ecx
jne $1
$11:
mov ecx, dword ptr [eax+4] ; arg2 (this)
mov eax, dword ptr [eax] ; DoWorkFunc
call eax
pop ecx ;; may be able to remove save/restore ecx
}
[/cpp]
Jim Dempsey
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
jimdempseyatthecove wrote:
Using 2011.9.300, this snip works here:
__asm { push ecx ;; may be able to remove save/restore ecx mov ecx, nArgs ; -1:-9 not ecx ; 0:8 jz $11 mov eax, dword ptr pSubLoc $1: push dword ptr [eax+ecx*4+4] dec ecx jne $1 $11: mov ecx, dword ptr [eax+4] ; arg2 (this) mov eax, dword ptr [eax] ; DoWorkFunc call eax pop ecx ;; may be able to remove save/restore ecx }
Jim Dempsey
But I was speaking of goto labels inside an inline assembler block. Please see and re-evaluate my first post.
TIA!
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Marian "VooDooMan" Meravy wrote:
Quote:
jimdempseyatthecovewrote:Using 2011.9.300, this snip works here:
__asm { push ecx ;; may be able to remove save/restore ecx mov ecx, nArgs ; -1:-9 not ecx ; 0:8 jz $11 mov eax, dword ptr pSubLoc $1: push dword ptr [eax+ecx*4+4] dec ecx jne $1 $11: mov ecx, dword ptr [eax+4] ; arg2 (this) mov eax, dword ptr [eax] ; DoWorkFunc call eax pop ecx ;; may be able to remove save/restore ecx }
Jim Dempsey
But I was speaking of goto labels inside an inline assembler block. Please see and re-evaluate my first post.
TIA!
And the library is SO HUGE that I am almost unable to change the code to "$x" from "xxx_jump_label:". It is impossible, since the library is extremely huge...
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Sergey Kostrov wrote: Greetings, but this problem concerns a 3rd-party library, and the library uses e.g. SSE4 (or an earlier level, depending on the configuration header/preprocessor), and it uses it in an extremely large number of units (SSE4 code, for instance, in many units). So it is impossible for me to re-code the sources of the library, which would take at least 1 month.... It has preprocessor options to use SSE (1), SSE2, SSE3, or SSE4. My CPU supports SSE4, but I'm stuck with SSE (1) only, since ICC doesn't support jump labels in inline assembler. Note to Intel: will inline assembler jumps be implemented in ICC 14? Or should I ask to be a beta tester (which would be my pleasure)?
Intel TBB team uses inline assembler ( Microsoft and AT&T styles ) for different platforms ( many C/C++ compilers ) and take a look at TBB sources.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Did the author of the library use uniform naming conventions?
If so, then the number of different names used for labels might be relatively small even though the number of instances is large. In that case, possibly a manageable number of:
#define MULT_M_sse4_loop $MULT_M_sse4_loop
...
could be added and included in a common header.
Note: if you are handy with grep or another tool, the list of names to define could almost be assembled programmatically.
Jim Dempsey
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
There are some restrictions on the usage of labels. The compiler only allows local labels, and only references to labels within the same assembly statement are permitted. A local label has the form “N:”, where N is a non-negative integer. N does not have to be unique, even within the same assembly statement. To reference the most recent definition of label N, use “Nb”. To reference the next definition of label N, use “Nf”. In this context, “b” means backward and “f” means forward. For more information, refer to the GNU assembler documentation.
Regards, Hubert.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
hubert-haberstock (Intel) wrote:
There are some restrictions on the usage of labels. The compiler only allows local labels, and only references to labels within the same assembly statement are permitted. A local label has the form “N:”, where N is a non-negative integer. N does not have to be unique, even within the same assembly statement. To reference the most recent definition of label N, use “Nb”. To reference the next definition of label N, use “Nf”. In this context, “b” means backward and “f” means forward. For more information, refer to the GNU assembler documentation.
Regards, Hubert.
Thank you very much, Hubert. I know that you at Intel can't comment on the forum about Intel's future plans, so take the following question as a feature request: is there any plan to support this kind of label in inline assembler, i.e. to remove the restriction?
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page