Intel® oneAPI Data Parallel C++
Support for Intel® oneAPI DPC++ Compiler, Intel® oneAPI DPC++ Library, Intel ICX Compiler , Intel® DPC++ Compatibility Tool, and GDB*

icx 2023.1.0 wrong optimization

Frank_R_1
Beginner
1,690 Views

Dear support,

We encountered a problem with the icx compiler on Linux and Windows concerning optimization.
Setup:
Intel oneAPI 2023.1.0
Windows 10 Enterprise
RHEL 8.7

Below you will find a reproducer, icx.cxx, and its compiler output for Linux and Windows.
func1 is optimized incorrectly:
movsd __real@3ddb7cdfd9d7bdbb(%rip), %xmm4
ucomisd %xmm2, %xmm4
divsd %xmm2, %xmm3 <---- division by zero if len is zero
movapd %xmm2, %xmm1
cmpltsd %xmm4, %xmm1
andnpd %xmm3, %xmm1
xorpd %xmm3, %xmm3
ja .LBB0_2
unpcklpd %xmm2, %xmm2
divpd %xmm2, %xmm0
movapd %xmm0, %xmm3
.LBB0_2:
leaq "??_7MSvector@@6B@"(%rip), %rcx
movq %rcx, (%rax)
movupd %xmm3, 8(%rax)
movlpd %xmm1, 24(%rax)
retq

func2 is correct:
movsd __real@3ddb7cdfd9d7bdbb(%rip), %xmm3
ucomisd %xmm2, %xmm3
jbe .LBB1_2 <---- correct branch
xorpd %xmm0, %xmm0
movupd %xmm0, (%rcx)
xorpd %xmm0, %xmm0
movsd %xmm0, 24(%rax)
retq
.LBB1_2:
divsd %xmm2, %xmm0
unpcklpd %xmm2, %xmm2
divpd %xmm2, %xmm1 <---- ok never divide by zero
movupd %xmm1, (%rcx)
movsd %xmm0, 24(%rax)
retq

Intel oneAPI 2022.2.1 does it right:
https://godbolt.org/z/YY5Y3TTjW


Best regards
Frank

file icx.cxx
//windows compile options
//icx -Qstd=c++17 -O3 -Ob2 -S -fp=precise -Qimf-arch-consistency:true -Qfma- icx.cxx
//linux compile options
//icx -std=c++17 -O3 -inline-level=2 -fp-model=precise -fimf-arch-consistency=true -no-fma icx.cxx
#include <cmath>
// Three-component vector of doubles. The virtual destructor makes it a
// polymorphic base class (MSunit_vector derives from it).
class MSvector {
public:
  // Components. The default member initializers guarantee that a
  // default-constructed vector is (0, 0, 0) rather than holding
  // indeterminate values that would be undefined behavior to read.
  double x = 0.0;
  double y = 0.0;
  double z = 0.0;

  MSvector() = default;
  virtual ~MSvector() = default;

  // Builds the vector (fx, fy, fz).
  MSvector(const double fx, const double fy, const double fz)
      : x(fx), y(fy), z(fz) {}

  // Divides every component by d in place and returns *this.
  // No check for d == 0 is made here; callers that may pass a zero or
  // near-zero divisor have to guard the call themselves.
  const MSvector &operator/=(const double d) {
    x /= d;
    y /= d;
    z /= d;
    return *this;
  }

  // Euclidean length: sqrt(x^2 + y^2 + z^2).
  double len() const { return std::sqrt(x * x + y * y + z * z); }
};
class MSunit_vector : public MSvector {
public:
MSunit_vector() {}
MSunit_vector(const double fx, const double fy, const double fz)
: MSvector(fx, fy, fz) {
const double size = len();
#define SMALL_VEC_LEN (1.0E-10)
if (size < SMALL_VEC_LEN)
*(MSvector *)this = MSvector(0.0, 0.0, 0.0);
else
*(MSvector *)this /= size;
}
};
// Returns the MSunit_vector built from the three components of v, i.e. the
// result of MSunit_vector's three-argument constructor applied to v.
inline MSunit_vector normalise(const MSvector &v) {
  MSunit_vector unit(v.x, v.y, v.z);
  return unit;
}

// Normalises vn and returns the result as a plain MSvector.
// normalise() yields an MSunit_vector; copy-initialising the local MSvector
// from it keeps only the base-class part, so the deduced return type of this
// function is MSvector, not MSunit_vector.
// This is the variant that the accompanying listings show being compiled
// with the z division placed ahead of the length check.
auto func1(const MSvector &vn) {
MSvector v = normalise(vn);
return v;
}

// Returns normalise(vn) directly, so the deduced return type is
// MSunit_vector. Apart from the return form this is the same computation as
// func1; the accompanying listings show it compiled with a branch ahead of
// both divisions.
auto func2(const MSvector &vn) {
return normalise(vn);
}
//icx -Qstd=c++17 -O3 -Ob2 -S -fp=precise -Qimf-arch-consistency:true -Qfma- icx.cxx
//windows output
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
.text
.def @feat.00;
.scl 3;
.type 0;
.endef
.globl @feat.00
.set @feat.00, 0
.file "icx.cxx"
.def "?func1@@YA?A?<auto>@@AEBVMSvector@@@Z";
.scl 2;
.type 32;
.endef
.globl __real@3ddb7cdfd9d7bdbb
.section .rdata,"dr",discard,__real@3ddb7cdfd9d7bdbb
.p2align 3, 0x0
__real@3ddb7cdfd9d7bdbb:
.quad 0x3ddb7cdfd9d7bdbb
.section .text,"xr",one_only,"?func1@@YA?A?<auto>@@AEBVMSvector@@@Z"
.globl "?func1@@YA?A?<auto>@@AEBVMSvector@@@Z"
.p2align 4, 0x90
"?func1@@YA?A?<auto>@@AEBVMSvector@@@Z":
# NOTE(review): comments prefixed "NOTE(review)" are hand-written annotations,
# not compiler output.
# NOTE(review): %rcx is copied to %rax and used as the base of the result
# stores; %rdx is the base of the input loads (presumably the hidden return
# slot and &vn, with z at +24 and x/y at +8 after the vtable pointer).
movq %rcx, %rax
movsd 24(%rdx), %xmm3
movupd 8(%rdx), %xmm0
# NOTE(review): xmm1 = (x*x, y*y); xmm2.lo = x*x + y*y.
movapd %xmm0, %xmm1
mulpd %xmm0, %xmm1
movapd %xmm1, %xmm2
unpckhpd %xmm1, %xmm2
addsd %xmm1, %xmm2
# NOTE(review): xmm1.lo = z*z + (x*x + y*y).
movapd %xmm3, %xmm1
mulsd %xmm3, %xmm1
addsd %xmm2, %xmm1
# NOTE(review): xmm2.lo = sqrt(sum of squares), i.e. `size`.
xorps %xmm2, %xmm2
sqrtsd %xmm1, %xmm2
# NOTE(review): compare the 1.0E-10 constant with size; the resulting flags
# are consumed by the `ja` further down.
movsd __real@3ddb7cdfd9d7bdbb(%rip), %xmm4
ucomisd %xmm2, %xmm4
# NOTE(review): z / size is computed here, ahead of any branch, so it also
# executes when size < 1.0E-10 (0/0 for an all-zero input). This is the
# division the report objects to.
divsd %xmm2, %xmm3
# NOTE(review): branch-free select for z: mask = (size < 1.0E-10);
# xmm1.lo = mask ? 0 : z/size.
movapd %xmm2, %xmm1
cmpltsd %xmm4, %xmm1
andnpd %xmm3, %xmm1
# NOTE(review): xmm3 = 0, the x/y result for the small-length path.
xorpd %xmm3, %xmm3
# NOTE(review): taken when 1.0E-10 > size, skipping the x/y division.
ja .LBB0_2
# NOTE(review): xmm3 = (x/size, y/size); reached only when the jump is not
# taken.
unpcklpd %xmm2, %xmm2
divpd %xmm2, %xmm0
movapd %xmm0, %xmm3
.LBB0_2:
# NOTE(review): store the MSvector vtable pointer, then x/y from xmm3 and z
# from xmm1.
leaq "??_7MSvector@@6B@"(%rip), %rcx
movq %rcx, (%rax)
movupd %xmm3, 8(%rax)
movlpd %xmm1, 24(%rax)
retq

.def "?func2@@YA?A?<auto>@@AEBVMSvector@@@Z";
.scl 2;
.type 32;
.endef
.section .text,"xr",one_only,"?func2@@YA?A?<auto>@@AEBVMSvector@@@Z"
.globl "?func2@@YA?A?<auto>@@AEBVMSvector@@@Z"
.p2align 4, 0x90
"?func2@@YA?A?<auto>@@AEBVMSvector@@@Z":
# NOTE(review): comments prefixed "NOTE(review)" are hand-written annotations,
# not compiler output.
# NOTE(review): %rcx is copied to %rax (base of the result stores); %rdx is
# the base of the input loads (presumably the hidden return slot and &vn).
movq %rcx, %rax
movsd 24(%rdx), %xmm0
movupd 8(%rdx), %xmm1
# NOTE(review): %rcx = result + 8, the address the x/y pair is stored to.
addq $8, %rcx
# NOTE(review): store the MSunit_vector vtable pointer at result + 0.
leaq "??_7MSunit_vector@@6B@"(%rip), %rdx
movq %rdx, (%rax)
# NOTE(review): xmm2 = (x*x, y*y); xmm3.lo = x*x + y*y.
movapd %xmm1, %xmm2
mulpd %xmm1, %xmm2
movapd %xmm2, %xmm3
unpckhpd %xmm2, %xmm3
addsd %xmm2, %xmm3
# NOTE(review): xmm2.lo = z*z + (x*x + y*y), then its square root (`size`).
movapd %xmm0, %xmm2
mulsd %xmm0, %xmm2
addsd %xmm3, %xmm2
sqrtsd %xmm2, %xmm2
# NOTE(review): compare the 1.0E-10 constant with size and branch before any
# division: the jump is taken when 1.0E-10 <= size or the compare is
# unordered.
movsd __real@3ddb7cdfd9d7bdbb(%rip), %xmm3
ucomisd %xmm2, %xmm3
jbe .LBB1_2
# NOTE(review): small-length path: store zeros for x/y and z, no division.
xorpd %xmm0, %xmm0
movupd %xmm0, (%rcx)
xorpd %xmm0, %xmm0
movsd %xmm0, 24(%rax)
retq
.LBB1_2:
# NOTE(review): division path: xmm0.lo = z/size, xmm1 = (x/size, y/size).
divsd %xmm2, %xmm0
unpcklpd %xmm2, %xmm2
divpd %xmm2, %xmm1
movupd %xmm1, (%rcx)
movsd %xmm0, 24(%rax)
retq

.def "??_GMSunit_vector@@UEAAPEAXI@Z";
.scl 2;
.type 32;
.endef
.section .text,"xr",discard,"??_GMSunit_vector@@UEAAPEAXI@Z"
.globl "??_GMSunit_vector@@UEAAPEAXI@Z"
.p2align 4, 0x90
"??_GMSunit_vector@@UEAAPEAXI@Z":
.seh_proc "??_GMSunit_vector@@UEAAPEAXI@Z"
pushq %rsi
.seh_pushreg %rsi
subq $32, %rsp
.seh_stackalloc 32
.seh_endprologue
movq %rcx, %rsi
testl %edx, %edx
je .LBB2_2
movq %rsi, %rcx
callq "??3@YAXPEAX@Z"
.LBB2_2:
movq %rsi, %rax
addq $32, %rsp
popq %rsi
retq
.seh_endproc

.def "??_GMSvector@@UEAAPEAXI@Z";
.scl 2;
.type 32;
.endef
.section .text,"xr",discard,"??_GMSvector@@UEAAPEAXI@Z"
.globl "??_GMSvector@@UEAAPEAXI@Z"
.p2align 4, 0x90
"??_GMSvector@@UEAAPEAXI@Z":
.seh_proc "??_GMSvector@@UEAAPEAXI@Z"
pushq %rsi
.seh_pushreg %rsi
subq $32, %rsp
.seh_stackalloc 32
.seh_endprologue
movq %rcx, %rsi
testl %edx, %edx
je .LBB3_2
movq %rsi, %rcx
callq "??3@YAXPEAX@Z"
.LBB3_2:
movq %rsi, %rax
addq $32, %rsp
popq %rsi
retq
.seh_endproc

.section .rdata,"dr",largest,"??_7MSunit_vector@@6B@"
.p2align 3, 0x0
.L__unnamed_1:
.quad "??_R4MSunit_vector@@6B@"
.quad "??_GMSunit_vector@@UEAAPEAXI@Z"

.section .rdata,"dr",discard,"??_R4MSunit_vector@@6B@"
.globl "??_R4MSunit_vector@@6B@"
.p2align 4, 0x0
"??_R4MSunit_vector@@6B@":
.long 1
.long 0
.long 0
.long "??_R0?AVMSunit_vector@@@8"@IMGREL
.long "??_R3MSunit_vector@@8"@IMGREL
.long "??_R4MSunit_vector@@6B@"@IMGREL

.section .data,"dw",discard,"??_R0?AVMSunit_vector@@@8"
.globl "??_R0?AVMSunit_vector@@@8"
.p2align 4, 0x0
"??_R0?AVMSunit_vector@@@8":
.quad "??_7type_info@@6B@"
.quad 0
.asciz ".?AVMSunit_vector@@"
.zero 4

.section .rdata,"dr",discard,"??_R3MSunit_vector@@8"
.globl "??_R3MSunit_vector@@8"
.p2align 3, 0x0
"??_R3MSunit_vector@@8":
.long 0
.long 0
.long 2
.long "??_R2MSunit_vector@@8"@IMGREL

.section .rdata,"dr",discard,"??_R2MSunit_vector@@8"
.globl "??_R2MSunit_vector@@8"
.p2align 2, 0x0
"??_R2MSunit_vector@@8":
.long "??_R1A@?0A@EA@MSunit_vector@@8"@IMGREL
.long "??_R1A@?0A@EA@MSvector@@8"@IMGREL
.long 0

.section .rdata,"dr",discard,"??_R1A@?0A@EA@MSunit_vector@@8"
.globl "??_R1A@?0A@EA@MSunit_vector@@8"
.p2align 4, 0x0
"??_R1A@?0A@EA@MSunit_vector@@8":
.long "??_R0?AVMSunit_vector@@@8"@IMGREL
.long 1
.long 0
.long 4294967295
.long 0
.long 64
.long "??_R3MSunit_vector@@8"@IMGREL

.section .rdata,"dr",discard,"??_R1A@?0A@EA@MSvector@@8"
.globl "??_R1A@?0A@EA@MSvector@@8"
.p2align 4, 0x0
"??_R1A@?0A@EA@MSvector@@8":
.long "??_R0?AVMSvector@@@8"@IMGREL
.long 0
.long 0
.long 4294967295
.long 0
.long 64
.long "??_R3MSvector@@8"@IMGREL

.section .data,"dw",discard,"??_R0?AVMSvector@@@8"
.globl "??_R0?AVMSvector@@@8"
.p2align 4, 0x0
"??_R0?AVMSvector@@@8":
.quad "??_7type_info@@6B@"
.quad 0
.asciz ".?AVMSvector@@"
.zero 1

.section .rdata,"dr",discard,"??_R3MSvector@@8"
.globl "??_R3MSvector@@8"
.p2align 3, 0x0
"??_R3MSvector@@8":
.long 0
.long 0
.long 1
.long "??_R2MSvector@@8"@IMGREL

.section .rdata,"dr",discard,"??_R2MSvector@@8"
.globl "??_R2MSvector@@8"
.p2align 2, 0x0
"??_R2MSvector@@8":
.long "??_R1A@?0A@EA@MSvector@@8"@IMGREL
.long 0

.section .rdata,"dr",largest,"??_7MSvector@@6B@"
.p2align 3, 0x0
.L__unnamed_2:
.quad "??_R4MSvector@@6B@"
.quad "??_GMSvector@@UEAAPEAXI@Z"

.section .rdata,"dr",discard,"??_R4MSvector@@6B@"
.globl "??_R4MSvector@@6B@"
.p2align 4, 0x0
"??_R4MSvector@@6B@":
.long 1
.long 0
.long 0
.long "??_R0?AVMSvector@@@8"@IMGREL
.long "??_R3MSvector@@8"@IMGREL
.long "??_R4MSvector@@6B@"@IMGREL

.section .drectve,"yni"
.ascii " /DEFAULTLIB:libcpmt.lib"
.ascii " /DEFAULTLIB:libcmt.lib"
.ascii " /DEFAULTLIB:libircmt.lib"
.ascii " /DEFAULTLIB:svml_dispmt.lib"
.ascii " /DEFAULTLIB:libdecimal.lib"
.ascii " /DEFAULTLIB:libmmt.lib"
.ascii " /DEFAULTLIB:oldnames.lib"
.ascii " /FAILIFMISMATCH:\"_MSC_VER=1900\""
.ascii " /FAILIFMISMATCH:\"_ITERATOR_DEBUG_LEVEL=0\""
.ascii " /FAILIFMISMATCH:\"RuntimeLibrary=MT_StaticRelease\""
.globl "??_7MSunit_vector@@6B@"
.set "??_7MSunit_vector@@6B@", .L__unnamed_1+8
.globl "??_7MSvector@@6B@"
.set "??_7MSvector@@6B@", .L__unnamed_2+8
.globl _fltused
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
//linux output
//icx -std=c++17 -O3 -inline-level=2 -fp-model=precise -fimf-arch-consistency=true -no-fma icx.cxx
///////////////////////////////////////////////////////////////////////////////////////////////////////////////
.text
.file "icx.cxx"
.section .rodata.cst8,"aM",@progbits,8
.p2align 3, 0x0 # -- Begin function _Z5func1RK8MSvector
.LCPI0_0:
.quad 0x3ddb7cdfd9d7bdbb # 1.0E-10
.text
.globl _Z5func1RK8MSvector
.p2align 4, 0x90
.type _Z5func1RK8MSvector,@function
_Z5func1RK8MSvector: #
.cfi_startproc
# %bb.0:
# NOTE(review): comments prefixed "NOTE(review)" are hand-written annotations,
# not compiler output.
# NOTE(review): %rdi is copied to %rax and used as the base of the result
# stores; %rsi is the base of the input loads (presumably the hidden return
# slot and &vn, with x/y at +8 and z at +24 after the vtable pointer).
movq %rdi, %rax
movupd 8(%rsi), %xmm0
movsd 24(%rsi), %xmm3 # xmm3 = mem[0],zero
# NOTE(review): xmm1 = (x*x, y*y); xmm2.lo = x*x + y*y.
movapd %xmm0, %xmm1
mulpd %xmm0, %xmm1
movapd %xmm1, %xmm2
unpckhpd %xmm1, %xmm2 # xmm2 = xmm2[1],xmm1[1]
addsd %xmm1, %xmm2
# NOTE(review): xmm1.lo = z*z + (x*x + y*y).
movapd %xmm3, %xmm1
mulsd %xmm3, %xmm1
addsd %xmm2, %xmm1
# NOTE(review): xmm2.lo = sqrt(sum of squares), i.e. `size`.
xorps %xmm2, %xmm2
sqrtsd %xmm1, %xmm2
# NOTE(review): compare the 1.0E-10 constant with size; the resulting flags
# are consumed by the `ja` further down.
movsd .LCPI0_0(%rip), %xmm4 # xmm4 = mem[0],zero
ucomisd %xmm2, %xmm4
# NOTE(review): z / size is computed here, ahead of any branch, so it also
# executes when size < 1.0E-10 (0/0 for an all-zero input). This is the
# division the report objects to.
divsd %xmm2, %xmm3
# NOTE(review): branch-free select for z: mask = (size < 1.0E-10);
# xmm1.lo = mask ? 0 : z/size.
movapd %xmm2, %xmm1
cmpltsd %xmm4, %xmm1
andnpd %xmm3, %xmm1
# NOTE(review): xmm3 = 0, the x/y result for the small-length path.
xorpd %xmm3, %xmm3
# NOTE(review): taken when 1.0E-10 > size, skipping the x/y division.
ja .LBB0_2
# %bb.1:
# NOTE(review): xmm3 = (x/size, y/size); reached only when the jump is not
# taken.
unpcklpd %xmm2, %xmm2 # xmm2 = xmm2[0,0]
divpd %xmm2, %xmm0
movapd %xmm0, %xmm3
.LBB0_2:
# NOTE(review): store the MSvector vtable pointer, then x/y from xmm3 and z
# from xmm1.
movq $_ZTV8MSvector+16, (%rax)
movupd %xmm3, 8(%rax)
movlpd %xmm1, 24(%rax)
retq
.Lfunc_end0:
.size _Z5func1RK8MSvector, .Lfunc_end0-_Z5func1RK8MSvector
.cfi_endproc
# -- End function
.section .text._ZN8MSvectorD2Ev,"axG",@progbits,_ZN8MSvectorD2Ev,comdat
.weak _ZN8MSvectorD2Ev # -- Begin function _ZN8MSvectorD2Ev
.p2align 4, 0x90
.type _ZN8MSvectorD2Ev,@function
_ZN8MSvectorD2Ev: #
.cfi_startproc
# %bb.0:
retq
.Lfunc_end1:
.size _ZN8MSvectorD2Ev, .Lfunc_end1-_ZN8MSvectorD2Ev
.cfi_endproc
# -- End function
.section .rodata.cst8,"aM",@progbits,8
.p2align 3, 0x0 # -- Begin function _Z5func2RK8MSvector
.LCPI2_0:
.quad 0x3ddb7cdfd9d7bdbb # 1.0E-10
.text
.globl _Z5func2RK8MSvector
.p2align 4, 0x90
.type _Z5func2RK8MSvector,@function
_Z5func2RK8MSvector: #
.cfi_startproc
# %bb.0:
# NOTE(review): comments prefixed "NOTE(review)" are hand-written annotations,
# not compiler output.
# NOTE(review): %rdi is copied to %rax (base of the result stores); %rsi is
# the base of the input loads (presumably the hidden return slot and &vn).
movq %rdi, %rax
movupd 8(%rsi), %xmm1
movsd 24(%rsi), %xmm0 # xmm0 = mem[0],zero
# NOTE(review): %rcx = result + 8, the address the x/y pair is stored to;
# the MSunit_vector vtable pointer goes to result + 0.
leaq 8(%rdi), %rcx
movq $_ZTV13MSunit_vector+16, (%rdi)
# NOTE(review): xmm2 = (x*x, y*y); xmm3.lo = x*x + y*y.
movapd %xmm1, %xmm2
mulpd %xmm1, %xmm2
movapd %xmm2, %xmm3
unpckhpd %xmm2, %xmm3 # xmm3 = xmm3[1],xmm2[1]
addsd %xmm2, %xmm3
# NOTE(review): xmm2.lo = z*z + (x*x + y*y), then its square root (`size`).
movapd %xmm0, %xmm2
mulsd %xmm0, %xmm2
addsd %xmm3, %xmm2
sqrtsd %xmm2, %xmm2
# NOTE(review): compare the 1.0E-10 constant with size and branch before any
# division: the jump is taken when 1.0E-10 <= size or the compare is
# unordered.
movsd .LCPI2_0(%rip), %xmm3 # xmm3 = mem[0],zero
ucomisd %xmm2, %xmm3
jbe .LBB2_2
# %bb.1:
# NOTE(review): small-length path: store zeros for x/y and z, no division.
xorpd %xmm0, %xmm0
movupd %xmm0, (%rcx)
xorpd %xmm0, %xmm0
movsd %xmm0, 24(%rax)
retq
.LBB2_2:
# NOTE(review): division path: xmm0.lo = z/size, xmm1 = (x/size, y/size).
divsd %xmm2, %xmm0
unpcklpd %xmm2, %xmm2 # xmm2 = xmm2[0,0]
divpd %xmm2, %xmm1
movupd %xmm1, (%rcx)
movsd %xmm0, 24(%rax)
retq
.Lfunc_end2:
.size _Z5func2RK8MSvector, .Lfunc_end2-_Z5func2RK8MSvector
.cfi_endproc
# -- End function
.section .text._ZN13MSunit_vectorD0Ev,"axG",@progbits,_ZN13MSunit_vectorD0Ev,comdat
.weak _ZN13MSunit_vectorD0Ev # -- Begin function _ZN13MSunit_vectorD0Ev
.p2align 4, 0x90
.type _ZN13MSunit_vectorD0Ev,@function
_ZN13MSunit_vectorD0Ev: #
.cfi_startproc
# %bb.0:
jmp _ZdlPv # TAILCALL
.Lfunc_end3:
.size _ZN13MSunit_vectorD0Ev, .Lfunc_end3-_ZN13MSunit_vectorD0Ev
.cfi_endproc
# -- End function
.section .text._ZN8MSvectorD0Ev,"axG",@progbits,_ZN8MSvectorD0Ev,comdat
.weak _ZN8MSvectorD0Ev # -- Begin function _ZN8MSvectorD0Ev
.p2align 4, 0x90
.type _ZN8MSvectorD0Ev,@function
_ZN8MSvectorD0Ev: #
.cfi_startproc
# %bb.0:
jmp _ZdlPv # TAILCALL
.Lfunc_end4:
.size _ZN8MSvectorD0Ev, .Lfunc_end4-_ZN8MSvectorD0Ev
.cfi_endproc
# -- End function
.type _ZTV13MSunit_vector,@object #
.section .rodata._ZTV13MSunit_vector,"aG",@progbits,_ZTV13MSunit_vector,comdat
.weak _ZTV13MSunit_vector
.p2align 3, 0x0
_ZTV13MSunit_vector:
.quad 0
.quad _ZTI13MSunit_vector
.quad _ZN8MSvectorD2Ev
.quad _ZN13MSunit_vectorD0Ev
.size _ZTV13MSunit_vector, 32

.type _ZTS13MSunit_vector,@object #
.section .rodata._ZTS13MSunit_vector,"aG",@progbits,_ZTS13MSunit_vector,comdat
.weak _ZTS13MSunit_vector
_ZTS13MSunit_vector:
.asciz "13MSunit_vector"
.size _ZTS13MSunit_vector, 16

.type _ZTS8MSvector,@object #
.section .rodata._ZTS8MSvector,"aG",@progbits,_ZTS8MSvector,comdat
.weak _ZTS8MSvector
_ZTS8MSvector:
.asciz "8MSvector"
.size _ZTS8MSvector, 10

.type _ZTI8MSvector,@object #
.section .rodata._ZTI8MSvector,"aG",@progbits,_ZTI8MSvector,comdat
.weak _ZTI8MSvector
.p2align 3, 0x0
_ZTI8MSvector:
.quad _ZTVN10__cxxabiv117__class_type_infoE+16
.quad _ZTS8MSvector
.size _ZTI8MSvector, 16

.type _ZTI13MSunit_vector,@object #
.section .rodata._ZTI13MSunit_vector,"aG",@progbits,_ZTI13MSunit_vector,comdat
.weak _ZTI13MSunit_vector
.p2align 3, 0x0
_ZTI13MSunit_vector:
.quad _ZTVN10__cxxabiv120__si_class_type_infoE+16
.quad _ZTS13MSunit_vector
.quad _ZTI8MSvector
.size _ZTI13MSunit_vector, 24

.type _ZTV8MSvector,@object #
.section .rodata._ZTV8MSvector,"aG",@progbits,_ZTV8MSvector,comdat
.weak _ZTV8MSvector
.p2align 3, 0x0
_ZTV8MSvector:
.quad 0
.quad _ZTI8MSvector
.quad _ZN8MSvectorD2Ev
.quad _ZN8MSvectorD0Ev
.size _ZTV8MSvector, 32

.ident "Intel(R) oneAPI DPC++/C++ Compiler 2023.1.0 (2023.1.0.20230320)"
.section ".note.GNU-stack","",@progbits

0 Kudos
10 Replies
VaishnaviV_Intel
Moderator
1,646 Views

Hi,

 

Thanks for posting in Intel communities.

Have you noticed any differences between the results obtained from using Intel oneAPI 2022.2.1 and 2023.1.0 versions?

Could you please provide us with the complete code and also elaborate more on your issue?

 

Thanks & Regards,

Vankudothu Vaishnavi.

 

0 Kudos
Frank_R_1
Beginner
1,638 Views

Hi,

 

I think the reproducer is self-explanatory and shows the issue.

The problem in our production code is that floating point exceptions are turned on from a 3rd party package, so that this will lead to a crash.

And from my understanding the compiled code is wrong. Please correct me if I am wrong.

 

Best regards

Frank

0 Kudos
VaishnaviV_Intel
Moderator
1,575 Views

Hi,


Can you tell us which third-party package you're using? You mentioned that there were some issues with floating-point exceptions caused by this package. It would be great if you could isolate the problem in a reproducible example so we can investigate it further.


We also tested the sample you provided but didn't encounter any exceptions.


Thanks & Regards,

Vankudothu Vaishnavi.


0 Kudos
Frank_R_1
Beginner
1,564 Views

Hi,

 

This is a reproducer, please show it to your development team!

When I try to turn it into a main program with output, the compiler optimizes differently and the problem is gone!

The disassembly output of func1 shows the problem:


movsd __real@3ddb7cdfd9d7bdbb(%rip), %xmm4                   if (size < SMALL_VEC_LEN)
ucomisd %xmm2, %xmm4
divsd %xmm2, %xmm3                                                                      here division by zero if len is zero (xmm2 is zero) and this is wrong
movapd %xmm2, %xmm1
cmpltsd %xmm4, %xmm1
andnpd %xmm3, %xmm1
xorpd %xmm3, %xmm3
ja .LBB0_2
unpcklpd %xmm2, %xmm2
divpd %xmm2, %xmm0
movapd %xmm0, %xmm3
.LBB0_2:
leaq "??_7MSvector@@6B@"(%rip), %rcx
movq %rcx, (%rax)
movupd %xmm3, 8(%rax)
movlpd %xmm1, 24(%rax)
retq

 

func2 does it correctly with an additional branch:

movsd __real@3ddb7cdfd9d7bdbb(%rip), %xmm3                   if (size < SMALL_VEC_LEN)
ucomisd %xmm2, %xmm3
jbe .LBB1_2                                                                                            correct branch!
xorpd %xmm0, %xmm0
movupd %xmm0, (%rcx)
xorpd %xmm0, %xmm0
movsd %xmm0, 24(%rax)
retq
.LBB1_2:
divsd %xmm2, %xmm0
unpcklpd %xmm2, %xmm2
divpd %xmm2, %xmm1                                                                       ok never divide by zero
movupd %xmm1, (%rcx)
movsd %xmm0, 24(%rax)
retq

 

Best regards

Frank

0 Kudos
VaishnaviV_Intel
Moderator
1,463 Views

Hi,


We understand your concern and would like to assist you with your problem. However, we would require some additional information to investigate the issue further.


You said that there are no problems with your code and that you are using a third-party package that enables floating point exceptions. We need to know more about this package and how you are using it in your code.


If you don't give us this information, we might not be able to help you solve the problem. So, please give us as much information as you can so we can investigate and find a solution.


Thank you for your cooperation.


Thanks & Regards,

Vankudothu Vaishnavi.


0 Kudos
Frank_R_1
Beginner
1,400 Views

Hi,

 

The 3rd party package has nothing to do with this error. We enabled floating point checks for this package (ACIS kernel), therefore the 0/0 floating point exception was thrown!

This should never happen: because the vector length is checked against a threshold (1.0E-10), no division should take place for shorter vectors, but it does in the incorrectly optimized code section.

Root cause is first and foremost the wrong optimization:

movsd __real@3ddb7cdfd9d7bdbb(%rip), %xmm4
ucomisd %xmm2, %xmm4
divsd %xmm2, %xmm3

 

xmm2 and xmm3 are both zero if the MSvector is {0.0, 0.0, 0.0}

There has to be a branch before the division.

 

Best regards

Frank

0 Kudos
VaishnaviV_Intel
Moderator
1,325 Views

Hi,

 

Could you please confirm whether we are on the right track or not?

 

So, you are having optimization issues with func1() and not with func2(). Because func2() creates a branch before the division operation, you are not seeing any optimization issue there, but in func1() the first division is performed before the branch is taken. This, in turn, leads to the optimization issue, which causes the code to crash once floating point checks are enabled at your end.

Initially, you mentioned that “the problem in the production code was due to floating point exceptions being turned on from a 3rd party package, resulting in a crash”. However, you have now stated that “The 3rd party package has nothing to with this error. We enabled floating point checks for this package (ACIS kernel), therefore the 0/0 floating point exception was thrown!”. This statement contradicts the initial one, causing confusion about the issue.

Also, we noticed that if func1() uses the same return statement as func2(), a branch is created before the division operation. Here are the functions:

auto func1(const MSvector &vn) {

  return normalise(vn);

}

auto func2(const MSvector &vn) {

  return normalise(vn);

}

This seems to be working well in icx 2022.2.1, but not in icx 2023.1.0. Is this correct?

 

And also Could you please help fill in the below table?

 

compiler command

test code

expected execution result

actual execution result

icpx 2023.1

 

 

 

 

icpx 2022.2.1

 

 

 

 

 

Thanks & Regards,

Vankudothu Vaishnavi.

 

0 Kudos
Frank_R_1
Beginner
1,278 Views

Hi,

 

>This seems to be working well in icx 2022.2.1, but not in icx 2023.1.0. Is this correct?

YES! Unfortunately https://godbolt.org/ does not offer the newest compiler as a selection, but you have seen my assembly code!

func1 and func2 with auto work correctly on Windows; on Linux we still have the same behavior!

 

I cannot fill in the table, but the compiler commands are the same. The test code above is extracted from our production code and runs correctly in debug mode but not in release mode on both platforms!

 

I think the compiler development team has a lot of tools to check invariants etc. and to find out what went wrong in optimization.

 

Best regards

Frank

0 Kudos
VaishnaviV_Intel
Moderator
1,189 Views

Hi,


For Community support, we require you to submit a minimal reproduction sample specific to your issue that provides us with the most relevant background information for triage. If you require privacy and are unable to share the issue/sample with us publicly, and if you are a licensed oneAPI product customer and/or a member of Intel’s oneAPI Academic Program, please submit a ticket for Priority support so that your application can be handled in accordance with the required data protection and privacy regulations.


Thanks & Regards,

Vankudothu Vaishnavi.


0 Kudos
Frank_R_1
Beginner
1,179 Views

Hi,

 

I have done my best to report the issue and to give all the information a developer on the compiler team needs. If this is not appreciated, in the future I'll just wait for new versions to test and see whether the bug is fixed.

Please close this thread.

 

Best  regards

Frank

0 Kudos
Reply