- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
This is adapted from OpenMolcas and causes a failure. There are other failures.
program test [40/42934]
call Get_Subblock()
end program test
!=======================================================================
module nq_Info
use, intrinsic :: iso_fortran_env, only: int64
integer(kind=int64) :: nx = 10, ny = 4, nz = 10
end module nq_Info
!=======================================================================
subroutine Get_Subblock()
use, intrinsic :: iso_fortran_env, only: int64, real64
use nq_Info, only: nx, ny, nz
implicit none
integer(kind=int64), parameter :: nNQ = 1
integer(kind=int64) :: ix, ixyz, iy, iyz, iz
real(kind=real64) :: R2, t1, t2, t3, x_max_, x_min, x_min_, x_NQ, y_max_, y_min, y_min_, y_NQ, z_max_, z_min, z_min_, z_NQ
logical :: done = .false.
if (done) return
done = .true.
ixyz = 6
x_min = -8.0_real64
y_min = -2.0_real64
z_min = -7.826364311586_real64
iyz = 1+(ixyz-1)/nx
ix = ixyz-(iyz-1)*nx
iz = 1+(iyz-1)/ny
iy = iyz-(iz-1)*ny
x_min_ = x_min+(ix-2)*2.0_real64
x_max_ = x_min_+2.0_real64
y_min_ = y_min+(iy-2)*2.0_real64
y_max_ = y_min_+2.0_real64
z_min_ = z_min+(iz-2)*2.0_real64
z_max_ = z_min_+2.0_real64
if (ix == 1) x_min_ = -1.0e99_real64
if (ix == nx) x_max_ = 1.0e99_real64
if (iy == 1) y_min_ = -1.0e99_real64
if (iy == ny) y_max_ = 1.0e99_real64
if (iz == 1) z_min_ = -1.0e99_real64
if (iz == nz) z_max_ = 1.0e99_real64
x_NQ = 1.40421662117370_real64
y_NQ = 0.000000000000000_real64
z_NQ = 1.02874664437177_real64
t1 = (x_NQ-x_min_)/(x_max_-x_min_)
if (t1 < 0.0_real64) t1 = 0.0_real64
if (t1 > 1.0_real64) t1 = 1.0_real64
t2 = (y_NQ-y_min_)/(y_max_-y_min_)
if (t2 < 0.0_real64) t2 = 0.0_real64
if (t2 > 1.0_real64) t2 = 1.0_real64
t3 = (z_NQ-z_min_)/(z_max_-z_min_)
if (t3 < 0.0_real64) t3 = 0.0_real64
if (t3 > 1.0_real64) t3 = 1.0_real64
R2 = (x_NQ-(x_max_-x_min_)*t1-x_min_)**2+(y_NQ-(y_max_-y_min_)*t2-y_min_)**2+(z_NQ-(z_max_-z_min_)*t3-z_min_)**2
write(6,*) R2
end subroutine Get_Subblock
Compiling with -O0 and -O2 gives wildly different results:
$ ifx -O0 a.f90 && ./a.out
0.000000000000000E+000
$ ifx -O2 a.f90 && ./a.out
1.05831965830618
$ ifx --version
ifx (IFX) 2024.0.1 20231122
Copyright (C) 1985-2023 Intel Corporation. All rights reserved.
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Another failure. Possibly a manifestation of the same underlying problem:
program test [15/59518]
use, intrinsic :: iso_fortran_env, only: real64
implicit none
real(kind=real64) :: B
call InitB2(12,B)
end program test
!=======================================================================
subroutine InitB2(nMult,B)
use, intrinsic :: iso_fortran_env, only: real64
implicit none
integer, parameter :: natom = 3, nAtQM = 3
integer :: nMult, IsMM(natom), iQM, jAt, nOrd
real(kind=real64) :: B, Ext(4,natom), TTT(12)
IsMM(:) = 0
TTT(:) = [ 0.0996240187701627_real64, -0.0678010789526136_real64, -0.0343428927289087_real64, -0.0772705078426976_real64, &
-0.1184159952122723_real64, -0.1835719229968422_real64, -0.0456886660060007_real64, -0.0287182833201296_real64, &
0.0250761796854473_real64, -0.0013503667449997_real64, 0.0067120114148270_real64, -0.0191993678682207_real64]
Ext(1,:) = [ 0.0497650609946744_real64, -0.0011448171391590_real64, 0.0233845240594584_real64]
Ext(2,:) = [ 0.0249820632156247_real64, 0.0379349209889440_real64, 0.0132155001662827_real64]
Ext(3,:) = [ 0.0055450458327068_real64, 0.0108261515099825_real64, -0.0010805609431091_real64]
Ext(4,:) = [-0.0119043865891981_real64, 0.0116827835572062_real64, -0.0134650174110233_real64]
nOrd = nMult/nAtQM
$ ifx -O0 a.f90 && ./a.out
-2.845138188721881E-003
$ ifx -O2 a.f90 && ./a.out
-1.328284734601879E-002
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
use option
-standard-semantics
for now. we will investigate these two cases.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Just curious: why would -standard-semantics work? It doesn't seem to, anyway. It changes the -O2 result, but it's still different from -O0:
$ ifx -standard-semantics -O2 a.f90 && ./a.out
-1.444657961216917E-002
$ ifx -standard-semantics -O0 a.f90 && ./a.out
-2.845138188721881E-003
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
You can also relax the optimizations used in llvm with:
-assume ieee_compares
first case:
$ ifx -O2 -assume ieee_compares unstable.f90
$ ./a.out
0.000000000000000E+000
this assume ieee_compares is included in "-standard-semantics".
I haven't fully tested the 2nd testcase. I'd guess -standard-semantics works for it as well.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Here's another one:
program test
implicit none
integer :: nRoots
common/foo/ nRoots
nRoots = 2
call DoPrec()
end program test
!=======================================================================
subroutine DoPrec()
implicit none
integer, parameter :: nConf = 2
real*8 :: Vec(nConf), CI(nConf,nRoots), Pre(nConf), Eact, Fancy(nRoots,nRoots), rcoeff(nRoots), alpha(nRoots)
integer :: iConf, jRoots, kRoots
integer :: nRoots
common/foo/ nRoots
Vec(:) = [ 0.469400439803905d0, -3.448312343202041d-8 ]
CI(:,:) = reshape([ -0.9597613699782059d0, 0.0000000001337804d0, 0.0401117754090611d0, 0.0000000023700793d0 ], &
[nConf,nRoots])
Fancy(:,:) = reshape([ 5.755684494933862d-2, 1.774045100172686d-2, 1.774045100172686d-2, 0.476332301353075d0 ], &
[nRoots,nRoots])
Pre(:) = [ -6.9845687858236687d0, -6.6528270206770683d0 ]
Eact = 7.03729892190616d0
rcoeff(:) = [ -2.375558064262605d-2, 9.928285772435944d-4 ]
do jRoots=1,nRoots
alpha(jRoots) = 0.0d0
do kRoots=1,nRoots
alpha(jRoots) = alpha(jRoots)+Fancy(jRoots,kRoots)*rcoeff(kRoots)
end do
end do
do jRoots=1,nRoots
do iConf=1,nConf
Vec(iConf) = Vec(iConf)-CI(iConf,jRoots)*alpha(jRoots)/(Pre(iConf)+Eact)
end do
end do
write(6,*) Vec(1)
end subroutine DoPrec
$ ifx -O0 a.f90 && ./a.out
0.444795180585146
$ ifx -O2 a.f90 && ./a.out
0.469400439803905
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The second example with InitB2 seems to be truncated.
Optimizations with ifx are very very different from ifort. That they produce different results should be expected.
In the 1st and 3rd example it seems you really do want IEEE compliant arithmetic. Is this a correct assumption? If so, -O2 by itself is too aggressive in optimization.
What I think you really want is this
ifx -O2 -fltconsistency
I believe this will achieve your goal of maintaining IEEE arithmetic to the lowest ULP. This is what you want. O2 is not that by itself. I do not see a bug here, merely difference in FP optimizations between compilers. Please try -O2 -fltconsistency
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Sorry for the truncated code, this should be the full example:
program test
use, intrinsic :: iso_fortran_env, only: real64
implicit none
real(kind=real64) :: B
call InitB2(12,B)
end program test
!=======================================================================
subroutine InitB2(nMult,B)
use, intrinsic :: iso_fortran_env, only: real64
implicit none
integer, parameter :: natom = 3, nAtQM = 3
integer :: nMult, IsMM(natom), iQM, jAt, nOrd
real(kind=real64) :: B, Ext(4,natom), TTT(12)
IsMM(:) = 0
TTT(:) = [ 0.0996240187701627_real64, -0.0678010789526136_real64, -0.0343428927289087_real64, -0.0772705078426976_real64, &
-0.1184159952122723_real64, -0.1835719229968422_real64, -0.0456886660060007_real64, -0.0287182833201296_real64, &
0.0250761796854473_real64, -0.0013503667449997_real64, 0.0067120114148270_real64, -0.0191993678682207_real64]
Ext(1,:) = [ 0.0497650609946744_real64, -0.0011448171391590_real64, 0.0233845240594584_real64]
Ext(2,:) = [ 0.0249820632156247_real64, 0.0379349209889440_real64, 0.0132155001662827_real64]
Ext(3,:) = [ 0.0055450458327068_real64, 0.0108261515099825_real64, -0.0010805609431091_real64]
Ext(4,:) = [-0.0119043865891981_real64, 0.0116827835572062_real64, -0.0134650174110233_real64]
nOrd = nMult/nAtQM
iQM = 0
B = 0.0_real64
do jAt=1,natom
if (IsMM(jAt) == 1) cycle
iQM = iQM+1
B = B+Ext(1,jAt)*TTT(nOrd*(iQM-1)+1)
if (nOrd > 1) B = B+Ext(2,jAt)*TTT(nOrd*(iQM-1)+2)+Ext(3,jAt)*TTT(nOrd*(iQM-1)+3)+Ext(4,jAt)*TTT(nOrd*(iQM-1)+4)
end do
write(6,*) B
end subroutine InitB2
$ ifx -O0 a.f90 && ./a.out
-2.845138188721881E-003
$ ifx -O2 a.f90 && ./a.out
-1.328284734601879E-002
Although I'm comparing results with ifort, my concern is the very different results obtained with ifx -O0 and ifx -O2. I understand different optimizations can give unexpectedly different results (e.g. a+b-b, if b >> a, may be equal to a or to 0). But some compiler behaviour looks more like a bug to me. For instance, in the first example, if the nx, ny, nz declaration is moved from the module to the subroutine, the results with -O0 and -O2 become the same. I'm sure it has a logical explanation from the compiler's point of view, but I doubt that's what users expect.
I'll try -fltconsistency, thanks (it doesn't seem to fix this "second" case).
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The compiler optimization would likely remove code generated for line 39 as it will always be false.
Likely, the conditional part of line 42 would be removed as it is always true (leaving the statement part)
There may be a code generation error between/amongst lines 41 and f2.
To test this supposition, insert between lines 41 and 42
if(b == 3.14159) print *, "boo" ! don't expect this to print
What this will (should) cause is for the compiler optimization to .NOT. collapse former lines 41 and 42.
Then run the -o0 and -02 tests.
Jim Dempsey
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Jim and Andrew:
Would the code not be better if R2 was broken into several variables
a = b*c
d =e+f
g = a**d
rather than one line.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
John, I wasn't suggesting a better method, rather I was suggesting a means to test a hypothesis, and in turn, aid the support team in diagnosing the issue.
Ron's later post on the observation of different (correct) results attained by moving code from the module to an external subroutine is of more interest.
What may additionally be related is that this (error) may be a result of IPO. The user can also try the error prone code and explicitly turn off IPO.
Jim Dempsey
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Jim:
Sorry, mine was a theoretical question rephrased, is it better for the compiler to break R2 into short components and then add the components or leave a very long equation that is really easy to make a mistake in? For my education.
I was amused by @mecej4 comment about knowing something about electochemistry before fixing the code, an engineer yesterday was complaining that engineers will do anything, even if they know naught about it. I agreed with her.
John
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I think it is better for humans to break things up into smaller bits to make it easier to understand and get right. The compiler should just apply a set of evaluation rules and it is down to the user to take care if working with a mix of big and small numbers. The case I pointed out was SMALLNO+BIGNO-BIGNO which dependant order order will yield SMALLNO or zero. Strict standard Fortran would be left to right order of addition/subtraction which will give the mathematically wrong answer. SMALLNO+(BIGNO-BIGNO) will give the right answer but note some optimisations do not respect brackets. /Qprotect-parens is your friend.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Yes, that line "fixes" the problem. Here's a simplified version:
program test
implicit none
real :: B
call InitB2(8,B,.true.)
end program test
!=======================================================================
subroutine InitB2(nMult,B,MultiP)
use, intrinsic :: iso_fortran_env, only: real64
implicit none
integer, parameter :: natom = 2, nAtQM = 2
integer :: nMult, IsMM(natom), iQM, jAt
logical :: MultiP
real :: B, TTT(nMult)
IsMM(:) = 0
TTT(:) = [ 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
iQM = 0
B = 0.0
do jAt=1,natom
if (IsMM(jAt) == 1) cycle
iQM = iQM+1
B = B+TTT(4*(iQM-1)+1)
if (MultiP) B = B+10.0
end do
write(6,*) B
end subroutine InitB2
$ ifx -O0 -standard-semantics -fltconsistency a.f90 && ./a.out
26.00000
$ ifx -O2 -standard-semantics -fltconsistency a.f90 && ./a.out
25.00000
Now, it's true that line 29 is a no-op, but removing it makes the failure go away. Same with the test in line 32. Or changing iQM to jAt in line 31. Or replacing TTT(...) with real(...) in line 31.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
well the first example produces consistent results /Od /O3 on windows with /standard-semantics
The inconsistent results without that option come on the R2 calculation at the end . I did not drill into which of the standard-semantics options makes the difference, I did try Qprotect-parens but it isn't that.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
"if the nx, ny, nz declaration is moved from the module to the subroutine, the results with -O0 and -O2 become the same. "
This changes the way references are made to those variables. THis is interesting. It could be a bug.
I am now leaning on this being at least 1 bug, maybe more.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
In the first example the difference is in line 79 in the third term which is shown above. With optimised and non-optimised code the components are exactly the same it comes down to (1.028 + 1.0e99 - 1.0E99) the optimiser changes the computation order as we now it can and as we know can be unsafe ( the manual says so). It seems to me this code is specifically designed to find such aberrations.
Ironically the mathematically correct answer is 1.028^2 and the optimised code is "correct" and the non-optimised code is "wrong". However the real answer is by the rules of Fortran evaluation order the correct answer is not the mathematically correct answer! For me that is not a Fortran bug it is the coding bug if the difference is significant in the context of the calculation.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
RE: /Qprotect-parens
IMHO the code should default to protect parens, and the user could then issue /Qnoprotect-parens if they so (foolishly) desire to do so.
Even in fundamental math courses, parens are there to control sequence of operations. Fortran should not deviate from this.
Jim Dempsey
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
That is standard other than with O3 I think but is also enforced with /standard-semantics
Option standard-semantics enables option fpscomp logicals and the following settings for option assume: byterecl, failed_images, fpe_summary, ieee_compares, ieee_fpe_flags (if the fp-model option setting is strict or precise), minus0, nan_compares, noold_inquire_recl, noold_ldout_format, noold_ldout_zero, noold_maxminloc, noold_unit_star, noold_xor, protect_parens, realloc_lhs, recursion, std_intent_in, std_minus0_rounding, std_mod_proc_name, and std_value.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
No, /assume:protect_parens is not the default at any optimization level. It is implied by /standard-semantics, as noted.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Steve, I am aware that /assume:protect_parens is not the default at any optimization level, my argument is that when a programmer goes to the effort of inserting parenthesis into the code, that the compiler should honor the parenthesis unless told not to.
If the compiler is so brash as to (without permission) throw out parenthesis, why not throw out operator precedence too? Some speed could be attained by doing that too.
A faster benchmark with unacceptable results is of little use.
Jim
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page