Intel® Fortran Compiler
Build applications that can scale for the future with optimized code designed for Intel® Xeon® and compatible processors.

Strange OMP behaviour

IanH
Honored Contributor III
988 Views

After some recent changes to a program (and compiler version) I started to see some strange behaviour in OpenMP enabled builds. While debugging I've been able to come up with a smaller example that generates similar (but not quite the same) sort of problems. I'm now a bit worried that my understanding of OpenMP behaviour is flawed. All the odd behaviour happens before the code encounters any parallel constructs.

The following crashes with an access violation on the line after the "do something really useful" comment if compiled with OpenMP enabled. Something seems to be stomping on the argument list for call2. I know that OpenMP debugging can be difficult (from painful experience), but this all seems ok to me.

[cpp]MODULE array_sizes
IMPLICIT NONE
! Problem sizes
INTEGER :: sz1
INTEGER :: sz2
END MODULE array_sizes

MODULE big_mod
IMPLICIT NONE
CONTAINS
SUBROUTINE call1
USE array_sizes
REAL, ALLOCATABLE:: big_array(:) ! Our work array
INTEGER :: pos(3) ! Indexes into work array
INTEGER :: total
! Set problem size
sz1 = 3
sz2 = 5
! Allocate work array
pos(1) = 1 ! array one is dim (sz1)
pos(2) = pos(1) + sz1 ! array two is dim (sz1,sz2)
pos(3) = pos(2) + sz1 * sz2 ! array three is dim (sz1)
total = pos(3) + sz1 - 1
ALLOCATE(big_array(total))
WRITE(*,"('Allocated: ',I4)"), total
WRITE(*,"(A10,3Z10.8)") 'call1', LOC(big_array(pos(1))), &
LOC(big_array(pos(2))), &
LOC(big_array(pos(3)))
CALL call2(big_array(pos(1)), big_array(pos(2)), big_array(pos(3)))
WRITE(*,"(F12.1)") big_array
END SUBROUTINE call1

SUBROUTINE call2(arg1, arg2, arg3)
USE array_sizes
REAL, INTENT(OUT) :: arg1(sz1)
REAL, INTENT(OUT) :: arg2(sz1,sz2)
REAL, INTENT(OUT) :: arg3(sz1)
WRITE(*,"(A10,3(Z10.8))") 'call2', LOC(arg1), LOC(arg2), LOC(arg3)
! do something really usefull...
arg1 = 11.0
arg2 = 101.0
!CALL other_never_called(sz1, arg1, arg3)
CONTAINS
SUBROUTINE contained_never_called
WRITE(*,"(A10)") 'arg1:', arg1
END SUBROUTINE contained_never_called
END SUBROUTINE call2

SUBROUTINE other_never_called(n1, arg1, arg3)
INTEGER, INTENT(IN) :: n1
REAL, INTENT(INOUT) :: arg1(n1)
REAL, INTENT(INOUT) :: arg3(n1)
INTEGER i
!$OMP PARALLEL DO DEFAULT(NONE), SHARED(arg1, arg3, n1)
DO i = 1,n1
arg3(i) = arg1(i) * 2.5
END DO
!$OMP END PARALLEL DO
END SUBROUTINE other_never_called

END MODULE big_mod

PROGRAM DebugOMPTest
USE big_mod
IMPLICIT NONE
!******
CALL call1
END PROGRAM[/cpp]

This works if:

  • you compile without OpenMP
  • you comment out SUBROUTINE other_never_called, or
  • you comment out the internal subroutine contained_never_called
  • you use version 10.1.025 rather than 11.0.066

Any ideas (particularly whether my use of "shared" module variables for the array sizes is valid when using OpenMP, even when they are not used in the lexical scope of the construct)?

Thanks,

IanH

0 Kudos
8 Replies
jimdempseyatthecove
Honored Contributor III
988 Views
Quoting - IanH

After some recent changes to a program (and compiler version) I started to see some strange behaviour in OpenMP enabled builds. While debugging I've been able to come up with a smaller example that generates similar (but not quite the same) sort of problems. I'm now a bit worried that my understanding of OpenMP behaviour is flawed. All the odd behaviour happens before the code encounters any parallel constructs.

The following crashes with an access violation on the line after the "do something really useful" comment if compiled with OpenMP enabled. Something seems to be stomping on the argument list for call2. I know that OpenMP debugging can be difficult (from painful experience), but this all seems ok to me.

[cpp]MODULE array_sizes
IMPLICIT NONE
! Problem sizes
INTEGER :: sz1
INTEGER :: sz2
END MODULE array_sizes

MODULE big_mod
IMPLICIT NONE
CONTAINS
SUBROUTINE call1
USE array_sizes
REAL, ALLOCATABLE:: big_array(:) ! Our work array
INTEGER :: pos(3) ! Indexes into work array
INTEGER :: total
! Set problem size
sz1 = 3
sz2 = 5
! Allocate work array
pos(1) = 1 ! array one is dim (sz1)
pos(2) = pos(1) + sz1 ! array two is dim (sz1,sz2)
pos(3) = pos(2) + sz1 * sz2 ! array three is dim (sz1)
total = pos(3) + sz1 - 1
ALLOCATE(big_array(total))
WRITE(*,"('Allocated: ',I4)"), total
WRITE(*,"(A10,3Z10.8)") 'call1', LOC(big_array(pos(1))), &
LOC(big_array(pos(2))), &
LOC(big_array(pos(3)))
CALL call2(big_array(pos(1)), big_array(pos(2)), big_array(pos(3)))
WRITE(*,"(F12.1)") big_array
END SUBROUTINE call1

SUBROUTINE call2(arg1, arg2, arg3)
USE array_sizes
REAL, INTENT(OUT) :: arg1(sz1)
REAL, INTENT(OUT) :: arg2(sz1,sz2)
REAL, INTENT(OUT) :: arg3(sz1)
WRITE(*,"(A10,3(Z10.8))") 'call2', LOC(arg1), LOC(arg2), LOC(arg3)
! do something really usefull...
arg1 = 11.0
arg2 = 101.0
!CALL other_never_called(sz1, arg1, arg3)
CONTAINS
SUBROUTINE contained_never_called
WRITE(*,"(A10)") 'arg1:', arg1
END SUBROUTINE contained_never_called
END SUBROUTINE call2

SUBROUTINE other_never_called(n1, arg1, arg3)
INTEGER, INTENT(IN) :: n1
REAL, INTENT(INOUT) :: arg1(n1)
REAL, INTENT(INOUT) :: arg3(n1)
INTEGER i
!$OMP PARALLEL DO DEFAULT(NONE), SHARED(arg1, arg3, n1)
DO i = 1,n1
arg3(i) = arg1(i) * 2.5
END DO
!$OMP END PARALLEL DO
END SUBROUTINE other_never_called

END MODULE big_mod

PROGRAM DebugOMPTest
USE big_mod
IMPLICIT NONE
!******
CALL call1
END PROGRAM[/cpp]

This works if:

  • you compile without OpenMP
  • you comment out SUBROUTINE other_never_called, or
  • you comment out the internal subroutine contained_never_called
  • you use version 10.1.025 rather than 11.0.066

Any ideas (particularly whether my use of "shared" module variables for the array sizes is valid when using OpenMP, even when they are not used in the lexical scope of the construct)?

Thanks,

IanH

I do note that USE OMP_LIB is not included in the subroutine that has the !$OMP...

You might try inserting it there (as well as a stab in the dark by inserting it into the PROGRAM section).

Also, try inserting a RETURN in front of the CONTAINS in CALL2

When all else fails, open the dissassembly window and follow the code using single step into (excepting for the library calls and WRITEs etc...)

Jim Dempsey

0 Kudos
IanH
Honored Contributor III
988 Views

I do note that USE OMP_LIB is not included in the subroutine that has the !$OMP...

You might try inserting it there (as well as a stab in the dark by inserting it into the PROGRAM section).

Also, try inserting a RETURN in front of the CONTAINS in CALL2

When all else fails, open the dissassembly window and follow the code using single step into (excepting for the library calls and WRITEs etc...)

Jim Dempsey

Thanks for the comments. I've ended up having to walk through the assembly, comparing a normal debug build (no compile time checks) with the open build.

From what I can tell, the generated code for the assignment to arg1 (the first executable statement in call2) expects the address of arg1 to be in a certain stack location. But the prolog code for call2 (eventually - with omp enabled the entry code for call2 balloons out massively and I got fed up with stepping...) puts something else there, that looks suspiciously like bit flags that might be part of an array descriptor (0x03000001). Bad things then happen.

I can also make the problem go away by removing arg1 from the parameter list of the write statement in the internal contained function (contained_never_called). (My understanding is that without a return statement before the contains, this contained function should still never be called). I think the compiler is getting confused.

I'll log this with premier support and see what they have to say.

0 Kudos
jimdempseyatthecove
Honored Contributor III
988 Views

Excepting for arrays in COMMON (or SAVE fixed size, or global scoped fixed size) arrays all arrays will have an array descriptor. When you specify the argument in CALL2 as arg1(sz1) where arg1 comes in as reference and sz1 comes in as reference, the compiler will construct an array descriptor for the array arg1 (IMHO the compiler optimization could optimize it out, but then the Debugging access to the array would then be messed up).

If you go to the 1st statement after the subroutine prolog is arg1 messed up? (examine arg1(1) to see if it contains the correct data). If it does then the probelem is elsewhere.

Jim Dempsey

0 Kudos
jimdempseyatthecove
Honored Contributor III
988 Views

Also try

WRITE(*,"(A10,3(Z10.8))")'call2',LOC(arg1(1)),LOC(arg2(1)),LOC(arg3(1))
WRITE(*,*) sz1

0 Kudos
IanH
Honored Contributor III
988 Views

Excepting for arrays in COMMON (or SAVE fixed size, or global scoped fixed size) arrays all arrays will have an array descriptor. When you specify the argument in CALL2 as arg1(sz1) where arg1 comes in as reference and sz1 comes in as reference, the compiler will construct an array descriptor for the array arg1 (IMHO the compiler optimization could optimize it out, but then the Debugging access to the array would then be messed up).

If you go to the 1st statement after the subroutine prolog is arg1 messed up? (examine arg1(1) to see if it contains the correct data). If it does then the probelem is elsewhere.

Jim Dempsey

Thanks. In addition to your diagnostics there are further simplifications below (no floating point, single argument, explicit shape work array, very boring standard "pass by reference" of that array). Including the diagnostic WRITE there's now only six executable statements before this thing explodes. I've left out the use OMP_LIB and trailing RETURN as they did not appear to change the generated code.

LOC(arg1(1)) says 0x03000001, which, given the way its put together by a series of "or" instructions in the disassembly, look like bit flags to me. arg1(1) has lost it's expected initial value of 9 according to the debugger. Putting a write statement in with arg1(1) as an argument also causes an access violation.

(To be honest I have no idea what the memory layout of an array descriptor is, so this could be something unrelated to the array).

I'll report back if premier support find anything astray. In the meantime the other cores on my machine can spend their time decoding mp3 files rather than running models. That still keeps me entertained.

Thanks,

IanH

[cpp]! OpenMP test

MODULE array_sizes
  IMPLICIT NONE
  ! Problem sizes
  INTEGER :: sz
END MODULE array_sizes

MODULE big_mod
  IMPLICIT NONE
CONTAINS
  SUBROUTINE call1
    USE array_sizes
    INTEGER :: big_array(sz)
    big_array = 9
    CALL call2(big_array)
  END SUBROUTINE call1

  SUBROUTINE call2(arg1)
    USE array_sizes
    INTEGER, INTENT(INOUT) :: arg1(sz)
    ! do something
    WRITE(*,"(A10,Z10.8))") 'call2:', LOC(arg1(1))
    arg1 = 11    
  CONTAINS
    SUBROUTINE contained_never_called
      WRITE(*,"(A10,I4)") 'arg1:', arg1(1)
    END SUBROUTINE contained_never_called
  END SUBROUTINE call2
  
  SUBROUTINE other_never_called
    INTEGER i
    !$OMP PARALLEL DO DEFAULT(NONE)
    DO i = 1,10; END DO    
    !$OMP END PARALLEL DO
  END SUBROUTINE other_never_called

END MODULE big_mod

PROGRAM DebugOMPTest
  USE big_mod
  USE array_sizes
  IMPLICIT NONE  
  ! Set problem size
  sz = 3         
  CALL call1
END PROGRAM
[/cpp]

0 Kudos
jimdempseyatthecove
Honored Contributor III
988 Views

IanH,

You might pass this on to Premier Support (check OUTSIDE_call2 which works)

[cpp]!  OMPbug.f90 
!
!  FUNCTIONS:
!  OMPbug - Entry point of console application.
!

!****************************************************************************
!
!  PROGRAM: OMPbug
!
!  PURPOSE:  Entry point for the console application.
!
!****************************************************************************


! OpenMP test   
  
MODULE array_sizes   
  IMPLICIT NONE   
  ! Problem sizes   
  INTEGER :: sz   
END MODULE array_sizes   
  
MODULE big_mod   
  IMPLICIT NONE   
CONTAINS   
  SUBROUTINE call2(arg1)   
    USE array_sizes   
    INTEGER, INTENT(INOUT) :: arg1(sz)   
    ! do something   
    WRITE(*,"(A10,Z10.8))") 'call2:', LOC(arg1(1))   
    arg1 = 11       
  CONTAINS   
    SUBROUTINE contained_never_called   
      WRITE(*,"(A10,I4)") 'arg1:', arg1(1)   
    END SUBROUTINE contained_never_called   
  END SUBROUTINE call2   
     
  SUBROUTINE call1   
    USE array_sizes   
    INTEGER :: big_array(sz) 
    big_array = 9   
    CALL OUTSIDE_call2(big_array)  
    CALL call2(big_array)  
  END SUBROUTINE call1   
  
  SUBROUTINE other_never_called   
    INTEGER i   
    !$OMP PARALLEL DO DEFAULT(NONE)   
    DO i = 1,10; END DO       
    !$OMP END PARALLEL DO   
  END SUBROUTINE other_never_called   
  
END MODULE big_mod   
  
PROGRAM DebugOMPTest   
  USE big_mod   
  USE array_sizes   
  IMPLICIT NONE     
  ! Set problem size   
  sz = 3            
  CALL call1   
END PROGRAM  

SUBROUTINE OUTSIDE_call2(arg1)   
USE array_sizes   
INTEGER, INTENT(INOUT) :: arg1(sz)   
! do something   
WRITE(*,"(A10,Z10.8))") 'call2:', LOC(arg1(1))   
arg1 = 11       
CONTAINS   
SUBROUTINE contained_never_called   
  WRITE(*,"(A10,I4)") 'arg1:', arg1(1)   
END SUBROUTINE contained_never_called   
END SUBROUTINE OUTSIDE_call2   

[/cpp]

Jim Dempsey

0 Kudos
dajum
Novice
988 Views
Was this ever solved?
0 Kudos
IanH
Honored Contributor III
988 Views
I've found a premier support submission of mine that matches this. Apparently it only occurred when debug was enabled and it was reported as fixed in 11.1 update two.
0 Kudos
Reply