Hello Intel,
I have found a strange bug in MVAPICH2 when compiled with IFORT. The behavior seems to be related to how ifort handles passing pointers to the MVAPICH Fortran 90 interface.
The MPI call returns successfully, but later calls to a dummy subroutine cause a SIGSEGV.
Please look at the following code - sorry if the post causes formatting issues; I don't have permission to attach the file.
!=================================================================================
!=================================================================================
!=================================================================================
! TEST CODE FOR A POSSIBLE BUG IN MVAPICH2 COMPILED WITH IFORT
! WRITTEN BY: DAVID STUEBE
! DATE: JAN 23, 2008
!
! COMPILE WITH: mpif90 -xP mpi_prog.f90 -o xtest
!
! KNOWN BEHAVIOR:
! PASSING A NON-CONTIGUOUS POINTER TO MPI_BCAST CAUSES FAILURE OF
! SUBROUTINES USING MULTI-DIMENSIONAL EXPLICIT-SHAPE ARRAYS WITHOUT AN INTERFACE -
! EVEN THOUGH THE MPI_BCAST COMPLETES SUCCESSFULLY, RETURNING VALID DATA.
!
! COMMENTS:
! I REALIZE PASSING NON-CONTIGUOUS POINTERS IS DANGEROUS - SHAME ON
! ME FOR MAKING THAT MISTAKE. HOWEVER, IT SHOULD EITHER WORK OR NOT.
! RETURNING SUCCESSFULLY BUT CAUSING INTERFACE ERRORS LATER IS
! EXTREMELY DIFFICULT TO DEBUG!
!
! CONDITIONS FOR OCCURRENCE:
! COMPILER MUST OPTIMIZE USING 'VECTORIZATION'
! ARRAY MUST BE 'LARGE' - SYSTEM DEPENDENT?
! MUST BE RUN ON MORE THAN ONE NODE TO CAUSE CRASH...
! i.e. running inside one SMP box does not crash.
!
! RUNNING UNDER MPD, ALL PROCESSES EXIT WHEN THEY HIT DUMMY2
! RUNNING UNDER MPIEXEC 0.82 FOR PBS,
! ONLY SOME PROCESSES EXIT WHEN THEY HIT DUMMY2
!
! ENVIRONMENTAL INFO:
! NODES: DELL 1850 3.0GHZ, 2GB RAM, INFINIBAND PCI-EX 4X
! SYSTEM: ROCKS 4.2
! gcc version 3.4.6 20060404 (Red Hat 3.4.6-3)
!
! IFORT/ICC:
! Intel Fortran Compiler for Intel EM64T-based applications,
! Version 9.1 Build 20061101 Package ID: l_fc_c_9.1.040
!
! MVAPICH2: mpif90 for mvapich2-1.0
! ./configure --prefix=/usr/local/share/mvapich2/1.0 --with-device=osu_ch3:mrail --with-rdma=vapi --with-pm=mpd --enable-f90 --enable-cxx --disable-romio --without-mpe
!
!=================================================================================
!=================================================================================
!=================================================================================
Module vars
USE MPI
implicit none
integer :: n,m,MYID,NPROCS
integer :: ipt
integer, allocatable, target :: data(:,:)
contains
subroutine alloc_vars
implicit none
integer Status
allocate(data(n,m),stat=status)
if (status /=0) then
write(ipt,*) "alloc ation error"
stop
end if
data = 0
end subroutine alloc_vars
SUBROUTINE INIT_MPI_ENV(ID,NP)
!===================================================================================|
! INITIALIZE MPI ENVIRONMENT |
!===================================================================================|
INTEGER, INTENT(OUT) :: ID,NP
INTEGER IERR
IERR=0
CALL MPI_INIT(IERR)
IF(IERR/=0) WRITE(*,*) "BAD MPI_INIT", ID
CALL MPI_COMM_RANK(MPI_COMM_WORLD,ID,IERR)
IF(IERR/=0) WRITE(*,*) "BAD MPI_COMM_RANK", ID
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,NP,IERR)
IF(IERR/=0) WRITE(*,*) "BAD MPI_COMM_SIZE", ID
END SUBROUTINE INIT_MPI_ENV
!==============================================================================|
SUBROUTINE PSHUTDOWN
!==============================================================================|
INTEGER IERR
IERR=0
CALL MPI_FINALIZE(IERR)
if(ierr /=0) write(ipt,*) "BAD MPI_FINALIZE", MYID
close(IPT)
STOP
END SUBROUTINE PSHUTDOWN
SUBROUTINE CONTIGUOUS_WORKS
IMPLICIT NONE
INTEGER, pointer :: ptest(:,:)
INTEGER :: IERR, I,J
write(ipt,*) "START CONTIGUOUS:"
n=2000 ! Set size here
m=n+10
call alloc_vars
write(ipt,*) "ALLOCATED DATA"
ptest => data(1:N,1:N)
IF (MYID == 0) ptest=6
write(ipt,*) "Made POINTER"
call MPI_BCAST(ptest,N*N,MPI_INTEGER,0,MPI_COMM_WORLD,IERR)
IF(IERR /= 0) WRITE(IPT,*) "BAD BCAST", MYID
write(ipt,*) "BROADCAST Data; a value:",data(1,6)
DO I = 1,N
DO J = 1,N
if(data(I,J) /= 6) &
& write(ipt,*) "INCORRECT VALUE!", I,J,data(I,J)
END DO
&n bsp;
DO J = N+1,M
if(data(I,J) /= 0) &
& write(ipt,*) "INCORRECT VALUE!", I,J,data(I,J)
END DO
END DO
! CALL THREE DIFFERENT EXAMPLES OF SUBROUTINES W/OUT AN INTERFACE
! THAT USE AN EXPLICIT SHAPE ARRAY
write(ipt,*) "CALLING DUMMY1"
CALL DUMMY1
write(ipt,*) "CALLING DUMMY2"
call Dummy2(m,n)
write(ipt,*) "CALLING DUMMY3"
call Dummy3
write(ipt,*) "FINISHED!"
END SUBROUTINE CONTIGUOUS_WORKS
SUBROUTINE NON_CONTIGUOUS_FAILS
IMPLICIT NONE
INTEGER, pointer :: ptest(:,:)
INTEGER :: IERR, I,J
write(ipt,*) "START NON_CONTIGUOUS:"
m=200 ! Set size here
n=m+10
call alloc_vars
write(ipt,*) "ALLOCATED DATA"
ptest => data(1:M,1:M)
!===================================================
! IF YOU CALL DUMMY2 HERE TOO, THEN EVERYTHING PASSES ???
!===================================================
! CALL DUMMY1 ! THIS ONE HAS NO EFFECT
! CALL DUMMY2 ! THIS ONE 'FIXES' THE BUG
IF (MYID == 0) ptest=6
write(ipt,*) "Made POINTER"
call MPI_BCAST(ptest,M*M,MPI_INTEGER,0,MPI_COMM_WORLD,IERR)
IF(IERR /= 0) WRITE(IPT,*) "BAD BCAST"
write(ipt,*) "BROADCAST Data; a value:",data(1,6)
DO I = 1,M
DO J = 1,M
if(data(J,I) /= 6) &
& write(ipt,*) "INCORRECT VALUE!",I,J,DATA(J,I)
END DO
DO J = M+1,N
if(data(J,I) /= 0) &
& write(ipt,*) "INCORRECT VALUE!",I,J,DATA(J,I)
END DO
END DO
! CALL THREE DIFFERENT EXAMPLES OF SUBROUTINES W/OUT AN INTERFACE
! THAT USE AN EXPLICIT SHAPE ARRAY
write(ipt,*) "CALLING DUMMY1"
CALL DUMMY1
write(ipt,*) "CALLING DUMMY2"
call Dummy2(m,n) ! SHOULD CRASH HERE!
write(ipt,*) "CALLING DUMMY3"
call Dummy3
write(ipt,*) "FINISHED!"
END SUBROUTINE NON_CONTIGUOUS_FAILS
End Module vars
Program main
USE vars
implicit none
CALL INIT_MPI_ENV(MYID,NPROCS)
ipt=myid+10
OPEN(ipt)
write(ipt,*) "Start memory test!"
CALL NON_CONTIGUOUS_FAILS
! CALL CONTIGUOUS_WORKS
write(ipt,*) "End memory test!"
CALL PSHUTDOWN
END Program main
! THREE DUMMY SUBROUTINES WITH EXPLICIT-SHAPE ARRAYS
! DUMMY1 DECLARES A VECTOR - THIS ONE NEVER CAUSES FAILURE
! DUMMY2 DECLARES AN ARRAY - THIS ONE CAUSES FAILURE
! DUMMY3 DECLARES AN ARRAY SIZED BY MODULE VARIABLES
SUBROUTINE DUMMY1
USE vars
implicit none
real, dimension(m) :: my_data
write(ipt,*) "m,n",m,n
write(ipt,*) "DUMMY 1", size(my_data)
END SUBROUTINE DUMMY1
SUBROUTINE DUMMY2(i,j)
USE vars
implicit none
INTEGER, INTENT(IN) ::i,j
real, dimension(i,j) :: my_data
write(ipt,*) "start: DUMMY 2", size(my_data)
END SUBROUTINE DUMMY2
SUBROUTINE DUMMY3
USE vars
implicit none
real, dimension(m,n) :: my_data
write(ipt,*) "start: DUMMY 3", size(my_data)
END SUBROUTINE DUMMY3
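For what it's worth, the workaround I would use is to broadcast through an explicitly contiguous temporary and copy back afterwards, so no pointer section ever reaches the MPI call. The following is a minimal sketch of that idea, not part of the reproducer above; SAFE_BCAST_SECTION and tmp are illustrative names:
SUBROUTINE SAFE_BCAST_SECTION
! WORKAROUND SKETCH (ASSUMED, NOT PART OF THE REPRODUCER):
! COPY THE NON-CONTIGUOUS SECTION INTO CONTIGUOUS STORAGE,
! BROADCAST THAT, AND COPY THE RESULT BACK.
USE vars
IMPLICIT NONE
INTEGER, ALLOCATABLE :: tmp(:,:)
INTEGER :: IERR
ALLOCATE(tmp(M,M))
tmp = data(1:M,1:M) ! EXPLICIT COPY-IN TO A CONTIGUOUS BUFFER
CALL MPI_BCAST(tmp,M*M,MPI_INTEGER,0,MPI_COMM_WORLD,IERR)
IF(IERR /= 0) WRITE(IPT,*) "BAD BCAST", MYID
data(1:M,1:M) = tmp ! EXPLICIT COPY-OUT BACK INTO THE SECTION
DEALLOCATE(tmp)
END SUBROUTINE SAFE_BCAST_SECTION
With the copy made explicit, the compiler never has to generate a hidden copy-in/copy-out temporary behind the MPI call, which is where the trouble appears to start.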
1 Reply
Hi,
Could you try the Intel MPI Library?
Best regards,
Andrey