Intel® MPI Library
Get help with building, analyzing, optimizing, and scaling high-performance computing (HPC) applications.

forrtl: severe (174): SIGSEGV, segmentation fault occurred and MPI

JiaZhi3398
Beginner

Dear all,

I got stuck with the following error,

------------------------------------------
forrtl: severe (174): SIGSEGV, segmentation fault occurred
Image              PC                Routine            Line     Source
out_iofrt          000000000044B67A  Unknown            Unknown  Unknown
libpthread-2.31.s  0000147245672420  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245B1E534  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245C6C27D  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245E5C140  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245C64034  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245F29203  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245FC3315  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245FC43F6  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245E75993  Unknown            Unknown  Unknown
libmpi.so.12.0.0   0000147245E7E6E8  MPI_Sendrecv       Unknown  Unknown
libmpifort.so.12.  0000147246FC4515  pmpi_sendrecv      Unknown  Unknown
out_iofrt          000000000040BC1D  Unknown            Unknown  Unknown
out_iofrt          0000000000403922  Unknown            Unknown  Unknown
libc-2.31.so       0000147245341083  __libc_start_main  Unknown  Unknown
out_iofrt          000000000040382E  Unknown            Unknown  Unknown

------------------------------------------

I have verified that the error is reported when the program reaches the MPI_SENDRECV call.

 

  1. Problem description: the program runs on a personal computer with an i9-14900KF and 64 GB of DDR4-3000 memory.
  2. Compiled with mpiifort; compiler: /opt/intel/oneapi/compiler/2022.0.1/linux/bin/intel64/ifort
  3. MPI environment: /opt/intel/oneapi/mpi/2021.5.0/bin/mpirun (version check sketch below)
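
For reference, the active toolchain can be double-checked with something like the following (the paths and versions reported will of course differ per machine):

which mpiifort
which mpirun
ifort --version       # Intel Fortran compiler version
mpirun --version      # Intel MPI Library version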

 

The first run of the program succeeds. If the program is interrupted and then started again, the error above appears while the program is reading back the data it wrote before the interruption.

Each piece of data the program reads is about 500 MB. The number of data sets read depends on the number of parallel processes, n.

 

Part of the source code is shown below:

**************** The following initializes the variables ******************

INTEGER  B(4),D1(4),D2(4),D3(4),D4(4),T(4),ADDR(4)
INTEGER  TAG1, TAG2,IERR
INTEGER  MYTYPE1,MYTYPE2,MYTYPE3,MYTYPE4

MNM1 = 0
TAG1 = 5
TAG2 = 6
TAG3 = 7
TAG4 = 8
D1(1) = 0
D2(1) = 0
D3(1) = 0
D4(1) = 0
B(1) = 1
B(2) = NPOSIT*MNM1
B(3) = NENERGY*MNM1
B(4) = MNM1
T(1) = MPI_INTEGER
T(2) = MPI_REAL
T(3) = MPI_REAL
T(4) = MPI_INTEGER

 

CALL MPI_COMM_RANK(MPI_COMM_WORLD,MYRANK,IERR)
MYLEFT = MYRANK - 1
IF (MYLEFT .LT.0 ) MYLEFT = MPI_PROC_NULL
MYRIGHT = MYRANK + 1
IF (MYRIGHT .GE. NPROC) MYRIGHT = MPI_PROC_NULL

 

*************** The following is the functional part ******************

 

      IF((X2(N) .LT. XMIN) .OR. (X2(N) .GT. XMAX) .OR.   &
     &   (Y2(N) .LT. YMIN) .OR. (Y2(N) .GT. YMAX) .OR.   &
     &   (Z2(N) .LT. ZMIN) .OR. (Z2(N) .GT. ZMAX)) THEN
        IF(KDMPI.EQ.1) THEN
          IF(X2(N) .LT. XMIN) THEN
            TOLEFT = TOLEFT + 1
          ELSE IF(X2(N) .GT. XMAX) THEN
            TORIGHT = TORIGHT + 1
          ENDIF
        ENDIF
        IF(KC(MSC2).EQ.1) THEN
          IP(1,N) = 0
        ENDIF
      ENDIF
100   CONTINUE
      MNM2 = MAX(TOLEFT, TORIGHT)
      INN = MNM2
      MY = 0
      CALL MPI_REDUCE(INN, MNM2, 1, MPI_INTEGER, MPI_MAX, MY,  &
     &      MPI_COMM_WORLD, IERR)
      CALL MPI_BCAST(MNM2, 1,MPI_INTEGER,0,MPI_COMM_WORLD,IERR) 
      MNM1 = MNM2
      IF(MNM1 .LT. 1) THEN
         TOLEFT = 0
         TORIGHT = 0
         RETURN
      ENDIF
     
      ALLOCATE(IPTOL(MNM1), IPTOR(MNM1))
      ALLOCATE(PPTOL(NPOSIT,MNM1), PVTOL(NENERGY,MNM1), PPTOR(NPOSIT,MNM1),   &
     &         PVTOR(NENERGY,MNM1))
      ALLOCATE(PPFROML(NPOSIT,MNM1), PVFROML(NENERGY,MNM1), IPFROML(MNM1),   &
     &         PPFROMR(NPOSIT,MNM1), PVFROMR(NENERGY,MNM1), IPFROMR(MNM1))
      IPTOL = 0
      IPTOR = 0
      IPFROML = 0
      IPFROMR = 0
      PPTOL = 0.0
      PVTOL = 0.0
      PPTOR = 0.0
      PVTOR = 0.0
      PPFROML = 0.0
      PVFROML = 0.0
      PPFROMR = 0.0
      PVFROMR = 0.0
      B(1) = 1
      B(2) = NPOSIT*MNM1
      B(3) = NENERGY*MNM1
      B(4) = MNM1
      CALL MPI_GET_ADDRESS(TOLEFT,ADDR(1),IERR)
      CALL MPI_GET_ADDRESS(PPTOL(1,1),ADDR(2),IERR)
      CALL MPI_GET_ADDRESS(PVTOL(1,1),ADDR(3),IERR)
      CALL MPI_GET_ADDRESS(IPTOL(1),ADDR(4),IERR)
      D1(2) = ADDR(2) - ADDR(1)
      D1(3) = ADDR(3) - ADDR(1)
      D1(4) = ADDR(4) - ADDR(1)
      CALL MPI_GET_ADDRESS(FROMRIGHT,ADDR(1),IERR)
      CALL MPI_GET_ADDRESS(PPFROMR(1,1),ADDR(2),IERR)
      CALL MPI_GET_ADDRESS(PVFROMR(1,1),ADDR(3),IERR)
      CALL MPI_GET_ADDRESS(IPFROMR(1),ADDR(4),IERR)
      D2(2) = ADDR(2) - ADDR(1) 
      D2(3) = ADDR(3) - ADDR(1)
      D2(4) = ADDR(4) - ADDR(1)
      CALL MPI_TYPE_STRUCT(4,B,D1,T,MYTYPE1,IERR)
      CALL MPI_TYPE_COMMIT(MYTYPE1, IERR)
      CALL MPI_TYPE_STRUCT(4,B,D2,T,MYTYPE2,IERR)
      CALL MPI_TYPE_COMMIT(MYTYPE2, IERR) 
      CALL MPI_GET_ADDRESS(TORIGHT,ADDR(1),IERR)
      CALL MPI_GET_ADDRESS(PPTOR(1,1),ADDR(2),IERR)
      CALL MPI_GET_ADDRESS(PVTOR(1,1),ADDR(3),IERR)
      CALL MPI_GET_ADDRESS(IPTOR(1),ADDR(4),IERR)  
      D3(2) = ADDR(2) - ADDR(1)
      D3(3) = ADDR(3) - ADDR(1)
      D3(4) = ADDR(4) - ADDR(1) 
      CALL MPI_GET_ADDRESS(FROMLEFT,ADDR(1),IERR)
      CALL MPI_GET_ADDRESS(PPFROML(1,1),ADDR(2),IERR)
      CALL MPI_GET_ADDRESS(PVFROML(1,1),ADDR(3),IERR)
      CALL MPI_GET_ADDRESS(IPFROML(1),ADDR(4),IERR)  
      D4(2) = ADDR(2) - ADDR(1) 
      D4(3) = ADDR(3) - ADDR(1)
      D4(4) = ADDR(4) - ADDR(1) 
      CALL MPI_TYPE_STRUCT(4,B,D3,T,MYTYPE3,IERR)
      CALL MPI_TYPE_COMMIT(MYTYPE3, IERR)
      CALL MPI_TYPE_STRUCT(4,B,D4,T,MYTYPE4,IERR)
      CALL MPI_TYPE_COMMIT(MYTYPE4, IERR)  
      TOLEFT = 0
      TORIGHT = 0
      DO 200 N = 1,NM
         ISELC = IP(2,N)
         IF(ISELC.EQ.11) GOTO 200
         IF((X2(N) .LT. XMIN) .OR. (X2(N) .GT. XMAX) .OR.   &
     &      (Y2(N) .LT. YMIN) .OR. (Y2(N) .GT. YMAX) .OR.   &
     &      (Z2(N) .LT. ZMIN) .OR. (Z2(N) .GT. ZMAX)) THEN
           IP(1,N) = 0
           IF(KDMPI.EQ.1) THEN
             IF(X2(N) .LT. XMIN) THEN
               TOLEFT = TOLEFT + 1
               IPTOL(TOLEFT) = IP(2,N)
               DO K = 1,3
                 PPTOL(K,TOLEFT) = PP(K,N)
               ENDDO
               DO K = 1,6
                 PVTOL(K,TOLEFT) = PV(K,N)
               ENDDO
             ELSE IF(X2(N) .GT. XMAX) THEN
               TORIGHT = TORIGHT + 1
               IPTOR(TORIGHT) = IP(2,N)
               DO K = 1,3
                 PPTOR(K,TORIGHT) = PP(K,N)
               ENDDO
               DO K = 1,6
                 PVTOR(K,TORIGHT) = PV(K,N)
               ENDDO
             ENDIF
           ENDIF
         ENDIF
200   CONTINUE


******************Here is the MPI_SENDRECV call section***********************

 


CALL MPI_SENDRECV(TOLEFT, 1, MYTYPE1,   &
     &      MYLEFT, TAG1, FROMRIGHT, 1, MYTYPE2, MYRIGHT, TAG1,   &
     &      MPI_COMM_WORLD, STATU, IERR)

 

************ Here are the printed values of the MPI_SENDRECV arguments ***********

toleft   1   mytype1       myleft   tag1   fromright   1   mytype2       myright   tag1
0        1   -1946157051   -1       5      0           1   -1946157050   1         5      1140850688   0

 

Barbara_P_Intel
Employee

Intel MPI issues are best addressed on the HPC Toolkit Forum. I'll move this there for you.

 

TobiasK
Moderator

@JiaZhi3398 
Please use the latest compiler / Intel MPI version available.
Also, can you please compile with "-g -traceback -check all,nouninit -O0" and run with "-check_mpi"?
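
For example, something along these lines (the source file name and process count are just placeholders, assuming the executable out_iofrt is built from a single file):

mpiifort -g -traceback -check all,nouninit -O0 -o out_iofrt out_iofrt.f90   # rebuild with debug info, traceback, and runtime checks
mpirun -check_mpi -n 4 ./out_iofrt                                          # run with the MPI correctness checker enabled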
