Intel® oneAPI Math Kernel Library
Ask questions and share information with other developers who use Intel® Math Kernel Library.

PDGEMM returns wrong result under specific conditions

duchemin__ivan
Beginner
608 Views

Hello,

I encountered a problem using Scalapack routine PDGEMM under specific conditions. After investigation, I managed to reproduce the problem with a simple test program. In my case, the error occurs for mkl versions from 16 to 18 (I tested the 17, 18.0 and 18.3 release on our local cluster linking with intelmpi and libmkl_blacs_intelmpi_lp64, the 16 and 17 on our national cluster Curie, linking with bullxmpi and libmkl_blacs_openmpi_lp64, and locally on the latest release of mkl 18 linking with openmpi and libmkl_blacs_openmpi_lp64). Linking with manually compiled version of Scalapack2.0.2 resolve the problem. Linking with mkl version 14 and 15 runs fine too.

The test consist in multiplying two matrices with all coeffs set to 1, then testing the result. It appears that for a 2 by 2 processor grid (i.e. mpirun -n 4 ), the resulting matrix can be wrong. Increasing the grid size correct the problem. The error is silent as the code runs and terminate normally. Apart from the mkl, the test code is standalone and pasted below and attached.

Cordially,

Ivan Duchemin.

! 
! The program test pdgemm matrix x matrix multiplication under fixed condition 
! on a square processor grid provided by the user.
! 
! The product tested is:
! 
!  C = A * B
! 
! with A being a 8160 x 8160  matrix with all coeffs set to 1
! and  B being a 8160 x 19140 matrix with all coeffs set to 1
! The result expected is thus all coeffs of C equal to 8160
! 
PROGRAM TEST
  
  ! Parameters
  INTEGER         , PARAMETER :: M=8160, N =19140, K=8160, DLEN_=9
  INTEGER         , PARAMETER :: CSRC=1, RSRC=1
  DOUBLE PRECISION, PARAMETER :: ONE=1.0D+0, ZERO=0.0D+0
  
  ! work variables
  INTEGER                                       :: ICTXT
  INTEGER                                       :: IAM
  INTEGER                                       :: NPROCS
  INTEGER                                       :: NPROW
  INTEGER                                       :: NPCOL
  INTEGER                                       :: MYROW
  INTEGER                                       :: MYCOL
  INTEGER                                       :: DESCA(9)
  INTEGER                                       :: DESCB(9)
  INTEGER                                       :: DESCC(9)
  INTEGER                                       :: M_A
  INTEGER                                       :: N_A
  INTEGER                                       :: M_B
  INTEGER                                       :: N_B
  INTEGER                                       :: M_C
  INTEGER                                       :: N_C
  INTEGER                                       :: MB_A
  INTEGER                                       :: NB_A
  INTEGER                                       :: MB_B
  INTEGER                                       :: NB_B
  INTEGER                                       :: MB_C
  INTEGER                                       :: NB_C
  DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:,:) :: A
  DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:,:) :: B
  DOUBLE PRECISION, ALLOCATABLE, DIMENSION(:,:) :: C
  
  ! Get starting information
  CALL BLACS_PINFO( IAM, NPROCS )
  
  ! try setting square grid
  NPROW = sqrt(REAL(NPROCS,kind=8))
  NPCOL = sqrt(REAL(NPROCS,kind=8))
  if ( NPROW*NPCOL .ne. NPROCS ) then
    print *,"please provide a square number of procs"
    stop 1
  end if
  
  ! Define process grid
  CALL BLACS_GET( -1, 0, ICTXT )
  CALL BLACS_GRIDINIT( ICTXT, 'R', NPROW, NPCOL )
  CALL BLACS_GRIDINFO( ICTXT, NPROW, NPCOL, MYROW, MYCOL )
  
  ! set A matrix dimensions
  M_A = M
  N_A = K
  
  ! set B matrix dimensions
  M_B = K
  N_B = N
  
  ! set C matrix dimensions
  M_C = M
  N_C = N
  
  ! set blocking factors for A matrix
  MB_A = M_A/NPROW
  NB_A = N_A/NPCOL
  
  ! set blocking factors for B matrix
  MB_B = M_B/NPROW
  NB_B = 32
  
  ! set blocking factors for C matrix
  MB_C = M_C/NPROW
  NB_C = 32
  
  ! get A local dimensions
  MLOC_A = NUMROC( M_A, MB_A, MYROW, 0, NPROW )
  NLOC_A = NUMROC( N_A, NB_A, MYCOL, 0, NPCOL )
  
  ! get B local dimensions
  MLOC_B = NUMROC( M_B, MB_B, MYROW, 0, NPROW )
  NLOC_B = NUMROC( N_B, NB_B, MYCOL, 0, NPCOL )
  
  ! get C local dimensions
  MLOC_C = NUMROC( M_C, MB_C, MYROW, 0, NPROW )
  NLOC_C = NUMROC( N_C, NB_C, MYCOL, 0, NPCOL )
  
  ! Initialize the array descriptor for the matrix A, B and C
  CALL DESCINIT( DESCA, M_A, N_A, MB_A, NB_A, 0, 0, ICTXT, max(MLOC_A,1), INFO )
  CALL DESCINIT( DESCB, M_B, N_B, MB_B, NB_B, 0, 0, ICTXT, max(MLOC_B,1), INFO )
  CALL DESCINIT( DESCC, M_C, N_C, MB_C, NB_C, 0, 0, ICTXT, max(MLOC_C,1), INFO )
  
  ! print grid infos
  do IPROC=0,NPROCS-1
    if ( IPROC .eq. IAM ) then
      print *,""
      print *,"-------------------------"
      print *,"PROC, MYROW, MYCOL :",PROC,MYROW,MYCOL
      print *,"MLOC_A, NLOC_A :",MLOC_A,NLOC_A
      print *,"MLOC_B, NLOC_B :",MLOC_B,NLOC_B
      print *,"MLOC_C, NLOC_C :",MLOC_C,NLOC_C
      print *,"DESCA :",DESCA
      print *,"DESCB :",DESCB
      print *,"DESCC :",DESCC
      print *,"-------------------------"
      print *,""
    end if
    CALL SLEEP(2)
  end do
  
  ! allocate and set matrices
  ALLOCATE( A(MLOC_A,NLOC_A) )
  ALLOCATE( B(MLOC_B,NLOC_B) )
  ALLOCATE( C(MLOC_C,NLOC_C) )
  
  ! init A matrix
  do j=1,NLOC_A
    do i=1,MLOC_A
      A(i,j)=ONE
    end do
  end do
  
  ! init B matrix
  do j=1,NLOC_B
    do i=1,MLOC_B
      B(i,j)=ONE
    end do
  end do
  
  ! compute A * B
  CALL PDGEMM('N', 'N',       &
&             M, N, K,        &
&             ONE,            &
&             A, 1, 1, DESCA, &
&             B, 1, 1, DESCB, &
&             ZERO,           &
&             C, 1, 1, DESCC )
  
  ! check result
  do j=1,NLOC_C
    do i=1,MLOC_C
      if ( abs(C(i,j)-K) .gt. 1.0D-8 ) then
        print *,"Error: result differs from exact"
        print *,"C(",i,",",j,")=",C(i,j)
        print *,"expected ",K
        print *,"TEST FAILED!"
        stop 2
      end if
    end do
  end do
 
  ! inform that everything is ok
  if ( IAM .eq. 0 ) then
    print *,"TEST PASSED!"
  end if 

  ! terminate
  CALL BLACS_GRIDEXIT( ICTXT )
  CALL BLACS_EXIT( 0 )
  
END PROGRAM

 

 

 

0 Kudos
5 Replies
Gennady_F_Intel
Moderator
608 Views

Thanks for the case, Ivan.

We managed to reproduce the problem with 4 process but the test passed with 16 or 25... The problem is escalated and we will notify you of further updates of this case.

 

0 Kudos
Gennady_F_Intel
Moderator
608 Views

Ivan, the fix of this problem available into MKL 2019 u1. This update is available for download. Please check how this work on your side and let us know. 

0 Kudos
Gennady_F_Intel
Moderator
608 Views

attached the log file shows the test passed,( MKL_VERBOSE=1).

0 Kudos
mecej4
Honored Contributor III
608 Views

Gennady, did you intend the attachment in #4 to be seen only by privileged Intel personnel? Perhaps you selected "Sable Falls" without intending to?

0 Kudos
Gennady_F_Intel
Moderator
608 Views

Thanks mecej4, it was some technical problem with this forum. 

here is the log I captured 

 
  -------------------------
 PROC, MYROW, MYCOL :  0.0000000E+00           0           0
 MLOC_A, NLOC_A :        4080        4080
 MLOC_B, NLOC_B :        4080        9572
 MLOC_C, NLOC_C :        4080        9572
 DESCA :           1           0        8160        8160        4080        4080
           0           0        4080
 DESCB :           1           0        8160       19140        4080          32
           0           0        4080
 DESCC :           1           0        8160       19140        4080          32
           0           0        4080
 -------------------------
 PROC, MYROW, MYCOL :  0.0000000E+00           0           1
 MLOC_A, NLOC_A :        4080        4080
 MLOC_B, NLOC_B :        4080        9568
 MLOC_C, NLOC_C :        4080        9568
 DESCA :           1           0        8160        8160        4080        4080
           0           0        4080
 DESCB :           1           0        8160       19140        4080          32
           0           0        4080
 DESCC :           1           0        8160       19140        4080          32
           0           0        4080
 -------------------------
 PROC, MYROW, MYCOL :  0.0000000E+00           1           0
 MLOC_A, NLOC_A :        4080        4080
 MLOC_B, NLOC_B :        4080        9572
 MLOC_C, NLOC_C :        4080        9572
 DESCA :           1           0        8160        8160        4080        4080
           0           0        4080
 DESCB :           1           0        8160       19140        4080          32
           0           0        4080
 DESCC :           1           0        8160       19140        4080          32
           0           0        4080
 -------------------------
 PROC, MYROW, MYCOL :  0.0000000E+00           1           1
 MLOC_A, NLOC_A :        4080        4080
 MLOC_B, NLOC_B :        4080        9568
 MLOC_C, NLOC_C :        4080        9568
 DESCA :           1           0        8160        8160        4080        4080
           0           0        4080
 DESCB :           1           0        8160       19140        4080          32
           0           0        4080
 DESCC :           1           0        8160       19140        4080          32
           0           0        4080
 -------------------------
MKL_VERBOSE Intel(R) MKL 2019.0 Update 1 Product build 20180928 for Intel(R) 64 architecture Intel(R) Advanced Vector Extensions (Intel(R) AVX) enabled processors, Lnx 2.80GHz lp64 intel_thread
MKL_VERBOSE DGEMM(N,N,512,9568,4080,0x1250650,0x2b4f6b1de080,512,0x2b4f6e1af080,4080,0x1250658,0x2b4f587dd280,4080) 421.40ms CNR:OFF Dyn:1 FastMM:1 TID:0  NThr:5
...................
MKL_VERBOSE DGEMM(N,N,496,9572,4080,0x1250650,0x2b798943b080,496,0x2b798b41c080,4080,0x14f76c8,0x2b7975a51280,4080) 394.36ms CNR:OFF Dyn:1 FastMM:1 TID:0  NThr:5
 TEST PASSED!
 

0 Kudos
Reply