Intel® oneAPI Math Kernel Library
Ask questions and share information with other developers who use Intel® Math Kernel Library.

Intel MKL Extended Eigensolvers: SIGSEGV, segmentation fault occurred

Lukas_R_
Beginner
398 Views

Hi All,

I am trying to use Intel MKL Extended Eigensolver with intel/17.0.1.132 compiler on nersc/cori supercomputer. I am compiling with:

 

ftn -qopenmp -mkl diagonalise_sparse.f90 read_write_mtx.f90 -o diagonalise

During compilation I get warning:

/opt/intel/compilers_and_libraries_2017.1.132/linux/mkl/lib/intel64/libmkl_core.a(mkl_memory_patched.o): In function `mkl_serv_set_memory_limit':
mkl_memory.c:(.text+0x5a9): warning: Using 'dlopen' in statically linked applications requires at runtime the shared libraries from the glibc version used for linking

When I run my job with one process it finishes succesfully, however if I increase number of processes I get error

srun -C haswell -n 4 ./diagonalise
forrtl: severe (174): SIGSEGV, segmentation fault occurred

In any case this is my code:

! diagonalise_sparse.f90
program diagonalise
    use read_write_mtx
    implicit none
    include 'mpif.h'
    include 'mkl.fi'
    ! INCLUDE 'mkl_solvers_ee.fi'

    !-------------------------------
    !        Declarations
    !-------------------------------

    integer, parameter :: PROC_PER_INT = 4 ! Number of cores per interval
    integer :: nr_of_intervals             ! Number of intervals
    double precision :: interval_lenght    ! Lenght of single interval

    ! Filenames
    character(len=20) :: filepath = 'test.mtx'
    character(len=20) :: outfile_evals
    character(len=20) :: outfile_evects

    ! Eigensystem
    real(dp), allocatable :: val(:)     ! Eigensystem
    integer, allocatable :: row_ptr(:)  ! Sparse row array of val
    integer, allocatable :: col_ind(:)  ! Sparse column array of val
    integer :: order                    ! Order of given matrix
    integer :: nnz                      ! Number of nonzero elements

    ! Solver parameters
    integer :: M0                         ! Initial search subspace (M0 >= M)
    double precision :: Emin, Emax        ! Global search intervals
    double precision :: Eminl, Emaxl      ! Local search intervals
    integer, dimension(64) :: feastparam  ! FEAST parameters

    ! Results
    integer :: M                                             ! Nr of eigenvalues
    double precision, dimension(:,:), allocatable :: X, X0   ! Eigenvectors
    double precision, dimension(:), allocatable :: E, E0     ! Eigenvalues
    double precision, dimension(:), allocatable :: res, res0 ! Residuals
    double precision :: epsout    ! Relative error on trace
    integer :: loop               ! Nr of FEAST subspace iterations
    integer :: info               ! Error handling (if info=0: success)
    character(len=1) :: UPLO='L'  ! Matrix storage (Lower/Upper/Full)

    ! Time measurements
    double precision :: tottime
    integer :: count1, count2, count_rate

    ! Looping
    integer :: i, j, k

    ! MPI variables
    integer :: ierr, rank, numtasks, len                       ! Global
    integer :: lrank, lnumtasks, color, key, NEW_COMM_WORLD    ! Local
    character(len=MPI_MAX_PROCESSOR_NAME) :: hostname

    !-------------------------------
    !      MPI initialization
    !-------------------------------

    ! initialize MPI
    call MPI_INIT(ierr)
    ! get number of tasks
    call MPI_COMM_SIZE(MPI_COMM_WORLD, numtasks, ierr)
    ! get my rank
    call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)

    ! Print mpi info from root node
    if (rank == 0) then
        print '(A,I0)', 'TOTAL_PROC: ', numtasks
    endif

    !-------------------------------
    !     Read matrix from file
    !-------------------------------

    call read_sparse_matrix(filepath, order, nnz, val, row_ptr, col_ind)

    if (rank == 0) then
        print '(A,I0)', 'SPARSE_MATRIX_SIZE: ', order
        print '(A,I0)', 'NNZ: ', nnz
    endif

    !-------------------------------
    !       Call eigensolver
    !-------------------------------

    ! Search parameters
    Emin = -10
    Emax = 26
    M0 = 6

    ! Allocate variables
    allocate(E(1:M0))         ! Eigenvalues
    allocate(X(1:order,1:M0)) ! Eigenvectors
    allocate(res(1:M0))       ! Residuals

    ! Init default FEAST parameters
    call feastinit(feastparam)
    feastparam(1) = 1  ! Print info

    ! Call eigensolver
    call system_clock(count1, count_rate)
    ! d = double
    ! s = symmetric
    ! csr = sparse
    ! ev = eigenvalue
    call dfeast_scsrev(UPLO, order, &     ! which part and order of matrix
            val, row_ptr, col_ind, &      ! matrix stored in csr format
            feastparam, epsout, loop, &   ! feast parameters, trace erro and nr of loops
            Emin, Emax, M0, &             ! search interval and initial subspace
            E, X, M, res, info)           ! diagonalization results
    call system_clock(count2, count_rate)

    ! Wait for all processes to finish
    call MPI_BARRIER(MPI_COMM_WORLD, ierr)
    tottime = (count2-count1)*1.0d0/count_rate

    if (rank == 0) then
        print '(A)', repeat('=', 70)
        print '(A,F12.2,A)', 'Diagonalization finished in ', tottime, 's'
        print '(A,I0)', 'Returncode ', info
        if (info /= 0) then
            print '(A)', '!!!Diagonalization was unsuccessful! STOPING'
            stop
        endif
        print '(A,I0,A,I0)', 'Number of found eigenvalues ', M, ' expected ', order
        print '(A)', repeat('=', 70)
    endif

    ! Close MPI processes
    call MPI_FINALIZE(ierr)
end program diagonalise
! read_write_mtx.f90
module read_write_mtx
    ! Module for reading and writing mtx format files created with scipy.mmwrite

    integer, parameter:: dp=kind(0.d0)    ! double precision

contains
    subroutine read_sparse_matrix(filepath, order, nnz, val, row_ptr, col_ind)
        ! Reads ``.mtx`` file created with ``scipy.mmwrite`` and stores 
        ! information in The Compressed Row Storage (CRS) format.
        ! The ``val`` vector stores the values of the nonzero elements of 
        ! the matrix, as they are traversed in a row-wise fashion. 
        ! The ``col_ind`` vector stores the column indexes of the elements in
        ! the ``val`` vector. That is if :math:`\mathrm{val}(k) = a_{i,j}` 
        ! then :math:`\mathrm{col\_ind}(k)=j`. The ``row_ptr`` vector stores
        ! the locations in the ``val`` vector that start a row, that is,
        ! if :math:`\mathrm{val}(k)=a_{i,j}` then
        ! :math:`\mathrm{row\_ptr}(i)\leq k \leq \mathrm{row\_ptr}(i+1)`.
        ! By convention, we define :math:`\mathrm{row\_ptr}(n+1)=\mathrm{nnz} + 1`,
        ! where ``nnz`` is the number of nonzeros in the matrix. 

        character(len=20), intent(in) :: filepath ! Path to mtx file
        integer, intent(out) :: order             ! Order of given matrix
        integer, intent(out) :: nnz               ! Number of nonzero elements in sparse matrix

        real(dp), allocatable, dimension(:), &
                & intent(out) :: val              ! Values of nonzero elements
        integer, allocatable, dimension(:), &
                & intent(out) :: row_ptr          ! Location in the val vect. that starts a row
        integer, allocatable, dimension(:), &
                & intent(out) :: col_ind          ! Column indexes of the elements in the val

        integer, dimension(:), allocatable :: ii, jj         ! Indexes of nonzero elements
        real(dp), dimension(:), allocatable :: direct_values ! Direct_Values corresponding to ii, jj

        character(len=90) :: cc   ! For comment reading
        integer :: i, c           ! Iteration variables

        !------------------------
        !  Count comment lines
        !------------------------

        open(10, file=trim(filepath), status='old')
        cc = '%'
        c = -1
        do while(cc(1:1) == '%')
            c = c + 1
            read(10,'(A)') cc
        enddo
        close(10)

        !------------------------------
        !  Read direct data (3 cols)
        !------------------------------

        open(10, file=trim(filepath), status='old')

        ! Ignore comments
        do i=1,c
            read(10,'(A1)') cc
        enddo

        ! Read matrix specifications
        read(10,*) order, order, nnz
        allocate(ii(nnz))
        allocate(jj(nnz))
        allocate(direct_values(nnz))

        ! Read columns data
        do i = 1, nnz
            read(10,*) ii(i), jj(i), direct_values(i)
        end do        
        close(10)

        !-------------------------------
        !  Convert data to CSR format
        !-------------------------------

        call convert2csr(order, nnz, ii, jj, direct_values, row_ptr, col_ind, val)

    end subroutine read_sparse_matrix


    subroutine convert2csr(order, nnz, ii, jj, direct_values, row_ptr, col_ind, val)
        ! Converts mtx data to feast compatible The Compressed Row Storage (CRS) format

        implicit none
        integer, intent(in) :: order                         ! Order of matrix
        integer, intent(in) :: nnz                           ! Number of nonzero elements
        integer, dimension(:), intent(in) :: ii, jj          ! Indexes of nonzero elements
        real(dp), dimension(:), intent(in) :: direct_values  ! Direct_Values corresponding to ii, jj

        real(dp), allocatable, dimension(:), &
                & intent(out) :: val              ! Values of nonzero elements
        integer, allocatable, dimension(:), &
                & intent(out) :: row_ptr          ! Location in the val vect. that starts a row
        integer, allocatable, dimension(:), &
                & intent(out) :: col_ind          ! Column indexes of the elements in the val

        integer :: k, k1, i, j, idum
        integer, dimension(order) :: iloc
        double precision :: adum

        allocate(row_ptr(1:order+1))
        row_ptr = 0
        allocate(col_ind(1:nnz))
        allocate(val(1:nnz))

        !-----------------------
        !  Creation of row_ptr 
        !-----------------------
        
        ! finds how many elements in row
        ! after this loop row_ptr stores nr of
        ! nonzero elements for each matrix row
        do  k = 1, nnz
            row_ptr(ii(k)) = row_ptr(ii(k)) + 1
        end do
        
        ! build row_ptr which maps each row to first nonzero element
        k = 1
        do i = 1, order+1
            k1 = row_ptr(i)
            row_ptr(i) = k
            k = k + k1
        end do

        !--------------------------------
        !  Creation of col_indx and val
        !--------------------------------

        iloc(1:order) = row_ptr(1:order)
        do  k = 1, nnz
            val(iloc(ii(k))) = direct_values(k)
            col_ind(iloc(ii(k))) = jj(k)
            iloc(ii(k)) = iloc(ii(k)) + 1
        end do

        ! Reorder by increasing column
        do i = 1, order
            do k = row_ptr(i), row_ptr(i+1) - 1
                do k1 = k, row_ptr(i+1) - 1
                    if (col_ind(k1) < col_ind(k)) then
                        idum = col_ind(k)
                        col_ind(k) = col_ind(k1)
                        col_ind(k1) = idum
                        adum = val(k)
                        val(k) = val(k1)
                        val(k1) = adum
                    endif
                enddo
            enddo
        enddo

    end subroutine convert2csr
end module read_write_mtx
%% test.mtx
%
6 6 19
1 1 10
1 5 -2
2 1 3
2 2 9
2 6 3
3 2 7
3 3 8
3 4 7
4 1 3
4 3 8
4 4 7
4 5 5
5 2 8
5 4 9
5 5 9
5 6 13
6 2 4
6 5 2
6 6 -1

 

0 Kudos
2 Replies
Irina_S_Intel
Employee
398 Views

Hello Lukas,

Thank you for the reproducer! I run your example and see crash on 4 nodes. At a first glance code looks fine so I need more time to find the root cause. I will keep you updated.

 

Best regards,

Irina

0 Kudos
Irina_S_Intel
Employee
398 Views

Hello Lukas,

The cause of your problem is that you pass UPLO='L' but matrix stored in csr format is full(both upper and lower triangular are sored) and non symmetric. Intel MKL Extended Eigensolver works only with symmetric matrices. If you wanted to solve eigenvalue problem with upper triangular part of your input matrix as a part of symmetric matrix, correct usage model would be to pass only upper triangular part in csr format and set UPLO='L'.

In future, to avoid that kind of problems please use feastparam(27)=1 as it applies matrix checker on input matrix.

Best regards,

Irina

0 Kudos
Reply