Solved: Re: Re:Error with sparse x dense mkl code

Disha · ‎07-31-2020

//==============================================================
//
// SAMPLE SOURCE CODE - SUBJECT TO THE TERMS OF SAMPLE CODE LICENSE AGREEMENT,
// http://software.intel.com/en-us/articles/intel-sample-source-code-license-agreement/
//
// Copyright 2016-2018 Intel Corporation
//
// THIS FILE IS PROVIDED "AS IS" WITH NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT
// NOT LIMITED TO ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE, NON-INFRINGEMENT OF INTELLECTUAL PROPERTY RIGHTS.
//
// =============================================================
/*******************************************************************************
*   This example measures performance of computing the real matrix product 
*   C=alpha*A*B+beta*C using Intel(R) MKL function dgemm, where A, B, and C are 
*   matrices and alpha and beta are double precision scalars. 
*
*   In this simple example, practices such as memory management, data alignment, 
*   and I/O that are necessary for good programming style and high Intel(R) MKL 
*   performance are omitted to improve readability.
********************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include "mkl.h"
#include "mkl_spblas.h"

/*
 * Evaluates `function` exactly once and compares the result with
 * SPARSE_STATUS_SUCCESS. On mismatch, prints `error_message` to stdout,
 * flushes all open output streams and sets the caller's `status` to 1.
 *
 * Requirements at the point of use: an assignable variable named `status`
 * must be in scope. The macro does not return or jump; the caller decides
 * what to do after `status` has been set.
 *
 * The message is printed through "%s" so that a '%' inside it is emitted
 * literally instead of being interpreted as a conversion specification.
 */
#define CALL_AND_CHECK_STATUS(function, error_message) do { \
          if ((function) != SPARSE_STATUS_SUCCESS)          \
          {                                                 \
              printf("%s", (error_message));                \
              fflush(NULL);                                 \
              status = 1;                                   \
          }                                                 \
} while(0)

/* Number of timed repetitions of the matrix product; the reported time is */
/* the average over these repetitions. Consider adjusting LOOP_COUNT based */
/* on the performance of your computer to make sure that the total run */
/* time is at least 1 second. */
#define LOOP_COUNT 10

/*
 * Times the sparse-by-dense product C = alpha*A*B + beta*C computed by
 * mkl_sparse_d_mm. A is an m x p sparse matrix handed to MKL in 3-array CSR
 * form; B (p x n) and C (m x n) are dense, row-major matrices.
 *
 * The synthetic A has exactly two stored entries per row, in columns 0 and 1,
 * so the CSR value and column-index arrays must hold 2*m entries each (the
 * row pointer's last element is the authoritative non-zero count).
 *
 * Build note: the width of MKL_INT selected at compile time (e.g. by
 * -DMKL_ILP64) has to match the MKL interface library that is linked,
 * otherwise MKL misreads the index arrays.
 *
 * Returns 0 on success and 1 if an allocation or any sparse BLAS call fails.
 */
int main()
{
    /* All locals are declared up front so that `goto cleanup` never jumps */
    /* over an initialisation.                                             */
    double *B = NULL, *C = NULL, *values = NULL;
    MKL_INT *row_ptr = NULL;     /* m + 1 row offsets into values/col_indx */
    MKL_INT *col_indx = NULL;
    MKL_INT status = 0;          /* set to 1 by CALL_AND_CHECK_STATUS on failure */
    int m, n, p, i, r, nnz;
    int a_created = 0;           /* non-zero once the handle A is valid */
    const int nnz_per_row = 2;   /* stored entries in every row of A (needs p >= 2) */
    double alpha, beta;
    double s_initial, s_elapsed = 0.0;

    sparse_matrix_t A;
    sparse_operation_t operation = SPARSE_OPERATION_NON_TRANSPOSE;

    /* Only `type` is set; the remaining descriptor fields are not */
    /* consulted for a general matrix.                             */
    struct matrix_descr descrA;
    descrA.type = SPARSE_MATRIX_TYPE_GENERAL;

    sparse_layout_t layout = SPARSE_LAYOUT_ROW_MAJOR;
    sparse_index_base_t indexing = SPARSE_INDEX_BASE_ZERO;

    printf ("\n This example measures performance of Intel(R) MKL function dgemm \n"
            " computing real matrix C=alpha*A*B+beta*C, where A, B, and C \n"
            " are matrices and alpha and beta are double precision scalars\n\n");

    m = p = n = 1000;
    nnz = nnz_per_row * m;
    printf (" Initializing data for matrix multiplication C=A*B for matrix \n"
            " A(%ix%i) and matrix B(%ix%i)\n\n", m, p, p, n);
    alpha = 1.0; beta = 0.0;

    printf (" Allocating memory for matrices aligned on 64-byte boundary for better \n"
            " performance \n\n");

    /* values and col_indx are sized by the number of stored entries, not by m. */
    values   = (double *)mkl_malloc( (size_t)nnz * sizeof( double ), 64 );
    col_indx = (MKL_INT *)mkl_malloc( (size_t)nnz * sizeof( MKL_INT ), 64 );
    row_ptr  = (MKL_INT *)mkl_malloc( ((size_t)m + 1) * sizeof( MKL_INT ), 64 );
    B = (double *)mkl_malloc( (size_t)p * n * sizeof( double ), 64 );
    C = (double *)mkl_malloc( (size_t)m * n * sizeof( double ), 64 );

    if (values == NULL || col_indx == NULL || row_ptr == NULL ||
        B == NULL || C == NULL)
    {
        printf( "\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
        mkl_free(values);
        mkl_free(col_indx);
        mkl_free(row_ptr);
        mkl_free(B);
        mkl_free(C);
        return 1;
    }

    printf (" Intializing matrix data \n\n");

    /* Entry k belongs to row k / nnz_per_row and sits in column k % nnz_per_row. */
    for (i = 0; i < nnz; i++)
    {
        values[i] = i + m;
        col_indx[i] = i % nnz_per_row;
    }

    /* row_ptr[i] is the offset of row i's first entry; row_ptr[m] == nnz. */
    row_ptr[0] = 0;
    for (i = 1; i < m + 1; i++)
    {
        row_ptr[i] = row_ptr[i - 1] + nnz_per_row;
    }

    for (i = 0; i < (p*n); i++)
    {
        B[i] = (double)(-i-1);
    }

    for (i = 0; i < (m*n); i++)
    {
        C[i] = 0.0;
    }

    printf (" Making the first run of matrix product using Intel(R) MKL dgemm function \n"
            " via CBLAS interface to get stable run time measurements \n\n");

    /* row_ptr serves as both "row start" (row_ptr) and "row end" (row_ptr + 1). */
    /* The handle references these arrays, so they must outlive A.               */
    CALL_AND_CHECK_STATUS (mkl_sparse_d_create_csr (&A, indexing, m, p, row_ptr, row_ptr + 1, col_indx, values),
                           "Error after MKL_SPARSE_D_CREATE_CSR  \n");
    if (status != 0)
    {
        goto cleanup;
    }
    a_created = 1;

    /* Warm-up call. For row-major B (p x n) and C (m x n) the column count */
    /* and both leading dimensions are n.                                   */
    CALL_AND_CHECK_STATUS (mkl_sparse_d_mm(operation, alpha, A, descrA, layout, B, n, n, beta, C, n),
                           "Error after MKL_SPARSE_D_MM, csrA*B  \n");
    if (status != 0)
    {
        goto cleanup;
    }

    printf (" Measuring performance of matrix product using Intel(R) MKL dgemm function \n"
            " via CBLAS interface \n\n");
    s_initial = dsecnd();

    for (r = 0; r < LOOP_COUNT; r++)
    {
        CALL_AND_CHECK_STATUS (mkl_sparse_d_mm(operation, alpha, A, descrA, layout, B, n, n, beta, C, n),
                               "Error after MKL_SPARSE_D_MM, csrA*B  \n");
        if (status != 0)
        {
            goto cleanup;   /* a failed run makes the timing meaningless */
        }
    }

    s_elapsed = (dsecnd() - s_initial) / LOOP_COUNT;

    printf (" == Matrix multiplication using Intel(R) MKL dgemm completed == \n"
            " == at %.5f milliseconds == \n\n", (s_elapsed * 1000));

cleanup:
    printf (" Deallocating memory \n\n");
    /* A is an opaque handle: release it with mkl_sparse_destroy, and do so */
    /* before freeing the CSR arrays it refers to.                          */
    if (a_created)
    {
        mkl_sparse_destroy(A);
    }
    mkl_free(values);
    mkl_free(col_indx);
    mkl_free(row_ptr);
    mkl_free(B);
    mkl_free(C);

    if (status != 0)
    {
        return 1;
    }

    /* Skip the hint when the timer reported no elapsed time: dividing by */
    /* zero would make the float-to-int conversion below undefined.       */
    if (s_elapsed > 0.0 && s_elapsed < 0.9/LOOP_COUNT) {
        s_elapsed=1.0/LOOP_COUNT/s_elapsed;
        i=(int)(s_elapsed*LOOP_COUNT)+1;
        printf(" It is highly recommended to define LOOP_COUNT for this example on your \n"
               " computer as %i to have total execution time about 1 second for reliability \n"
               " of measurements\n\n", i);
    }

    printf (" Example completed. \n\n");
    return 0;
}

I am having some trouble running this code. It's a sparse x dense MKL matrix multiplication code: I convert the sparse matrix into CSR format and then multiply it by a dense matrix. I get a segmentation fault whenever I try to run it. Could someone tell me where I'm going wrong?

ChithraJ_Intel · ‎08-04-2020

Hi Disha,

We tried running your MKL code in Devcloud with the steps below, and it works fine without any issues. The problem might be that you are not using the -DMKL_ILP64 flag when compiling your application. We recommend using this flag during compilation, since it resolved the issue for us.

Steps tried:

1) source /opt/intel/inteloneapi/setvars.sh :- all oneAPI environment variables are set properly

2) export MKLROOT=/opt/intel/inteloneapi/mkl/latest/

3) gcc -fopenmp -DMKL_ILP64 -m64 -I${MKLROOT}/include a.cpp -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_gnu_thread -lpthread -lm -ldl

4) ./a.out

Please try the above steps and let us know the updates.

Please refer to the below MKL user guide for more information about compiling with ILP64 libraries and -DMKL_ILP64 compiler options.

https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-in-detail/linking-with-interface-libraries/using-the-ilp64-interface-vs-lp64-interface.html

We hope this resolves your issue. Please feel free to reach out to us in case of any queries.

Regards,

Chithra

View solution in original post

ChithraJ_Intel · ‎08-03-2020

Hi Disha,

Thanks for reaching out to us!

We will be very happy to help you with your issue. Please let us know the environment in which you are trying out the workload: is it Devcloud or your local machine? If it is your local machine, could you give us your environment details? Also, please provide the complete steps you followed, if possible, so that we can investigate the issue you are facing.

Regards,

Chithra

Disha · ‎08-03-2020

The issue I'm facing is on Devcloud. I'm trying to use 'mkl_sparse_d_mm' to multiply a sparse matrix by a dense matrix. When I run this code on Devcloud, the call to that function results in a segmentation fault.

(Please ignore the print statements. They're irrelevant to this code.)

ChithraJ_Intel · ‎08-04-2020

Hi Disha,

We tried running your MKL code in Devcloud with the steps below, and it works fine without any issues. The problem might be that you are not using the -DMKL_ILP64 flag when compiling your application. We recommend using this flag during compilation, since it resolved the issue for us.

Steps tried:

1) source /opt/intel/inteloneapi/setvars.sh :- all oneAPI environment variables are set properly

2) export MKLROOT=/opt/intel/inteloneapi/mkl/latest/

3) gcc -fopenmp -DMKL_ILP64 -m64 -I${MKLROOT}/include a.cpp -Wl,--no-as-needed -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_core -lmkl_gnu_thread -lpthread -lm -ldl

4) ./a.out

Please try the above steps and let us know the updates.

Please refer to the below MKL user guide for more information about compiling with ILP64 libraries and -DMKL_ILP64 compiler options.

https://software.intel.com/content/www/us/en/develop/documentation/mkl-linux-developer-guide/top/linking-your-application-with-the-intel-math-kernel-library/linking-in-detail/linking-with-interface-libraries/using-the-ilp64-interface-vs-lp64-interface.html

We hope this resolves your issue. Please feel free to reach out to us in case of any queries.

Regards,

Chithra

Disha · ‎08-04-2020

Thanks a lot. My code is running without any errors

ChithraJ_Intel · ‎08-05-2020

Hi Disha,

Glad to know that the solution provided was helpful for you. We won't be monitoring this thread anymore. Kindly raise a new thread if you need further assistance.

Regards,

Chithra