Solved: Results mismatch in SpMV_T with COO/CSC/CSR when single-precision is used.

HSPARK · ‎08-15-2022

The following code is the simplified structure used for testing.

-------------------------------------------------------------------

#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include "mkl_types.h"
#include "mkl_spblas.h"

// Sparse matrix stored in coordinate (COO) form: the k-th stored entry lives
// at (row_idx[k], col_idx[k]) and has the value ele[k]. Indices are zero-based.
template <typename ValueT>
struct COO
{
    std::string matrix_name;        // label identifying the matrix
    uint32_t num_row;               // number of rows
    uint32_t num_col;               // number of columns
    uint32_t num_nz;                // number of stored entries
    std::vector<uint32_t> row_idx;  // row index of every stored entry
    std::vector<uint32_t> col_idx;  // column index of every stored entry
    std::vector<ValueT> ele;        // value of every stored entry
};

// 10 x 10 test matrix with 18 stored entries in COO form, listed row by row.
COO<float> sample_coo{
    "test_case", 10, 10, 18,
    /* row_idx */ {0, 0, 0, 1, 2, 2, 3, 4, 4, 5, 5, 6, 7, 7, 8, 8, 8, 9},
    /* col_idx */ {0, 3, 6, 1, 2, 5, 3, 1, 4, 5, 9, 6, 4, 7, 6, 7, 8, 9},
    /* ele     */ {1, 4, 7, 2, 3, 6, 4, 2, 5, 6, 10, 7, 5, 8, 7, 8, 9, 10}};

// Sparse matrix in a compressed layout, used in this file for both CSC and CSR.
// idx_ptr holds one offset per column (CSC) or per row (CSR) plus a final
// end offset; idx holds the row (CSC) or column (CSR) index of every entry.
template <typename ValueT>
struct COMPRESSED
{
    std::string matrix_name;        // label identifying the matrix
    uint32_t num_row;               // number of rows
    uint32_t num_col;               // number of columns
    uint32_t num_nz;                // number of stored entries
    std::vector<uint32_t> idx_ptr;  // start offsets into idx/ele, one extra at the end
    std::vector<uint32_t> idx;      // minor index of every stored entry
    std::vector<ValueT> ele;        // value of every stored entry
};

// The same 10 x 10 test matrix in CSC form: idx_ptr has one offset per column
// plus the end offset, idx holds row indices.
COMPRESSED<float> sample_csc{
    "test_case", 10, 10, 18,
    /* idx_ptr */ {0, 1, 3, 4, 6, 8, 10, 13, 15, 16, 18},
    /* idx     */ {0, 1, 4, 2, 0, 3, 4, 7, 2, 5, 0, 6, 8, 7, 8, 8, 5, 9},
    /* ele     */ {1, 2, 2, 3, 4, 4, 5, 5, 6, 6, 7, 7, 7, 8, 8, 9, 10, 10}};

// The same 10 x 10 test matrix in CSR form: idx_ptr has one offset per row
// plus the end offset, idx holds column indices.
COMPRESSED<float> sample_csr{
    "test_case", 10, 10, 18,
    /* idx_ptr */ {0, 3, 4, 6, 7, 9, 11, 12, 14, 17, 18},
    /* idx     */ {0, 3, 6, 1, 2, 5, 3, 1, 4, 5, 9, 6, 4, 7, 6, 7, 8, 9},
    /* ele     */ {1, 4, 7, 2, 3, 6, 4, 2, 5, 6, 10, 7, 5, 8, 7, 8, 9, 10}};

// alpha and beta arguments handed to every mkl_sparse_s_mv call below.
const float m_alpha = 1.0f;
const float m_beta = 0.0f;

// Number of leading result entries printed after each product.
uint32_t print_upto = 5;

int main()

{

int num_row = sample_coo.num_row;

int num_col = sample_coo.num_col;

int num_nz = sample_coo.num_nz;

float *m_vec_x = new float[num_col];

float *m_vec_b = new float[num_col];

for (uint32_t idx_vec = 0; idx_vec < num_col; ++idx_vec)

{

m_vec_x[idx_vec] = 1.0;

m_vec_b[idx_vec] = 0.0;

}

// For COO Operation

MKL_INT *m_row_idx = new MKL_INT[num_nz];

MKL_INT *m_col_idx = new MKL_INT[num_nz];

float *m_mat_ele = new float[num_nz];

for (uint32_t idx = 0; idx < num_nz; ++idx)

{

m_row_idx[idx] = sample_coo.row_idx[idx];

m_col_idx[idx] = sample_coo.col_idx[idx];

m_mat_ele[idx] = sample_coo.ele[idx];

}

sparse_matrix_t m_mat_A;

sparse_status_t m_status;

matrix_descr m_descr_A;

m_status = mkl_sparse_s_create_coo(&m_mat_A, SPARSE_INDEX_BASE_ZERO,

num_row, num_col, num_nz, m_row_idx, m_col_idx, m_mat_ele);

m_descr_A.type = sparse_matrix_type_t::SPARSE_MATRIX_TYPE_GENERAL;

m_status = mkl_sparse_optimize(m_mat_A);

m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE,

m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);

std::cout << "COO SpMV:\n";

for (uint32_t iter = 0; iter < print_upto; ++iter)

{

std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";

}

std::cout << std::endl;

m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_TRANSPOSE,

m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);

std::cout << "COO SpMV_T:\n";

for (uint32_t iter = 0; iter < print_upto; ++iter)

{

std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";

}

std::cout << std::endl;

mkl_sparse_destroy(m_mat_A);

delete m_row_idx;

delete m_col_idx;

delete m_mat_ele;

// For CSC

MKL_INT *m_idx_ptr = new MKL_INT[num_col + 1];

for (uint32_t idx_idx_ptr = 0; idx_idx_ptr < num_col + 1; ++idx_idx_ptr)

{

m_idx_ptr[idx_idx_ptr] = sample_csc.idx_ptr[idx_idx_ptr];

}

m_row_idx = new MKL_INT[num_nz];

m_mat_ele = new float[num_nz];

for (uint32_t idx_nz = 0; idx_nz < num_nz; ++idx_nz)

{

m_row_idx[idx_nz] = sample_csc.idx[idx_nz];

m_mat_ele[idx_nz] = sample_csc.ele[idx_nz];

}

m_status = mkl_sparse_s_create_csc(&m_mat_A, SPARSE_INDEX_BASE_ZERO,

num_row, num_col, m_idx_ptr, m_idx_ptr + 1, m_row_idx, m_mat_ele);

m_descr_A.type = sparse_matrix_type_t::SPARSE_MATRIX_TYPE_GENERAL;

m_status = mkl_sparse_optimize(m_mat_A);

m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE,

m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);

std::cout << "CSC SpMV:\n";

for (uint32_t iter = 0; iter < print_upto; ++iter)

{

std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";

}

std::cout << std::endl;

m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_TRANSPOSE,

m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);

std::cout << "CSC SpMV_T:\n";

for (uint32_t iter = 0; iter < print_upto; ++iter)

{

std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";

}

std::cout << std::endl;

mkl_sparse_destroy(m_mat_A);

delete m_idx_ptr;

delete m_row_idx;

delete m_mat_ele;

// For CSR

m_idx_ptr = new MKL_INT[num_row + 1];

for (uint32_t idx_row_ptr = 0; idx_row_ptr < num_row + 1; ++idx_row_ptr)

{

m_idx_ptr[idx_row_ptr] = sample_csr.idx_ptr[idx_row_ptr];

}

m_row_idx = new MKL_INT[num_nz];

m_mat_ele = new float[num_nz];

for (uint32_t idx_nz = 0; idx_nz < num_nz; ++idx_nz)

{

m_col_idx[idx_nz] = sample_csr.idx[idx_nz];

m_mat_ele[idx_nz] = sample_csr.ele[idx_nz];

}

m_status = mkl_sparse_s_create_csr(&m_mat_A, SPARSE_INDEX_BASE_ZERO,

num_row, num_col, m_idx_ptr, m_idx_ptr + 1, m_col_idx, m_mat_ele);

m_descr_A.type = sparse_matrix_type_t::SPARSE_MATRIX_TYPE_GENERAL;

m_status = mkl_sparse_optimize(m_mat_A);

m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE,

m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);

std::cout << "CSR SpMV:\n";

for (uint32_t iter = 0; iter < print_upto; ++iter)

{

std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";

}

std::cout << std::endl;

m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_TRANSPOSE,

m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);

std::cout << "CSR SpMV_T:\n";

for (uint32_t iter = 0; iter < print_upto; ++iter)

{

std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";

}

std::cout << std::endl;

mkl_sparse_destroy(m_mat_A);

delete m_idx_ptr;

delete m_col_idx;

delete m_mat_ele;

return 0;

}

-----------------------------------------------------------------------------------

I tested fifteen test cases from `http://sparse.tamu.edu/` with math libraries, including MKL.

It worked fine with double-precision; every computation result was matched to each other.

But when single-precision is used, for ohne2 `https://sparse.tamu.edu/Schenk_ISEI/ohne2`, MKL generated a different result from other libraries.

Expected

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06
SpMV_T: 6.40e+01 2.65e+02 3.48e+02 4.57e+02 0.00e+00

MKL_COO

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06
SpMV_T: 1.19e+02 2.56e+02 3.84e+02 4.39e+02 -6.40e+01

MKL_CSC

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06
SpMV_T: 0.00e+00 2.71e+02 3.03e+02 4.49e+02 2.02e+01

MKL_CSR

SpMV: 6.94e-06 1.53e-05 1.70e-05 1.71e-05 6.94e-06

SpMV_T: 6.40e+01 2.65e+02 3.48e+02 4.57e+02 0.00e+00

If the three MKL formats agreed with each other but differed from the other libraries, I would suspect that I had passed incorrect input.
But for SpMV, COO and CSC produce the expected result while CSR does not, and
for SpMV_T, CSR produces the expected result while COO and CSC do not.

Is there a mistake I made in the code?

Thank you for checking my post.

Hong

VidyalathaB_Intel · ‎12-29-2022

Hi @HSPARK ,

As the issue is already addressed and the fix is provided, we are going ahead and closing this issue. Please post a new question if you need any additional assistance from Intel as this thread will no longer be monitored.

Regards,

Vidya.

View solution in original post

VidyalathaB_Intel · ‎08-17-2022

Hi Hong,

Thanks for reaching out to us.

Could you please provide the steps (or the commands to compile and run) that you followed, so that we can reproduce your results on our end? When I tried it, I got different results from yours.

Additionally please let us know your OS environment details and MKL version with which you are working.

Regards,

Vidya.

HSPARK · ‎08-17-2022

Thank you for the response, Vidya.
The following is my environment:

OS environment: (Run on WSL2 - 5.4.72-microsoft-standard-WSL2)

PRETTY_NAME="Ubuntu 22.04 LTS"
NAME="Ubuntu"
VERSION_ID="22.04"
VERSION="22.04 LTS (Jammy Jellyfish)"
VERSION_CODENAME=jammy
ID=ubuntu
ID_LIKE=debian

MKL version

2022.1.0

Used Compilers and Command

GCC - g++ (Ubuntu 11.2.0-19ubuntu1) 11.2.0
- g++ -m64 -std=c++20 -I /usr/local/include -DMKL_ILP64 -I"/opt/intel/oneapi/mkl/2022.1.0/include" to_ask_v3.cpp -o to_ask -L/opt/intel/oneapi/mkl/2022.1.0/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl -lgsl -lgslcblas
Clang - Ubuntu clang version 14.0.0-1ubuntu1
- clang++ -m64 -std=c++20 -I /usr/local/include -DMKL_ILP64 -I"/opt/intel/oneapi/mkl/2022.1.0/include" to_ask_v3.cpp -o to_ask -L/opt/intel/oneapi/mkl/2022.1.0/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl -lgsl -lgslcblas
INTEL - Intel(R) oneAPI DPC++/C++ Compiler 2022.1.0 (2022.1.0.20220316)
- dpcpp -std=c++20 -I /usr/local/include -DMKL_ILP64 -I"/opt/intel/oneapi/mkl/2022.1.0/include" to_ask_v3.cpp -o to_ask -L/opt/intel/oneapi/mkl/2022.1.0/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl -lgsl -lgslcblas

I have added the full version of the code I used (including how I load the MTX file).
GSL is used to load the MTX (Matrix Market) format easily.
Please let me know if MKL has a similar feature.

--------------------------------------------------------------------------------------------

#include <iostream>
#include <iomanip>
#include <vector>

#include <mkl_types.h>
#include <mkl_spblas.h>
#include <gsl/gsl_spmatrix.h>
#include <gsl/gsl_spblas.h>

// alpha and beta arguments handed to every mkl_sparse_s_mv call below.
const float m_alpha = 1.0f;
const float m_beta = 0.0f;
// Number of leading result entries printed after each product.
uint32_t print_upto = 5;

int main()
{
    std::string test_case = "ohne2.mtx";
    FILE *load_mtx;
    load_mtx = fopen64(test_case.c_str(), "r");
    gsl_spmatrix *gsl_mat_A = gsl_spmatrix_fscanf(load_mtx);
    fclose(load_mtx);

    uint32_t num_row = gsl_mat_A->size1;
    uint32_t num_col = gsl_mat_A->size2;
    uint32_t num_nz = gsl_mat_A->nz;

    // build Vector x and Vector b for Ax = b
    float *m_vec_x = new float[num_col];
    float *m_vec_b = new float[num_col];
    for (uint32_t idx_vec = 0; idx_vec < num_col; ++idx_vec)
    {
        m_vec_x[idx_vec] = 1.0;
        m_vec_b[idx_vec] = 0.0;
    }

    // // For COO
    // // build A in MKL COO
    MKL_INT *m_row_idx = new MKL_INT[num_nz];
    MKL_INT *m_col_idx = new MKL_INT[num_nz];
    float *m_mat_ele = new float[num_nz];
    for (uint32_t idx_nz = 0; idx_nz < gsl_mat_A->nz; ++idx_nz)
    {
        m_row_idx[idx_nz] = gsl_mat_A->i[idx_nz];
        m_col_idx[idx_nz] = gsl_mat_A->p[idx_nz];
        m_mat_ele[idx_nz] = (float)gsl_mat_A->data[idx_nz];
    }

    sparse_matrix_t m_mat_A;
    sparse_status_t m_status;
    matrix_descr m_descr_A;
    m_status = mkl_sparse_s_create_coo(&m_mat_A, SPARSE_INDEX_BASE_ZERO, num_row, num_col, num_nz, m_row_idx, m_col_idx, m_mat_ele);
    m_descr_A.type = sparse_matrix_type_t::SPARSE_MATRIX_TYPE_GENERAL;
    m_status = mkl_sparse_optimize(m_mat_A);

    std::cout << "MKL COO\n";
    m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);
    std::cout << "SpMV:    ";
    for (uint32_t iter = 0; iter < print_upto; ++iter)
    {
        std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";
    }
    std::cout << std::endl;

    m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_TRANSPOSE, m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);
    std::cout << "SpMV_T:  ";
    for (uint32_t iter = 0; iter < print_upto; ++iter)
    {
        std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";
    }
    std::cout << std::endl;

    mkl_sparse_destroy(m_mat_A);
    delete [] m_row_idx;
    delete [] m_col_idx;
    delete [] m_mat_ele;




    // For CSC
    gsl_spmatrix *in_CSC = gsl_spmatrix_ccs(gsl_mat_A);
    MKL_INT *m_idx_ptr = new MKL_INT[num_col + 1];
    for (uint32_t idx_ptr = 0; idx_ptr < num_col + 1; ++idx_ptr)
    {
        m_idx_ptr[idx_ptr] = in_CSC->p[idx_ptr];
    }

    m_row_idx = new MKL_INT[num_nz];
    m_mat_ele = new float[num_nz];
    for (uint32_t idx_nz = 0; idx_nz < num_nz; ++idx_nz)
    {
        m_row_idx[idx_nz] = in_CSC->i[idx_nz];
        m_mat_ele[idx_nz] = in_CSC->data[idx_nz];
    }

    m_status = mkl_sparse_s_create_csc(&m_mat_A, SPARSE_INDEX_BASE_ZERO, num_row, num_col, m_idx_ptr, m_idx_ptr + 1, m_row_idx, m_mat_ele);
    m_descr_A.type = sparse_matrix_type_t::SPARSE_MATRIX_TYPE_GENERAL;
    m_status = mkl_sparse_optimize(m_mat_A);

    std::cout << "MKL CSC\n";
    m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);
    std::cout << "SpMV:    ";
    for (uint32_t iter = 0; iter < print_upto; ++iter)
    {
        std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";
    }
    std::cout << std::endl;

    m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_TRANSPOSE, m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);
    std::cout << "SpMV_T:  ";
    for (uint32_t iter = 0; iter < print_upto; ++iter)
    {
        std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";
    }
    std::cout << std::endl;

    mkl_sparse_destroy(m_mat_A);
    delete [] m_idx_ptr;
    delete [] m_row_idx;
    delete [] m_mat_ele;
    gsl_spmatrix_free(in_CSC);




    // For CSR
    gsl_spmatrix *in_CSR = gsl_spmatrix_crs(gsl_mat_A);
    m_idx_ptr = new MKL_INT[num_row + 1];
    for (uint32_t idx_row_ptr = 0; idx_row_ptr < num_row + 1; ++idx_row_ptr)
    {
        m_idx_ptr[idx_row_ptr] = in_CSR->p[idx_row_ptr];
    }

    m_col_idx = new MKL_INT[num_nz];
    m_mat_ele = new float[num_nz];
    for (uint32_t idx_nz = 0; idx_nz < num_nz; ++idx_nz)
    {
        m_col_idx[idx_nz] = in_CSR->i[idx_nz];
        m_mat_ele[idx_nz] = in_CSR->data[idx_nz];
    }

    m_status = mkl_sparse_s_create_csr(&m_mat_A, SPARSE_INDEX_BASE_ZERO, num_row, num_col, m_idx_ptr, m_idx_ptr + 1, m_col_idx, m_mat_ele);
    m_descr_A.type = sparse_matrix_type_t::SPARSE_MATRIX_TYPE_GENERAL;
    m_status = mkl_sparse_optimize(m_mat_A);
    
    std::cout << "MKL CSR\n";
    m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_NON_TRANSPOSE, m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);
    std::cout << "SpMV:    ";
    for (uint32_t iter = 0; iter < print_upto; ++iter)
    {
        std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";
    }
    std::cout << std::endl;

    m_status = mkl_sparse_s_mv(sparse_operation_t::SPARSE_OPERATION_TRANSPOSE, m_alpha, m_mat_A, m_descr_A, m_vec_x, m_beta, m_vec_b);
    std::cout << "SpMV_T:  ";
    for (uint32_t iter = 0; iter < print_upto; ++iter)
    {
        std::cout << std::right << std::setw(10) << std::scientific << std::setprecision(2) << m_vec_b[iter] << "\t";
    }
    std::cout << std::endl;

    mkl_sparse_destroy(m_mat_A);
    delete [] m_idx_ptr;
    delete [] m_col_idx;
    delete [] m_mat_ele;
    gsl_spmatrix_free(in_CSR);

    return 0;
}

VidyalathaB_Intel · ‎08-25-2022

Hi Hong,

Thanks for sharing the details.

For the code in your first post this is the output I'm getting

COO SpMV:

1.20e+01 2.00e+00 9.00e+00 4.00e+00 7.00e+00

COO SpMV_T:

1.00e+00 4.00e+00 3.00e+00 8.00e+00 1.00e+01

CSC SpMV:

1.20e+01 2.00e+00 9.00e+00 4.00e+00 7.00e+00

CSC SpMV_T:

1.00e+00 4.00e+00 3.00e+00 8.00e+00 1.00e+01

CSR SpMV:

1.20e+01 2.00e+00 9.00e+00 4.00e+00 7.00e+00

CSR SpMV_T:

1.00e+00 4.00e+00 3.00e+00 8.00e+00 1.00e+01

whereas for the second code, I'm getting segmentation fault error.

Could you please let us know if there is any input file that need to be loaded to run the code?

>>Please let me know if MKL has a similar feature.

No, it doesn't have any such feature.

Regards,

Vidya.

HSPARK · ‎08-25-2022

Thank you for your concern Vidya.

The file I used (which generated incorrect results) is "ohne2.mtx" from https://sparse.tamu.edu/Schenk_ISEI/ohne2

Please use the "Matrix Market" one from the download link.

It contains two files, and the used file is `ohne2.mtx`

which is already specified in the second code -> std::string test_case = "ohne2.mtx";

VidyalathaB_Intel · ‎08-25-2022

Thanks for pointing it out Hong!

It would be a great help if you could attach the ohne2.mtx file here as it is not getting downloaded when I'm trying to get it from the provided link.

Could you please help me with that so that we can proceed further in this case?

Regards,

Vidya.

HSPARK · ‎08-25-2022

Sorry about the inaccurate explanation;

the website does not allow downloading the file by clicking the tab.

When
Google Chrome is used - right-click `Matrix Market` and save the link.
Terminal is used - wget https://suitesparse-collection-website.herokuapp.com/MM/Schenk_ISEI/ohne2.tar.gz

I hope this works.
It might be better to attach the file, but it is approximately 250 MB when uncompressed, so I do not think that would be a good idea.

Have a wonderful day.

VidyalathaB_Intel · ‎08-25-2022

Yeah, now I'm able to access it. Thanks, I'll get back to you with the results.

-Vidya

VidyalathaB_Intel · ‎08-26-2022

Hi Hong,

The issue is reproducible, and in my case the results are as follows (I changed the print_upto value to 7).

For COO I don't observe any difference in results whereas for CSC and CSR there are differences

>>Expected

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06

SpMV_T: 6.40e+01 2.65e+02 3.48e+02 4.57e+02 0.00e+00

Results obtained:

MKL COO

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06 2.38e-05 1.37e-05

SpMV_T: 6.40e+01 2.65e+02 3.48e+02 4.57e+02 0.00e+00 -1.90e+01 -8.89e+01

MKL CSC

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06 2.38e-05 1.37e-05

SpMV_T: 1.00e+02 2.71e+02 2.66e+02 3.76e+02 0.00e+00 -1.28e+02 -1.28e+02

MKL CSR

SpMV: 6.94e-06 1.04e-05 1.53e-05 3.05e-05 6.94e-06 3.05e-05 1.53e-05

SpMV_T: 6.40e+01 2.65e+02 3.48e+02 4.57e+02 0.00e+00 -1.90e+01 -8.89e+01

Could you please try changing the below lines as shown and give it a try?

For CSR line 127 - 128

>>m_idx_ptr = new MKL_INT[num_row + 1];

for (uint32_t idx_row_ptr = 0; idx_row_ptr < num_row + 1; ++idx_row_ptr)

<<

m_idx_ptr = new MKL_INT[num_row];

  for (uint32_t idx_row_ptr = 0; idx_row_ptr < num_row; ++idx_row_ptr)

For CSC line 81 - 82

>>MKL_INT *m_idx_ptr = new MKL_INT[num_col + 1];

for (uint32_t idx_ptr = 0; idx_ptr < num_col + 1; ++idx_ptr)

<<

MKL_INT *m_idx_ptr = new MKL_INT[num_col];

  for (uint32_t idx_ptr = 0; idx_ptr < num_col; ++idx_ptr)

With the above changes I could see that the results are the same for all formats (tried with both the dpcpp and g++ compilers on Ubuntu 18.04.6).

MKL COO

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06 2.38e-05 1.37e-05

SpMV_T: 6.40e+01 2.65e+02 3.48e+02 4.57e+02 0.00e+00 -1.90e+01 -8.89e+01

MKL CSC

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06 2.38e-05 1.37e-05

SpMV_T: 6.40e+01 2.65e+02 3.48e+02 4.57e+02 0.00e+00 -1.90e+01 -8.89e+01

MKL CSR

SpMV: 1.53e-05 1.53e-05 3.05e-05 3.05e-05 7.63e-06 2.38e-05 1.37e-05

SpMV_T: 6.40e+01 2.65e+02 3.48e+02 4.57e+02 0.00e+00 -1.90e+01 -8.89e+01

My hunch is that there might be some mistake with the rows_start and cols_start parameters in the code (refer to the mkl_sparse_?_create_csr and mkl_sparse_?_create_csc routines), so I tried modifying them.

Please give it a try and let me know if there is any issue.

Regards,

Vidya.

HSPARK · ‎08-29-2022

Thank you for your continued support Vidya.

Also, sorry for the late response, my condition was not well during the weekend.

If possible, can I possibly have modified code and the compilation command?

I made modifications suggested by you, but it did not change the result with my system.

Thank you for your support.

VidyalathaB_Intel · ‎08-30-2022

Hi Hong,

>>Also, sorry for the late response, my condition was not well during the weekend.

No problem and hope you are doing well now.

>>can I possibly have modified code and the compilation command?

Please find the attached code in test.zip and the compilation commands used.

Using dpcpp:

dpcpp -std=c++20 -I /usr/local/include -DMKL_ILP64 -I"/opt/intel/oneapi/mkl/2022.1.0/include" main1.cpp -o to_ask -L/opt/intel/oneapi/mkl/2022.1.0/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl -lgsl -lgslcblas

Using g++:

g++ -m64 -DMKL_ILP64 -I"/opt/intel/oneapi/mkl/2022.1.0/include" main1.cpp -o to_ask -L/opt/intel/oneapi/mkl/2022.1.0/lib/intel64 -Wl,--no-as-needed -lmkl_intel_ilp64 -lmkl_sequential -lmkl_core -lpthread -lm -ldl -lgsl -lgslcblas

Here is the screenshot of the output

Please give it a try and do let me know if there is any issue.

Regards,

Vidya.

HSPARK · ‎09-04-2022

Dear Vidya,

Thank you for your continued support, and sorry for the delayed response.
I needed some time to make sure I understood the problem.

About the result of executing the code you gave me:
-> Sadly, it did not fix the problem on my machine.
-> However, this made me check the code on other machines and test with a different test case(much smaller that can display every result).

About the test result of the code you sent and the one I originally wrote on the other machine.
When your code was tested on MacBook Pro (12.5, Mid-2015) and Compute Canada Graham Server - https://docs.alliancecan.ca/wiki/Graham, it seemed the problem was solved with your solution.

Even though it did not return what I was looking for on my machine, it returned the result I was looking for in the others.

I check the code on my machine again with different conditions: WSL + Ubuntu 18.04, Ubuntu 20.04, and Windows (no WSL layer).
However, none of the combinations solved the problems that returned incorrect results that I got initially.
Therefore, I was considering concluding the problem as machine related problem.

However, based on a document that I found in the link you gave to me.
- Sparse BLAS CSC Matrix Storage Format:
https://www.intel.com/content/www/us/en/develop/documentation/onemkl-developer-reference-c/top/appendix-a-linear-solvers-basics/sparse-matrix-storage-formats/sparse-blas-csc-matrix-storage-format.html

- Sparse BLAS CSR Matrix Storage Format
https://www.intel.com/content/www/us/en/develop/documentation/onemkl-developer-reference-c/top/appendix-a-linear-solvers-basics/sparse-matrix-storage-formats/sparse-blas-csr-matrix-storage-format.html

I believe the solution that modifies

// from
MKL_INT *m_idx_ptr = new MKL_INT[num_col + 1];
for (uint32_t idx_ptr = 0; idx_ptr < num_col + 1; ++idx_ptr)

// to
MKL_INT *m_idx_ptr = new MKL_INT[num_col];
for (uint32_t idx_ptr = 0; idx_ptr < num_col; ++idx_ptr)

OR

// from
MKL_INT *m_idx_ptr = new MKL_INT[num_row + 1];
for (uint32_t idx_ptr = 0; idx_ptr < num_row + 1; ++idx_ptr)
// to
MKL_INT *m_idx_ptr = new MKL_INT[num_row ];
for (uint32_t idx_ptr = 0; idx_ptr < num_row ; ++idx_ptr)

cannot be a solution to the problem.

The process

gsl_spmatrix *in_CSR = gsl_spmatrix_crs(gsl_mat_A);
MKL_INT *m_idx_ptr = new MKL_INT[num_row + 1];
for (uint32_t idx_ptr = 0; idx_ptr < num_row + 1; ++idx_ptr){
m_idx_ptr[idx_row_ptr] = in_CSR->p[idx_row_ptr];
}

// Some additional initialization

mkl_sparse_s_create_csr(&m_mat_A, SPARSE_INDEX_BASE_ZERO, num_row, num_col, m_idx_ptr, m_idx_ptr + 1, m_col_idx, m_mat_ele);

are equal to

gsl_spmatrix *in_CSR = gsl_spmatrix_crs(gsl_mat_A);
MKL_INT *m_idx_ptr_start = new MKL_INT[num_row];
MKL_INT *m_idx_ptr_end = new MKL_INT[num_row];
for (uint32_t idx_ptr = 0; idx_ptr < num_row; ++idx_ptr){
m_idx_ptr_start = in_CSR->p[idx_row_ptr];
m_idx_ptr_end = in_CSR->p[idx_row_ptr + 1];
}

// some additional initialization

mkl_sparse_s_create_csr(&m_mat_A, SPARSE_INDEX_BASE_ZERO, num_row, num_col, m_idx_ptr_start, m_idx_ptr_end , m_col_idx, m_mat_ele);

Based on the example from `Sparse BLAS CSR Matrix Storage Format`, this is the same as converting the three-array variation into the four-array variation.

I.e.

// with MKL_INT *m_idx_ptr = new MKL_INT[num_row + 1]
in_CSR->p[idx_row_ptr] = [1, 4, 6, 9, 12, 14];
m_idx_ptr = [1, 4, 6, 9, 12, 14];
*m_idx_ptr = [1, 4, 6, 9, 12]; // = pointerB
*(m_idx_ptr + 1) = [4, 6, 9, 12, 14]; // = pointerE

and

// with m_idx_ptr_start = in_CSR->p[idx_row_ptr] 
// and m_idx_ptr_end = in_CSR->p[idx_row_ptr + 1]
in_CSR->p[idx_row_ptr] = [1, 4, 6, 9, 12, 14];
*m_idx_ptr_start = [1, 4, 6, 9, 12] // = pointerB
*m_idx_ptr_end = [4, 6, 9, 12, 14] // = pointerE

while

//with MKL_INT *m_idx_ptr = new MKL_INT[num_row] is used the following will be given
m_idx_ptr = [1, 4, 6, 9, 12];
*m_idx_ptr = [1, 4, 6, 9, 12]; // = pointerB
*(m_idx_ptr + 1) = [4, 6, 9, 12, ??]; // = pointerE; the last element is read out of bounds (garbage value or a source of segfault)

Do I misunderstand the documents?

VidyalathaB_Intel · ‎09-12-2022

Hi Hong,

Thanks for the detailed information.

>>When your code was tested on MacBook Pro (12.5, Mid-2015) and Compute Canada Graham Server - https://docs.alliancecan.ca/wiki/Graham, it seemed the problem was solved with your solution.

Could you please let us know if you have tested your (original) code on mac and happened to see the results that you are expecting?

>>I check the code on my machine again with different conditions: WSL + Ubuntu 18.04, Ubuntu 20.04, and Windows (no WSL layer). Therefore, I was considering concluding the problem as machine related problem.

Here is the link for the system requirements of oneMKL if you think the issue is machine specific.

https://www.intel.com/content/www/us/en/developer/articles/system-requirements/oneapi-math-kernel-library-system-requirements.html

Regarding the code snippet which you have provided with calculations of

m_idx_ptr_start = in_CSR->p[idx_row_ptr];

m_idx_ptr_end = in_CSR->p[idx_row_ptr + 1];

Does it resolve the issue?

Please do let us know so that we can do the needful.

Regards,

Vidya.

VidyalathaB_Intel · ‎09-18-2022

Hi Hong,

As we haven't heard back from you, could you please provide us with an update regarding the issue?

Regards,

Vidya.

HSPARK · ‎09-19-2022

Dear Vidya,

I am terribly sorry for the late response.
I did not notice I received the answer.

About "Could you please let us know if you have tested your (original) code on mac and happened to see the results that you are expecting?"
=> When I tested my original code

MKL_INT *m_idx_ptr = new MKL_INT[num_col + 1]; or
MKL_INT *m_idx_ptr = new MKL_INT[num_row + 1];

My solution did not return the expected result for ohne2.mtx.

To check whether my solution is wrong, I tested another matrix, `LFAT5.mtx`, whose dimension and values are both much smaller (LFAT5's dimension is 14, while ohne2's is 181343).
My solution returned the required result for LFAT5.mtx, while the given solution returned incorrect results.

The last elements of the SpMV and SpMV_T results are incorrect, because

MKL_INT *m_idx_ptr = new MKL_INT[num_col]; or
MKL_INT *m_idx_ptr = new MKL_INT[num_row];

makes the computation skip the last row or column of the matrix.

About "Regarding the code snippet which you have provided with calculations of
m_idx_ptr_start = in_CSR->p[idx_row_ptr];
m_idx_ptr_end = in_CSR->p[idx_row_ptr + 1];
Does it resolve the issue?"

=> Sadly, it could not solve the problem either.
I made a mistake in the code snippet:

m_idx_ptr_end = in_CSR->p[idx_row_ptr + 1]; should be
m_idx_ptr_end = in_CSR->p[idx_row_ptr];

But the tested code is the corrected one.

The method in the code snippet returns the correct result for `LFAT5.mtx` but not for `ohne2`.

Once again, I apologize for the late response and thank you for your continued support.

VidyalathaB_Intel · ‎09-19-2022

Hi Hong,

Thanks for getting back to us.

Please correct me if I miss anything here.

So the issue (incorrect results) comes when using ohne2.mtx input file but when working with LFAT5.mtx your code is working fine and giving correct results?

If possible please attach the LFAT5.mtx file here so that we can test it from our end and proceed further in this case.

Regards,

Vidya.

HSPARK · ‎09-21-2022

Thank you for the response, Vidya.

The matrix `LFAT5.mtx` can be downloaded with the following
wget https://suitesparse-collection-website.herokuapp.com/MM/Oberwolfach/LFAT5.tar.gz
and the file named `LFAT5.mtx` is the one I used.

I think it would be better to make clear the issue to ensure there is no miscommunication.
Until this point, the case that returns the mismatched result between the COO, CSC, and CSR is the combination of ohne2.mtx with single-precision computation.

There can be matrices that show similar behaviour, but the combination of ohne2.mtx and single-precision is the only case I observed.

Thank you for your support.

Hong Sung Park

VidyalathaB_Intel · ‎09-27-2022

Hi Hong,

Thanks for providing us with the details.

As you mentioned, the issue cannot be seen with the LFAT5.mtx matrix file, but we are indeed getting wrong results with ohne2.mtx.

We have forwarded this issue to the concerned development team. We are working on your issue and will get back to you soon.

Thanks for your patience.

Regards,

Vidya.

VidyalathaB_Intel · ‎12-20-2022

Hi @HSPARK ,

Thanks for your patience.

I'm glad to inform you that the issue raised by you is fixed in the latest version oneMKL 2023.0.0 which is now available for download.

Please try running the code with 2023.0.0 oneMKL version and do let us know if it resolves the issue.

A quick check from my end shows that the results of SP_MV are the same in all the matrix formats i.e CSC, CSR, and COO with ohne2.mtx file.

Here is the screenshot of the output

Please check it once with the latest version of oneMKL (you can download the latest oneAPI Base Toolkit to get it; here is the link: https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit-download.html) and confirm with us, so that we can close this thread if there are no further queries.

Regards,

Vidya.

VidyalathaB_Intel · ‎12-29-2022

Hi @HSPARK ,

As the issue is already addressed and the fix is provided, we are going ahead and closing this issue. Please post a new question if you need any additional assistance from Intel as this thread will no longer be monitored.

Regards,

Vidya.

HSPARK · ‎01-01-2023

Thank you for the care, and sorry for the late reply, Vidya.
I appreciate your support, and it seems everything is fine now.

Results mismatch in SpMV_T with COO/CSC/CSR when single-precision is used.

Error