Intel® oneAPI Math Kernel Library
Ask questions and share information with other developers who use Intel® Math Kernel Library.
Announcements
This community is designed for sharing of public information. Please do not share Intel or third-party confidential information here.

## unexpected copy in dsyrk Beginner
167 Views
```I wrote my function to compute the pseudo-inverse for skinny matrices, using the following function and armadillo

inline void inverse_sym_matrix_lapack(double * a, const MKL_INT n, char uplo = 'L') {
const auto ipv = static_cast<int*>(malloc(n * sizeof(int)));
MKL_INT  lwork = -1;
//Querying, finding optimal lwork
double wkopt;
MKL_INT info;
dsytrf(&uplo, &n, a, &n, ipv, &wkopt, &lwork, &info);
lwork = static_cast<MKL_INT>(wkopt);
const auto work = static_cast<double*>(malloc(lwork * sizeof(double)));
dsytrf(&uplo, &n, a, &n, ipv, work, &lwork, &info);
if (info != 0)
{
std::cout << info << "\n";
throw std::runtime_error("dsytrf failed");
}
dsytri(&uplo, &n, a, &n, ipv, work, &info);
if (info != 0)
{
std::cout << info << "\n";
throw std::runtime_error("dsytri failed");
}
free(ipv);
free(work);
}

inline arma::mat pinvDirectSkinny(const arma::mat& source)
{
MKL_INT n_rows = source.n_rows;
MKL_INT n_cols = source.n_cols;
if (!(n_cols <= n_rows))
{
throw std::runtime_error("Size error: the matrix must be skinny!");
}
arma::mat inv(n_cols, n_cols);
const auto inv_ptr = inv.memptr();
const double alpha = 1;
const double beta = 0;
const char uplo = 'L';
//inv is now source.t() * source;
dsyrk(&uplo,"T" , &n_cols, &n_rows , &alpha, source.memptr(), &n_rows, &beta, inv_ptr, &n_cols );
//inv is now inv(source.t() * source);
inverse_sym_matrix_lapack(inv_ptr,n_cols, uplo);
//Filling the diagonal to use dgemm
for (int col = 1; col < n_cols; col++)
{
for (int row = 0; row < col; row++)
{
inv(row, col) = inv(col, row);
}
}
//Generic LAPACK
//inverse_generic_matrix_lapack(inv);
//inv= inv(source.t() * source) * source.t()
const MKL_INT inv_n_rows = n_cols;
const MKL_INT inv_n_cols = n_cols;
arma::mat res(n_cols, n_rows);
//res=inv(source.t() * source) * source.t();
dgemm("N", "T", &inv_n_rows, &n_rows, &inv_n_cols, &alpha, inv_ptr, &inv_n_rows, source.memptr(), &n_rows, &beta, res.memptr(), &inv_n_rows);
return res;
}
```

and after some profiling (attached), it seems the most time is spend by dsyrk  freeing a allocating memory (see attachment)

Is that working as intended?

Is there a way to avoid that?

I observed that with VS2019 and intel mkl 2019 u4.  