- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I wrote my function to compute the pseudo-inverse for skinny matrices, using the following function and armadillo
inline void inverse_sym_matrix_lapack(double * a, const MKL_INT n, char uplo = 'L') {
const auto ipv = static_cast<int*>(malloc(n * sizeof(int)));
MKL_INT lwork = -1;
//Querying, finding optimal lwork
double wkopt;
MKL_INT info;
dsytrf(&uplo, &n, a, &n, ipv, &wkopt, &lwork, &info);
lwork = static_cast<MKL_INT>(wkopt);
const auto work = static_cast<double*>(malloc(lwork * sizeof(double)));
dsytrf(&uplo, &n, a, &n, ipv, work, &lwork, &info);
if (info != 0)
{
std::cout << info << "\n";
throw std::runtime_error("dsytrf failed");
}
dsytri(&uplo, &n, a, &n, ipv, work, &info);
if (info != 0)
{
std::cout << info << "\n";
throw std::runtime_error("dsytri failed");
}
free(ipv);
free(work);
}
inline arma::mat pinvDirectSkinny(const arma::mat& source)
{
MKL_INT n_rows = source.n_rows;
MKL_INT n_cols = source.n_cols;
if (!(n_cols <= n_rows))
{
throw std::runtime_error("Size error: the matrix must be skinny!");
}
arma::mat inv(n_cols, n_cols);
const auto inv_ptr = inv.memptr();
const double alpha = 1;
const double beta = 0;
const char uplo = 'L';
//inv is now source.t() * source;
dsyrk(&uplo,"T" , &n_cols, &n_rows , &alpha, source.memptr(), &n_rows, &beta, inv_ptr, &n_cols );
//inv is now inv(source.t() * source);
inverse_sym_matrix_lapack(inv_ptr,n_cols, uplo);
//Filling the diagonal to use dgemm
for (int col = 1; col < n_cols; col++)
{
for (int row = 0; row < col; row++)
{
inv(row, col) = inv(col, row);
}
}
//Generic LAPACK
//inverse_generic_matrix_lapack(inv);
//inv= inv(source.t() * source) * source.t()
const MKL_INT inv_n_rows = n_cols;
const MKL_INT inv_n_cols = n_cols;
arma::mat res(n_cols, n_rows);
//res=inv(source.t() * source) * source.t();
dgemm("N", "T", &inv_n_rows, &n_rows, &inv_n_cols, &alpha, inv_ptr, &inv_n_rows, source.memptr(), &n_rows, &beta, res.memptr(), &inv_n_rows);
return res;
}
and after some profiling (attached), it seems the most time is spend by dsyrk freeing a allocating memory (see attachment)
Is that working as intended?
Is there a way to avoid that?
I observed that with VS2019 and intel mkl 2019 u4.
Link Copied
1 Reply
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
at first glance, this is not expected behavior.... as syrk shouldn't internally allocate some buffers. in any case, you may try to set MKL_DISABLE_FAST_MM environment variable to 1 and check if the profile would be the same.
Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page