Intel® oneAPI Math Kernel Library
Ask questions and share information with other developers who use Intel® Math Kernel Library.
7061 Discussions

Is it save to call ?feast_syev/?feast_heev from multiple threads?

Arne_K_
Beginner
486 Views

Hi,

I'm developing an application that needs to compute various eigenvalue decompositions. Is it possible to call zfeast_heev from multiple threads in parallel? Ofcourse, each thread has it's own memory. I could not find this kind of information in the documentation. Currently I'm using zhpevd, which works fine when called from multiple threads, zfeast_heev however, do not.

Looking forward to your answers

 

0 Kudos
4 Replies
Gennady_F_Intel
Moderator
486 Views

yes, it should works in this environment correctly. In the case if there is any concern, please give us the example for investigation.

0 Kudos
Arne_K_
Beginner
486 Views

Hi,

I made a small program similar to my usage that shows the error.

Can you reproduce the error?

#include "mkl.h"

#include <boost/thread/thread.hpp>
#include <boost/bind.hpp>
#include <boost/random/normal_distribution.hpp>
#include <boost/random/mersenne_twister.hpp>

#include <vector>
#include <iostream>

#include <cstdio>

//#define USE_THREADS
//#define LOWER_ONLY

struct RunData {
 std::vector<MKL_Complex16> C;
 std::vector<MKL_Complex16> U;
 std::vector<double> D;
 MKL_INT info;
 MKL_INT dim;
 double e_max;
};

struct ThreadCache {
 MKL_INT *fpm;
 double *epsout;
 double *res;
};
 
void compute_eigenvalues( RunData& data, ThreadCache& cache ) {

 feastinit( cache.fpm );

 MKL_INT loop;
 double e_min = 0;
 double e_max = data.e_max;
 MKL_INT n   = data.dim;
 MKL_INT lda = data.dim;
 MKL_INT m0  = data.dim;
 MKL_INT m   = data.dim;
#ifdef LOWER_ONLY
 char uplo   = 'U';
#else
 char uplo   = 'F';
#endif

 zfeast_heev(&uplo, &n, &data.C[0], &lda, cache.fpm, cache.epsout, &loop, &e_min, &e_max, &m0, &data.D[0], &data.U[0], &m, cache.res, &data.info );

};

void thread_compute_function( std::vector< RunData >* data, unsigned int start, unsigned int end ) {
 if( data->empty() || start >= end || data->size() < end ) {
  return;
 }

 std::vector<MKL_INT> fpm(128);
 std::vector<double>  epsout( (*data)[0].dim * (*data)[0].dim );
 std::vector<double>  res( (*data)[0].dim * (*data)[0].dim );

 ThreadCache tc;
 tc.fpm    = &(fpm[0]);
 tc.epsout = &(epsout[0]);
 tc.res    = &(res[0]);


 for( unsigned int i = start; i < end; ++i ) {
  std::cerr << "compute eigenvalues ( " << (i+1-start) << " / " << end-start << " )\n";
  compute_eigenvalues( (*data), tc );
 }
}

// generate random vectors and compute an estimated corrolation matrix
void setup_data( std::vector< RunData > &data, unsigned int n_data, unsigned int n_dims, unsigned int n_loops = 5 ) {

 boost::random::mt19937 rng( 0 );
 boost::random::normal_distribution<double> normal_dist( 0.15, 3.21 );

 data.resize( n_data );
 for( unsigned int i_data = 0; i_data < n_data; ++i_data ) {

#ifdef LOWER_ONLY
  unsigned int size_C = ((n_dims+1) * n_dims)/2;
#else
  unsigned int size_C = n_dims * n_dims;
#endif

  RunData &r_data = data[i_data];
  r_data.dim = n_dims;
  r_data.e_max = 0.0;
  r_data.info = 0;
  r_data.C.resize( size_C );
  r_data.U.resize( n_dims * n_dims );
  r_data.D.resize( n_dims );

  std::vector< MKL_Complex16 > X( n_dims );

  for( unsigned int i_loop = 0; i_loop < n_loops; ++i_loop ) {

   unsigned int cnt = 0;
   // generate random vector
   for( unsigned int i_dim = 0; i_dim < n_dims; ++i_dim ) {
    X[i_dim].real = normal_dist( rng );
    X[i_dim].imag = normal_dist( rng );
   }

   for( unsigned int i = 0; i < n_dims; ++i ) {
#ifdef LOWER_ONLY
    unsigned int j_start = i;
#else
    unsigned int j_start = 0;
#endif
    for( unsigned int j = j_start; j < n_dims; ++j ) {
     MKL_Complex16 x_ij;
     x_ij.real = X.real * X.real + X.imag * X.imag / (n_loops * n_loops);
     x_ij.imag = X.imag * X.real - X.real * X.imag / (n_loops * n_loops);

     r_data.C[cnt++] = x_ij;
    }
   }
  }

  double sum = 0;
  unsigned int cnt = 0;
  for( unsigned int i = 0; i < n_dims; ++i ) {
   #ifdef LOWER_ONLY
    unsigned int j_start = i;
#else
    unsigned int j_start = 0;
#endif
    for( unsigned int j = j_start; j < n_dims; ++j ) {
     if( i == j )
      sum += r_data.C[cnt].real * r_data.C[cnt].real + r_data.C[cnt].imag * r_data.C[cnt].imag;
     ++cnt;
    }
  }

  r_data.e_max = sum;
 }
}

int main(int argc, char* argv[])
{
 unsigned int const n_data  = 20;
 unsigned int const n_dims  = 50;

 std::vector< RunData > data;

 setup_data( data, n_data, n_dims );

#ifdef USE_THREADS
 unsigned int const n_threads = 4;

 boost::thread_group thread_pool;

 unsigned int data_per_thread = std::max<unsigned int>( 1, data.size() / n_threads );

 for( unsigned int i_thread = 0; i_thread < n_threads; ++i_thread ) {
  unsigned int start = i_thread * data_per_thread;
  unsigned int end = std::min( (i_thread+1) * data_per_thread, data.size() );
  thread_pool.create_thread( boost::bind( thread_compute_function, &data, start, end ) );
 }

 thread_pool.join_all();
#else
 thread_compute_function( &data, 0, data.size() );
#endif


 for( std::vector< RunData >::const_iterator iter = data.begin(); iter != data.end(); ++iter ) {
  std::cout << "zfeast_heev info value: " << iter->info << std::endl;
 }

 std::cout << "Done!\n";

 getchar();

 std::cout << "Closing\n";

 return 0;
}

 

0 Kudos
Gennady_F_Intel
Moderator
486 Views

I see the crush even when mkl_set-num_threads(1). I suspeсе you need to check how to split the input data before mkl routine.

0 Kudos
Arne_K_
Beginner
486 Views

Hi,

the input data is all independent of each other so I don't know what es to check for. If USE_THREADS is not defined than everything work but if it is defined it often but not always crashes. As the standard divide and conquer methods works flawlessly when used in parallel I don't understand why this function does not.

For now I will not use this method in my scenario as I can't get it to work properly.

Thank you for your time and comments

0 Kudos
Reply