- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I'm developing an application that needs to compute various eigenvalue decompositions. Is it possible to call zfeast_heev from multiple threads in parallel? Ofcourse, each thread has it's own memory. I could not find this kind of information in the documentation. Currently I'm using zhpevd, which works fine when called from multiple threads, zfeast_heev however, do not.
Looking forward to your answers
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
yes, it should works in this environment correctly. In the case if there is any concern, please give us the example for investigation.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I made a small program similar to my usage that shows the error.
Can you reproduce the error?
#include "mkl.h" #include <boost/thread/thread.hpp> #include <boost/bind.hpp> #include <boost/random/normal_distribution.hpp> #include <boost/random/mersenne_twister.hpp> #include <vector> #include <iostream> #include <cstdio> //#define USE_THREADS //#define LOWER_ONLY struct RunData { std::vector<MKL_Complex16> C; std::vector<MKL_Complex16> U; std::vector<double> D; MKL_INT info; MKL_INT dim; double e_max; }; struct ThreadCache { MKL_INT *fpm; double *epsout; double *res; }; void compute_eigenvalues( RunData& data, ThreadCache& cache ) { feastinit( cache.fpm ); MKL_INT loop; double e_min = 0; double e_max = data.e_max; MKL_INT n = data.dim; MKL_INT lda = data.dim; MKL_INT m0 = data.dim; MKL_INT m = data.dim; #ifdef LOWER_ONLY char uplo = 'U'; #else char uplo = 'F'; #endif zfeast_heev(&uplo, &n, &data.C[0], &lda, cache.fpm, cache.epsout, &loop, &e_min, &e_max, &m0, &data.D[0], &data.U[0], &m, cache.res, &data.info ); }; void thread_compute_function( std::vector< RunData >* data, unsigned int start, unsigned int end ) { if( data->empty() || start >= end || data->size() < end ) { return; } std::vector<MKL_INT> fpm(128); std::vector<double> epsout( (*data)[0].dim * (*data)[0].dim ); std::vector<double> res( (*data)[0].dim * (*data)[0].dim ); ThreadCache tc; tc.fpm = &(fpm[0]); tc.epsout = &(epsout[0]); tc.res = &(res[0]); for( unsigned int i = start; i < end; ++i ) { std::cerr << "compute eigenvalues ( " << (i+1-start) << " / " << end-start << " )\n"; compute_eigenvalues( (*data), tc ); } } // generate random vectors and compute an estimated corrolation matrix void setup_data( std::vector< RunData > &data, unsigned int n_data, unsigned int n_dims, unsigned int n_loops = 5 ) { boost::random::mt19937 rng( 0 ); boost::random::normal_distribution<double> normal_dist( 0.15, 3.21 ); data.resize( n_data ); for( unsigned int i_data = 0; i_data < n_data; ++i_data ) { #ifdef LOWER_ONLY unsigned int size_C = ((n_dims+1) * n_dims)/2; #else unsigned int size_C = n_dims * n_dims; #endif RunData &r_data = data[i_data]; r_data.dim = n_dims; r_data.e_max = 0.0; r_data.info = 0; r_data.C.resize( size_C ); r_data.U.resize( n_dims * n_dims ); r_data.D.resize( n_dims ); std::vector< MKL_Complex16 > X( n_dims ); for( unsigned int i_loop = 0; i_loop < n_loops; ++i_loop ) { unsigned int cnt = 0; // generate random vector for( unsigned int i_dim = 0; i_dim < n_dims; ++i_dim ) { X[i_dim].real = normal_dist( rng ); X[i_dim].imag = normal_dist( rng ); } for( unsigned int i = 0; i < n_dims; ++i ) { #ifdef LOWER_ONLY unsigned int j_start = i; #else unsigned int j_start = 0; #endif for( unsigned int j = j_start; j < n_dims; ++j ) { MKL_Complex16 x_ij; x_ij.real = X.real * X.real + X.imag * X .imag / (n_loops * n_loops); x_ij.imag = X.imag * X .real - X.real * X .imag / (n_loops * n_loops); r_data.C[cnt++] = x_ij; } } } double sum = 0; unsigned int cnt = 0; for( unsigned int i = 0; i < n_dims; ++i ) { #ifdef LOWER_ONLY unsigned int j_start = i; #else unsigned int j_start = 0; #endif for( unsigned int j = j_start; j < n_dims; ++j ) { if( i == j ) sum += r_data.C[cnt].real * r_data.C[cnt].real + r_data.C[cnt].imag * r_data.C[cnt].imag; ++cnt; } } r_data.e_max = sum; } } int main(int argc, char* argv[]) { unsigned int const n_data = 20; unsigned int const n_dims = 50; std::vector< RunData > data; setup_data( data, n_data, n_dims ); #ifdef USE_THREADS unsigned int const n_threads = 4; boost::thread_group thread_pool; unsigned int data_per_thread = std::max<unsigned int>( 1, data.size() / n_threads ); for( unsigned int i_thread = 0; i_thread < n_threads; ++i_thread ) { unsigned int start = i_thread * data_per_thread; unsigned int end = std::min( (i_thread+1) * data_per_thread, data.size() ); thread_pool.create_thread( boost::bind( thread_compute_function, &data, start, end ) ); } thread_pool.join_all(); #else thread_compute_function( &data, 0, data.size() ); #endif for( std::vector< RunData >::const_iterator iter = data.begin(); iter != data.end(); ++iter ) { std::cout << "zfeast_heev info value: " << iter->info << std::endl; } std::cout << "Done!\n"; getchar(); std::cout << "Closing\n"; return 0; }
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I see the crush even when mkl_set-num_threads(1). I suspeсе you need to check how to split the input data before mkl routine.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
the input data is all independent of each other so I don't know what es to check for. If USE_THREADS is not defined than everything work but if it is defined it often but not always crashes. As the standard divide and conquer methods works flawlessly when used in parallel I don't understand why this function does not.
For now I will not use this method in my scenario as I can't get it to work properly.
Thank you for your time and comments
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page