- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
We are running multiple FEAST eigenvalue searches (dfeast_scsrgv) in parallel on different intervals.
When run sequentially, the eigenvalues are perfectly reproducible.
When run concurrently, results vary between executions unless we protect dfeast_scsrgv with a mutex.
Environment:
- Intel MKL version: Intel oneAPI mkl 2023.1.0
- OS: Windows 2011
- Compiler: MSVC
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thank you for posting in the forum!
Could you share more details how the FEAST solvers are called? Are they called in an OpenMP region with one thread for each of them? It will be great if you could share a code to reproduce this issue.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello Fengrui,
Thank you for your reply.
To clarify, I’m not using OpenMP.
The FEAST solvers (dfeast_scsrgv) are executed in parallel threads created manually using boost::thread_group (each thread computes the eigenvalues in a different interval).
Each thread constructs its own instance of EigenValuesAndVectorsFeast, which in turn calls dfeast_scsrgv.
Here is the relevant code section:
bool
MklSymMatrixSolver::EigenSystem(int neig, int m, double *eig, double *eiv, int &neig_found)
{
if (!matrix_a_ && !matrix_b_)
return false;
std::vector<std::pair<double,double>> intervals;
intervals.push_back(std::make_pair(0.1,1.5));
intervals.push_back(std::make_pair(1.5,3));
intervals.push_back(std::make_pair(3,6));
intervals.push_back(std::make_pair(6,15));
intervals.push_back(std::make_pair(15,30));
intervals.push_back(std::make_pair(30,60));
intervals.push_back(std::make_pair(60,120));
intervals.push_back(std::make_pair(120,250));
intervals.push_back(std::make_pair(250,500));
int last = 10000;
for (int i =500, shift =250; i<last; i+=shift)
intervals.push_back(std::make_pair(i,i+shift));
intervals.push_back(std::make_pair(last,1e10));
std::vector<EigenValuesAndVectors*> eigvs;
int saved_neig = neig;
int tot_eigen_found= 0;
int i_eig_start = 0;
int i_eiv_start = 0;
size_t grp_shift = intervals.size()>1? thread::hardware_concurrency() / 4 :1;
size_t remaning_intervals = intervals.size();
for (auto grp_it = intervals.begin(); grp_it!= intervals.end() && neig>0 && grp_shift>0; grp_it+=grp_shift){
thread_group grp_thread;
for (auto it= grp_it; it!= grp_it+grp_shift; it++){
EigenValuesAndVectors *eigv = new EigenValuesAndVectorsFeast(matrix_a_,matrix_b_,*it,neig,m);
eigvs.push_back(eigv);
grp_thread.create_thread(bind(&EigenValuesAndVectors::Compute, eigv));
}
grp_thread.join_all();
for (auto it= eigvs.begin(); it!= eigvs.end() && neig>0; it++){
EigenValuesAndVectors *eigv = *it;
neig_found = eigv->GetNeigFound();
bool cond = neig_found<neig;
if ((*it)->Found()){
#ifdef _DEBUG
int i_eig_start_bck = i_eig_start;
#endif
eigv->FillExternalVectors(eig,eiv,i_eig_start,i_eiv_start, neig);
#ifdef _DEBUG
std::cout<<"found "<< (cond?neig_found:neig)<<" eigenvalue\\s within interval:"<<eigv->GetInterval().first<<"-"<<eigv->GetInterval().second<<std::endl;
for (int i=i_eig_start_bck; i<i_eig_start ; i++)
std::cout<<eig[i]<<std::endl;
#endif
tot_eigen_found+=cond?neig_found:neig;
neig-=cond?neig_found:neig;
}
delete eigv;
}
eigvs.clear();
remaning_intervals-=grp_shift;
grp_shift = remaning_intervals>grp_shift? grp_shift:remaning_intervals;
}
neig_found= tot_eigen_found;
return true;
} These are the classes used in the above code, to compute the eigenvalues in each interval:
class EigenValuesAndVectors
{
public:
EigenValuesAndVectors(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
A_(A),B_(B), interval_(interval)
{
mat_type_ = 'U';
feastinit (fpm_);
fpm_[1] = 3; //number of contour points Ne = 8
fpm_[3] = 2; //refinement loops
//fpm_[27] = 1; //check positive definte
epsout_ = 0.0; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
loop_ = 0; //On output, contains the number of refinement loop executed. Ignored on input.
m0_ = static_cast<int>(ceil(neig*1.5)); //Total number of eigenvalues to be computed
leiv_ = new double [m0_ *m];
leig_ = new double [m0_];
res_ = new double[m0_];
info_ = 0;
}
virtual void Compute()=0;
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig ) = 0;
bool Found() const
{
return info_ == 0 || info_ == 3;
}
int GetNeigFound() const
{
return neig_found_;
}
const double* GetEiv() const
{
return leiv_;
}
const double* GetEig() const
{
return leig_;
}
const std::pair<double, double> & GetInterval() const
{
return interval_;
}
~EigenValuesAndVectors()
{
delete [] leiv_;
delete [] leig_;
delete [] res_;
}
protected:
char mat_type_;
MKL_INT fpm_[128]; //see https://software.intel.com/en-us/node/470386
double epsout_; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
MKL_INT loop_; //On output, contains the number of refinement loop executed. Ignored on input.
MKL_INT m0_; //Total number of eigenvalues to be computed
const std::pair<double,double> &interval_;
std::shared_ptr<MklSymMatrix> A_;
std::shared_ptr<MklSymMatrix> B_;
int neig_found_;
double *leiv_;
double *leig_;
double *res_;
MKL_INT info_;
};
class EigenValuesAndVectorsFeast: public EigenValuesAndVectors
{
public:
EigenValuesAndVectorsFeast(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
EigenValuesAndVectors(A,B, interval,neig,m)
{}
virtual void Compute()
{
double emin = 1/interval_.second; //The lower bounds of the interval to be searched for eigenvalues; emin < emax.
double emax = 1/interval_.first; //The upper bounds of the interval to be searched for eigenvalues; emin < emax.
int num_of_mat_row = A_->N();
std::lock_guard<std::mutex> lock(feast_mutex_);
dfeast_scsrgv(&mat_type_, &num_of_mat_row, B_->A(),B_->Ia(),B_->Ja(), A_->A(),A_->Ia(),A_->Ja(),
fpm_,&epsout_,&loop_,&emin,&emax,&m0_,leig_,leiv_,&neig_found_,res_,&info_);
}
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig )
{
bool cond = neig_found_<neig;
int exit_loop = cond?0:neig_found_-neig;
for (int ie= neig_found_-1; ie>= exit_loop; ie--, i_eig_start++)
eig[i_eig_start]= 1/leig_[ie];
int m = A_->N();
for (int ie= (neig_found_-1)*m; ie>=exit_loop*m; ie-=m)
for (int j = ie; j<ie+m; j++)
eiv[i_eiv_start++]= leiv_[j];
}
protected:
static std::mutex feast_mutex_;
}; The results become perfectly reproducible (same as the sequential version).
Without the mutex, the computed eigenvalues vary slightly between runs.
So it seems that dfeast_scsrgv is not fully thread-safe when called concurrently from different threads, even if the data and fpm arrays are distinct per thread.
Thanks in advance.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello Fengrui,
To clarify, I’m not using OpenMP.
The FEAST solvers (dfeast_scsrgv) are executed in parallel threads created manually using boost::thread_group (each thread computes the eigenvalues in a different interval).
Each thread constructs its own instance of EigenValuesAndVectorsFeast, which in turn calls dfeast_scsrgv.
Here is the relevant code section:
bool
MklSymMatrixSolver::EigenSystem(int neig, int m, double *eig, double *eiv, int &neig_found)
{
if (!matrix_a_ && !matrix_b_)
return false;
std::vector<std::pair<double,double>> intervals;
intervals.push_back(std::make_pair(0.1,1.5));
intervals.push_back(std::make_pair(1.5,3));
intervals.push_back(std::make_pair(3,6));
intervals.push_back(std::make_pair(6,15));
intervals.push_back(std::make_pair(15,30));
intervals.push_back(std::make_pair(30,60));
intervals.push_back(std::make_pair(60,120));
intervals.push_back(std::make_pair(120,250));
intervals.push_back(std::make_pair(250,500));
int last = 10000;
for (int i =500, shift =250; i<last; i+=shift)
intervals.push_back(std::make_pair(i,i+shift));
intervals.push_back(std::make_pair(last,1e10));
std::vector<EigenValuesAndVectors*> eigvs;
int saved_neig = neig;
int tot_eigen_found= 0;
int i_eig_start = 0;
int i_eiv_start = 0;
size_t grp_shift = intervals.size()>1? thread::hardware_concurrency() / 4 :1;
size_t remaning_intervals = intervals.size();
for (auto grp_it = intervals.begin(); grp_it!= intervals.end() && neig>0 && grp_shift>0; grp_it+=grp_shift){
thread_group grp_thread;
for (auto it= grp_it; it!= grp_it+grp_shift; it++){
EigenValuesAndVectors *eigv = new EigenValuesAndVectorsFeast(matrix_a_,matrix_b_,*it,neig,m);
eigvs.push_back(eigv);
grp_thread.create_thread(bind(&EigenValuesAndVectors::Compute, eigv));
}
grp_thread.join_all();
for (auto it= eigvs.begin(); it!= eigvs.end() && neig>0; it++){
EigenValuesAndVectors *eigv = *it;
neig_found = eigv->GetNeigFound();
bool cond = neig_found<neig;
if ((*it)->Found()){
#ifdef _DEBUG
int i_eig_start_bck = i_eig_start;
#endif
eigv->FillExternalVectors(eig,eiv,i_eig_start,i_eiv_start, neig);
#ifdef _DEBUG
std::cout<<"found "<< (cond?neig_found:neig)<<" eigenvalue\\s within interval:"<<eigv->GetInterval().first<<"-"<<eigv->GetInterval().second<<std::endl;
for (int i=i_eig_start_bck; i<i_eig_start ; i++)
std::cout<<eig[i]<<std::endl;
#endif
tot_eigen_found+=cond?neig_found:neig;
neig-=cond?neig_found:neig;
}
delete eigv;
}
eigvs.clear();
remaning_intervals-=grp_shift;
grp_shift = remaning_intervals>grp_shift? grp_shift:remaning_intervals;
}
neig_found= tot_eigen_found;
return true;
}
The classes used to compute the eigenvalues in each interval are the following:
class EigenValuesAndVectors
{
public:
EigenValuesAndVectors(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
A_(A),B_(B), interval_(interval)
{
mat_type_ = 'U';
feastinit (fpm_);
fpm_[1] = 3; //number of contour points Ne = 8
fpm_[3] = 2; //refinement loops
//fpm_[27] = 1; //check positive definte
epsout_ = 0.0; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
loop_ = 0; //On output, contains the number of refinement loop executed. Ignored on input.
m0_ = static_cast<int>(ceil(neig*1.5)); //Total number of eigenvalues to be computed
leiv_ = new double [m0_ *m];
leig_ = new double [m0_];
res_ = new double[m0_];
info_ = 0;
}
virtual void Compute()=0;
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig ) = 0;
bool Found() const
{
return info_ == 0 || info_ == 3;
}
int GetNeigFound() const
{
return neig_found_;
}
const double* GetEiv() const
{
return leiv_;
}
const double* GetEig() const
{
return leig_;
}
const std::pair<double, double> & GetInterval() const
{
return interval_;
}
~EigenValuesAndVectors()
{
delete [] leiv_;
delete [] leig_;
delete [] res_;
}
protected:
char mat_type_;
MKL_INT fpm_[128]; //see https://software.intel.com/en-us/node/470386
double epsout_; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
MKL_INT loop_; //On output, contains the number of refinement loop executed. Ignored on input.
MKL_INT m0_; //Total number of eigenvalues to be computed
const std::pair<double,double> &interval_;
std::shared_ptr<MklSymMatrix> A_;
std::shared_ptr<MklSymMatrix> B_;
int neig_found_;
double *leiv_;
double *leig_;
double *res_;
MKL_INT info_;
};
class EigenValuesAndVectorsFeast: public EigenValuesAndVectors
{
public:
EigenValuesAndVectorsFeast(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
EigenValuesAndVectors(A,B, interval,neig,m)
{}
virtual void Compute()
{
double emin = 1/interval_.second; //The lower bounds of the interval to be searched for eigenvalues; emin < emax.
double emax = 1/interval_.first; //The upper bounds of the interval to be searched for eigenvalues; emin < emax.
int num_of_mat_row = A_->N();
std::lock_guard<std::mutex> lock(feast_mutex_);
dfeast_scsrgv(&mat_type_, &num_of_mat_row, B_->A(),B_->Ia(),B_->Ja(), A_->A(),A_->Ia(),A_->Ja(),
fpm_,&epsout_,&loop_,&emin,&emax,&m0_,leig_,leiv_,&neig_found_,res_,&info_);
}
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig )
{
bool cond = neig_found_<neig;
int exit_loop = cond?0:neig_found_-neig;
for (int ie= neig_found_-1; ie>= exit_loop; ie--, i_eig_start++)
eig[i_eig_start]= 1/leig_[ie];
int m = A_->N();
for (int ie= (neig_found_-1)*m; ie>=exit_loop*m; ie-=m)
for (int j = ie; j<ie+m; j++)
eiv[i_eiv_start++]= leiv_[j];
}
protected:
static std::mutex feast_mutex_;
};As mentioned, if I protect the call to dfeast_scsrgv with a global mutex, the results become perfectly reproducible (same as the sequential version). Without the mutex, the computed eigenvalues vary slightly between runs,even when mkl_set_num_threads(1) is used. So it seems that dfeast_scsrgv is not fully thread-safe when called concurrently from different threads, even if the data and fpm arrays are distinct per thread.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello Fengrui,
Thank you for your reply.
To clarify, I’m not using OpenMP.
The FEAST solvers (dfeast_scsrgv) are executed in parallel threads created manually using boost::thread_group (each thread computes the eigenvalues in a different interval).
Each thread constructs its own instance of EigenValuesAndVectorsFeast, which in turn calls dfeast_scsrgv.
Here is the relevant code section:
bool
MklSymMatrixSolver::EigenSystem(int neig, int m, double *eig, double *eiv, int &neig_found)
{
if (!matrix_a_ && !matrix_b_)
return false;
std::vector<std::pair<double,double>> intervals;
intervals.push_back(std::make_pair(0.1,1.5));
intervals.push_back(std::make_pair(1.5,3));
intervals.push_back(std::make_pair(3,6));
intervals.push_back(std::make_pair(6,15));
intervals.push_back(std::make_pair(15,30));
intervals.push_back(std::make_pair(30,60));
intervals.push_back(std::make_pair(60,120));
intervals.push_back(std::make_pair(120,250));
intervals.push_back(std::make_pair(250,500));
int last = 10000;
for (int i =500, shift =250; i<last; i+=shift)
intervals.push_back(std::make_pair(i,i+shift));
intervals.push_back(std::make_pair(last,1e10));
std::vector<EigenValuesAndVectors*> eigvs;
int saved_neig = neig;
int tot_eigen_found= 0;
int i_eig_start = 0;
int i_eiv_start = 0;
size_t grp_shift = intervals.size()>1? thread::hardware_concurrency() / 4 :1;
size_t remaning_intervals = intervals.size();
for (auto grp_it = intervals.begin(); grp_it!= intervals.end() && neig>0 && grp_shift>0; grp_it+=grp_shift){
thread_group grp_thread;
for (auto it= grp_it; it!= grp_it+grp_shift; it++){
EigenValuesAndVectors *eigv = new EigenValuesAndVectorsFeast(matrix_a_,matrix_b_,*it,neig,m);
eigvs.push_back(eigv);
grp_thread.create_thread(bind(&EigenValuesAndVectors::Compute, eigv));
}
grp_thread.join_all();
for (auto it= eigvs.begin(); it!= eigvs.end() && neig>0; it++){
EigenValuesAndVectors *eigv = *it;
neig_found = eigv->GetNeigFound();
bool cond = neig_found<neig;
if ((*it)->Found()){
#ifdef _DEBUG
int i_eig_start_bck = i_eig_start;
#endif
eigv->FillExternalVectors(eig,eiv,i_eig_start,i_eiv_start, neig);
#ifdef _DEBUG
std::cout<<"found "<< (cond?neig_found:neig)<<" eigenvalue\\s within interval:"<<eigv->GetInterval().first<<"-"<<eigv->GetInterval().second<<std::endl;
for (int i=i_eig_start_bck; i<i_eig_start ; i++)
std::cout<<eig[i]<<std::endl;
#endif
tot_eigen_found+=cond?neig_found:neig;
neig-=cond?neig_found:neig;
}
delete eigv;
}
eigvs.clear();
remaning_intervals-=grp_shift;
grp_shift = remaning_intervals>grp_shift? grp_shift:remaning_intervals;
}
neig_found= tot_eigen_found;
return true;
}
class EigenValuesAndVectors
{
public:
EigenValuesAndVectors(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
A_(A),B_(B), interval_(interval)
{
mat_type_ = 'U';
feastinit (fpm_);
fpm_[1] = 3; //number of contour points Ne = 8
fpm_[3] = 2; //refinement loops
//fpm_[27] = 1; //check positive definte
epsout_ = 0.0; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
loop_ = 0; //On output, contains the number of refinement loop executed. Ignored on input.
m0_ = static_cast<int>(ceil(neig*1.5)); //Total number of eigenvalues to be computed
leiv_ = new double [m0_ *m];
leig_ = new double [m0_];
res_ = new double[m0_];
info_ = 0;
}
virtual void Compute()=0;
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig ) = 0;
bool Found() const
{
return info_ == 0 || info_ == 3;
}
int GetNeigFound() const
{
return neig_found_;
}
const double* GetEiv() const
{
return leiv_;
}
const double* GetEig() const
{
return leig_;
}
const std::pair<double, double> & GetInterval() const
{
return interval_;
}
~EigenValuesAndVectors()
{
delete [] leiv_;
delete [] leig_;
delete [] res_;
}
protected:
char mat_type_;
MKL_INT fpm_[128]; //see https://software.intel.com/en-us/node/470386
double epsout_; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
MKL_INT loop_; //On output, contains the number of refinement loop executed. Ignored on input.
MKL_INT m0_; //Total number of eigenvalues to be computed
const std::pair<double,double> &interval_;
std::shared_ptr<MklSymMatrix> A_;
std::shared_ptr<MklSymMatrix> B_;
int neig_found_;
double *leiv_;
double *leig_;
double *res_;
MKL_INT info_;
};
class EigenValuesAndVectorsFeast: public EigenValuesAndVectors
{
public:
EigenValuesAndVectorsFeast(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
EigenValuesAndVectors(A,B, interval,neig,m)
{}
virtual void Compute()
{
double emin = 1/interval_.second; //The lower bounds of the interval to be searched for eigenvalues; emin < emax.
double emax = 1/interval_.first; //The upper bounds of the interval to be searched for eigenvalues; emin < emax.
int num_of_mat_row = A_->N();
std::lock_guard<std::mutex> lock(feast_mutex_);
dfeast_scsrgv(&mat_type_, &num_of_mat_row, B_->A(),B_->Ia(),B_->Ja(), A_->A(),A_->Ia(),A_->Ja(),
fpm_,&epsout_,&loop_,&emin,&emax,&m0_,leig_,leiv_,&neig_found_,res_,&info_);
}
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig )
{
bool cond = neig_found_<neig;
int exit_loop = cond?0:neig_found_-neig;
for (int ie= neig_found_-1; ie>= exit_loop; ie--, i_eig_start++)
eig[i_eig_start]= 1/leig_[ie];
int m = A_->N();
for (int ie= (neig_found_-1)*m; ie>=exit_loop*m; ie-=m)
for (int j = ie; j<ie+m; j++)
eiv[i_eiv_start++]= leiv_[j];
}
protected:
static std::mutex feast_mutex_;
};
the results become perfectly reproducible (same as the sequential version).
Without the mutex, the computed eigenvalues vary slightly between runs.
So it seems that dfeast_scsrgv is not fully thread-safe when called concurrently from different threads, even if the data and fpm arrays are distinct per thread.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello Fengrui,
Thank you for your reply.
To clarify, I’m not using OpenMP.
The FEAST solvers (dfeast_scsrgv) are executed in parallel threads created manually using boost::thread_group (each thread computes the eigenvalues in a different interval).
Each thread constructs its own instance of EigenValuesAndVectorsFeast, which in turn calls dfeast_scsrgv.
Here is the relevant code section:
bool
MklSymMatrixSolver::EigenSystem(int neig, int m, double *eig, double *eiv, int &neig_found)
{
if (!matrix_a_ && !matrix_b_)
return false;
std::vector<std::pair<double,double>> intervals;
intervals.push_back(std::make_pair(0.1,1.5));
intervals.push_back(std::make_pair(1.5,3));
intervals.push_back(std::make_pair(3,6));
intervals.push_back(std::make_pair(6,15));
intervals.push_back(std::make_pair(15,30));
intervals.push_back(std::make_pair(30,60));
intervals.push_back(std::make_pair(60,120));
intervals.push_back(std::make_pair(120,250));
intervals.push_back(std::make_pair(250,500));
int last = 10000;
for (int i =500, shift =250; i<last; i+=shift)
intervals.push_back(std::make_pair(i,i+shift));
intervals.push_back(std::make_pair(last,1e10));
std::vector<EigenValuesAndVectors*> eigvs;
int saved_neig = neig;
int tot_eigen_found= 0;
int i_eig_start = 0;
int i_eiv_start = 0;
size_t grp_shift = intervals.size()>1? thread::hardware_concurrency() / 4 :1;
size_t remaning_intervals = intervals.size();
for (auto grp_it = intervals.begin(); grp_it!= intervals.end() && neig>0 && grp_shift>0; grp_it+=grp_shift){
thread_group grp_thread;
for (auto it= grp_it; it!= grp_it+grp_shift; it++){
EigenValuesAndVectors *eigv = new EigenValuesAndVectorsFeast(matrix_a_,matrix_b_,*it,neig,m);
eigvs.push_back(eigv);
grp_thread.create_thread(bind(&EigenValuesAndVectors::Compute, eigv));
}
grp_thread.join_all();
for (auto it= eigvs.begin(); it!= eigvs.end() && neig>0; it++){
EigenValuesAndVectors *eigv = *it;
neig_found = eigv->GetNeigFound();
bool cond = neig_found<neig;
if ((*it)->Found()){
#ifdef _DEBUG
int i_eig_start_bck = i_eig_start;
#endif
eigv->FillExternalVectors(eig,eiv,i_eig_start,i_eiv_start, neig);
#ifdef _DEBUG
std::cout<<"found "<< (cond?neig_found:neig)<<" eigenvalue\\s within interval:"<<eigv->GetInterval().first<<"-"<<eigv->GetInterval().second<<std::endl;
for (int i=i_eig_start_bck; i<i_eig_start ; i++)
std::cout<<eig[i]<<std::endl;
#endif
tot_eigen_found+=cond?neig_found:neig;
neig-=cond?neig_found:neig;
}
delete eigv;
}
eigvs.clear();
remaning_intervals-=grp_shift;
grp_shift = remaning_intervals>grp_shift? grp_shift:remaning_intervals;
}
neig_found= tot_eigen_found;
return true;
}
class EigenValuesAndVectors
{
public:
EigenValuesAndVectors(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
A_(A),B_(B), interval_(interval)
{
mat_type_ = 'U';
feastinit (fpm_);
fpm_[1] = 3; //number of contour points Ne = 8
fpm_[3] = 2; //refinement loops
//fpm_[27] = 1; //check positive definte
epsout_ = 0.0; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
loop_ = 0; //On output, contains the number of refinement loop executed. Ignored on input.
m0_ = static_cast<int>(ceil(neig*1.5)); //Total number of eigenvalues to be computed
leiv_ = new double [m0_ *m];
leig_ = new double [m0_];
res_ = new double[m0_];
info_ = 0;
}
virtual void Compute()=0;
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig ) = 0;
bool Found() const
{
return info_ == 0 || info_ == 3;
}
int GetNeigFound() const
{
return neig_found_;
}
const double* GetEiv() const
{
return leiv_;
}
const double* GetEig() const
{
return leig_;
}
const std::pair<double, double> & GetInterval() const
{
return interval_;
}
~EigenValuesAndVectors()
{
delete [] leiv_;
delete [] leig_;
delete [] res_;
}
protected:
char mat_type_;
MKL_INT fpm_[128]; //see https://software.intel.com/en-us/node/470386
double epsout_; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
MKL_INT loop_; //On output, contains the number of refinement loop executed. Ignored on input.
MKL_INT m0_; //Total number of eigenvalues to be computed
const std::pair<double,double> &interval_;
std::shared_ptr<MklSymMatrix> A_;
std::shared_ptr<MklSymMatrix> B_;
int neig_found_;
double *leiv_;
double *leig_;
double *res_;
MKL_INT info_;
};
class EigenValuesAndVectorsFeast: public EigenValuesAndVectors
{
public:
EigenValuesAndVectorsFeast(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
EigenValuesAndVectors(A,B, interval,neig,m)
{}
virtual void Compute()
{
double emin = 1/interval_.second; //The lower bounds of the interval to be searched for eigenvalues; emin < emax.
double emax = 1/interval_.first; //The upper bounds of the interval to be searched for eigenvalues; emin < emax.
int num_of_mat_row = A_->N();
std::lock_guard<std::mutex> lock(feast_mutex_);
dfeast_scsrgv(&mat_type_, &num_of_mat_row, B_->A(),B_->Ia(),B_->Ja(), A_->A(),A_->Ia(),A_->Ja(),
fpm_,&epsout_,&loop_,&emin,&emax,&m0_,leig_,leiv_,&neig_found_,res_,&info_);
}
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig )
{
bool cond = neig_found_<neig;
int exit_loop = cond?0:neig_found_-neig;
for (int ie= neig_found_-1; ie>= exit_loop; ie--, i_eig_start++)
eig[i_eig_start]= 1/leig_[ie];
int m = A_->N();
for (int ie= (neig_found_-1)*m; ie>=exit_loop*m; ie-=m)
for (int j = ie; j<ie+m; j++)
eiv[i_eiv_start++]= leiv_[j];
}
protected:
static std::mutex feast_mutex_;
};
the results become perfectly reproducible (same as the sequential version).
Without the mutex, the computed eigenvalues vary slightly between runs.
So it seems that dfeast_scsrgv is not fully thread-safe when called concurrently from different threads, even if the data and fpm arrays are distinct per thread.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello Fengrui,
Thank you for your reply.
To clarify, I’m not using OpenMP.
The FEAST solvers (dfeast_scsrgv) are executed in parallel threads created manually using boost::thread_group (each thread computes the eigenvalues in a different interval).
Each thread constructs its own instance of EigenValuesAndVectorsFeast, which in turn calls dfeast_scsrgv.
Here is the relevant code section:
bool
MklSymMatrixSolver::EigenSystem(int neig, int m, double *eig, double *eiv, int &neig_found)
{
if (!matrix_a_ && !matrix_b_)
return false;
std::vector<std::pair<double,double>> intervals;
intervals.push_back(std::make_pair(0.1,1.5));
intervals.push_back(std::make_pair(1.5,3));
intervals.push_back(std::make_pair(3,6));
intervals.push_back(std::make_pair(6,15));
intervals.push_back(std::make_pair(15,30));
intervals.push_back(std::make_pair(30,60));
intervals.push_back(std::make_pair(60,120));
intervals.push_back(std::make_pair(120,250));
intervals.push_back(std::make_pair(250,500));
int last = 10000;
for (int i =500, shift =250; i<last; i+=shift)
intervals.push_back(std::make_pair(i,i+shift));
intervals.push_back(std::make_pair(last,1e10));
std::vector<EigenValuesAndVectors*> eigvs;
int saved_neig = neig;
int tot_eigen_found= 0;
int i_eig_start = 0;
int i_eiv_start = 0;
size_t grp_shift = intervals.size()>1? thread::hardware_concurrency() / 4 :1;
size_t remaning_intervals = intervals.size();
for (auto grp_it = intervals.begin(); grp_it!= intervals.end() && neig>0 && grp_shift>0; grp_it+=grp_shift){
thread_group grp_thread;
for (auto it= grp_it; it!= grp_it+grp_shift; it++){
EigenValuesAndVectors *eigv = new EigenValuesAndVectorsFeast(matrix_a_,matrix_b_,*it,neig,m);
eigvs.push_back(eigv);
grp_thread.create_thread(bind(&EigenValuesAndVectors::Compute, eigv));
}
grp_thread.join_all();
for (auto it= eigvs.begin(); it!= eigvs.end() && neig>0; it++){
EigenValuesAndVectors *eigv = *it;
neig_found = eigv->GetNeigFound();
bool cond = neig_found<neig;
if ((*it)->Found()){
#ifdef _DEBUG
int i_eig_start_bck = i_eig_start;
#endif
eigv->FillExternalVectors(eig,eiv,i_eig_start,i_eiv_start, neig);
#ifdef _DEBUG
std::cout<<"found "<< (cond?neig_found:neig)<<" eigenvalue\\s within interval:"<<eigv->GetInterval().first<<"-"<<eigv->GetInterval().second<<std::endl;
for (int i=i_eig_start_bck; i<i_eig_start ; i++)
std::cout<<eig[i]<<std::endl;
#endif
tot_eigen_found+=cond?neig_found:neig;
neig-=cond?neig_found:neig;
}
delete eigv;
}
eigvs.clear();
remaning_intervals-=grp_shift;
grp_shift = remaning_intervals>grp_shift? grp_shift:remaning_intervals;
}
neig_found= tot_eigen_found;
return true;
}
class EigenValuesAndVectors
{
public:
EigenValuesAndVectors(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
A_(A),B_(B), interval_(interval)
{
mat_type_ = 'U';
feastinit (fpm_);
fpm_[1] = 3; //number of contour points Ne = 8
fpm_[3] = 2; //refinement loops
//fpm_[27] = 1; //check positive definte
epsout_ = 0.0; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
loop_ = 0; //On output, contains the number of refinement loop executed. Ignored on input.
m0_ = static_cast<int>(ceil(neig*1.5)); //Total number of eigenvalues to be computed
leiv_ = new double [m0_ *m];
leig_ = new double [m0_];
res_ = new double[m0_];
info_ = 0;
}
virtual void Compute()=0;
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig ) = 0;
bool Found() const
{
return info_ == 0 || info_ == 3;
}
int GetNeigFound() const
{
return neig_found_;
}
const double* GetEiv() const
{
return leiv_;
}
const double* GetEig() const
{
return leig_;
}
const std::pair<double, double> & GetInterval() const
{
return interval_;
}
~EigenValuesAndVectors()
{
delete [] leiv_;
delete [] leig_;
delete [] res_;
}
protected:
char mat_type_;
MKL_INT fpm_[128]; //see https://software.intel.com/en-us/node/470386
double epsout_; //On output, contains the relative error on the trace: |tracei - tracei-1| /max(|emin|, |emax|)
MKL_INT loop_; //On output, contains the number of refinement loop executed. Ignored on input.
MKL_INT m0_; //Total number of eigenvalues to be computed
const std::pair<double,double> &interval_;
std::shared_ptr<MklSymMatrix> A_;
std::shared_ptr<MklSymMatrix> B_;
int neig_found_;
double *leiv_;
double *leig_;
double *res_;
MKL_INT info_;
};
class EigenValuesAndVectorsFeast: public EigenValuesAndVectors
{
public:
EigenValuesAndVectorsFeast(std::shared_ptr<MklSymMatrix> A, std::shared_ptr<MklSymMatrix> B, const std::pair<double,double> &interval, int neig, int m):
EigenValuesAndVectors(A,B, interval,neig,m)
{}
virtual void Compute()
{
double emin = 1/interval_.second; //The lower bounds of the interval to be searched for eigenvalues; emin < emax.
double emax = 1/interval_.first; //The upper bounds of the interval to be searched for eigenvalues; emin < emax.
int num_of_mat_row = A_->N();
std::lock_guard<std::mutex> lock(feast_mutex_);
dfeast_scsrgv(&mat_type_, &num_of_mat_row, B_->A(),B_->Ia(),B_->Ja(), A_->A(),A_->Ia(),A_->Ja(),
fpm_,&epsout_,&loop_,&emin,&emax,&m0_,leig_,leiv_,&neig_found_,res_,&info_);
}
virtual void FillExternalVectors(double *eig, double *eiv, int &i_eig_start, int &i_eiv_start, int &neig )
{
bool cond = neig_found_<neig;
int exit_loop = cond?0:neig_found_-neig;
for (int ie= neig_found_-1; ie>= exit_loop; ie--, i_eig_start++)
eig[i_eig_start]= 1/leig_[ie];
int m = A_->N();
for (int ie= (neig_found_-1)*m; ie>=exit_loop*m; ie-=m)
for (int j = ie; j<ie+m; j++)
eiv[i_eiv_start++]= leiv_[j];
}
protected:
static std::mutex feast_mutex_;
};
the results become perfectly reproducible (same as the sequential version).
Without the mutex, the computed eigenvalues vary slightly between runs.
So it seems that dfeast_scsrgv is not fully thread-safe when called concurrently from different threads, even if the data and fpm arrays are distinct per thread.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello Fengrui,
Thank you for your reply.
To clarify, I’m not using OpenMP.
The FEAST solvers (dfeast_scsrgv) are executed in parallel threads created manually using boost::thread_group (each thread computes the eigenvalues in a different interval).
Each thread constructs its own instance of EigenValuesAndVectorsFeast, which in turn calls dfeast_scsrgv.
Attached you'll finde the relevant code section.
The results become perfectly reproducible (same as the sequential version).
Without the mutex, the computed eigenvalues vary slightly between runs — even when mkl_set_num_threads(1) is used.
So it seems that dfeast_scsrgv is not fully thread-safe when called concurrently from different threads, even if the data and fpm arrays are distinct per thread.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page