Hello all:
I'm trying to solve a linear system (a 9-by-9 dense matrix) with pdgesv in C. I used the example code from http://icl.cs.utk.edu/lapack-forum/viewtopic.php?f=2&t=1683&sid=26b4f2535c0c3856d02bdadd62d08c5f# and it compiles fine. However, calling pdgesv produces this error:
“On entry to PDGESV parameter number 602 had an illegal value”.
According to the PDGESV source code, parameter 602 means the 2nd entry of the 6th argument is illegal, i.e. descA[1] is wrong?
However, descA[1] (ictxt) is set by Cblacs_gridinit(&ictxt, "Row", nprow, npcol).
After printing ictxt, I find ictxt = 0 on all ranks. Is this the reason for the error message?
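For reference, here is how I checked (a minimal sketch; the variables are the ones in my code below):
// right after Cblacs_gridinit / Cblacs_gridinfo
printf("rank %d: ictxt=%d, grid %dx%d, coords (%d,%d)\n",
rank, (int)ictxt, (int)nprow, (int)npcol, (int)myrow, (int)mycol);
// myrow == -1 / mycol == -1 would mean the rank is outside the grid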
I really need to solve a large dense (full) matrix. Can someone help me or give me some advice?
The following is my C code:
PS: I call MPI_Init(&argc, &argv) before entering Scalapack(int argc, char **argv) and MPI_Finalize() after leaving it.
#include <mpi.h>
#include <iostream>
#include <vector>
#include <cmath>
#include <cstdlib>
#include "Scalapack.h"
#include <mkl.h>
#include <mkl_scalapack.h>
#include "mkl_lapacke.h"
#include <mkl_cblas.h>
#define mat(matriz,coluna,i,j) (matriz[i*coluna+j])
#define p_of_i(i,bs,p) ( MKL_INT((i-1)/bs)%p)   /* process row/col owning global index i */
#define l_of_i(i,bs,p) ( MKL_INT((i-1)/(p*bs))) /* local block that holds global index i */
#define x_of_i(i,bs,p) (((i-1)%bs)+1)           /* 1-based offset of i within its block */
#define numroc_ NUMROC
using namespace std;
extern "C"
{
/* BLACS C interface */
void Cblacs_pinfo(int* mypnum, int* nprocs);
void Cblacs_get( MKL_INT context, MKL_INT request, MKL_INT* value);
int Cblacs_gridinit( MKL_INT* context, char * order, MKL_INT np_row, MKL_INT np_col);
void Cblacs_gridinfo( MKL_INT context, MKL_INT* np_row, MKL_INT* np_col, MKL_INT* my_row,
MKL_INT* my_col);
int numroc_( MKL_INT *n, MKL_INT *nb, MKL_INT *iproc, MKL_INT *isrcproc, MKL_INT *nprocs);
void Cblacs_gridexit(MKL_INT ictxt);
void Cblacs_barrier(MKL_INT ictxt, char * order);
}
void find_nps(MKL_INT np, MKL_INT &nprow, MKL_INT & npcol);
int getIndex(MKL_INT row, MKL_INT col,MKL_INT NCOLS) {return row*NCOLS+col;}
CTEST_Scalapack::CTEST_Scalapack(void)
{
}
CTEST_Scalapack::~CTEST_Scalapack(void)
{
}
int CTEST_Scalapack::Scalapack(int argc, char ** argv)
{
int nprocs = 0;//MPI::COMM_WORLD.Get_size();
int rank = 0;//MPI::COMM_WORLD.Get_rank();
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
std::cout<<"Returned: "<<" ";
std::cout << "Hello World! I am " << rank << " of " << nprocs <<
std::endl;
srand(1);
MKL_INT myrow=0;
MKL_INT mycol=0;
MKL_INT ictxt=0;
MKL_INT nprow=0,npcol=0;
MKL_INT BLOCK_SIZE =2; // tricky: 64 would be a typical block size, but it cannot exceed the matrix dimension
MKL_INT locR=0, locC=0;
MKL_INT block = BLOCK_SIZE;
MKL_INT izero = 0;
MKL_INT matrix_size = 9;
MKL_INT myone = 1;
MKL_INT nrhs = 1;
MKL_INT info=0;
int i=0,j=0;
double mone=(-1.e0),pone=(1.e0);
double AnormF=0.e0, XnormF=0.e0, RnormF=0.e0, BnormF=0.e0, residF=0.e0,eps=0.e0;
find_nps(nprocs,nprow,npcol);
Cblacs_pinfo( &rank, &nprocs ) ;
Cblacs_get(-1, 0, &ictxt);
Cblacs_gridinit(&ictxt, "Row", nprow, npcol);
Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);
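/* note: ranks outside the nprow x npcol grid get myrow = mycol = -1 from
   Cblacs_gridinfo and should not take part in the ScaLAPACK call */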
locR = numroc_(&matrix_size, &block, &myrow, &izero, &nprow);
locC = numroc_(&matrix_size, &block, &mycol, &izero, &npcol);
////GLOBAL
double * A = new double[matrix_size*matrix_size]();
double * B = new double[matrix_size]();
double * Acpy = new double[matrix_size*matrix_size]();
double * Bcpy = new double[matrix_size]();
//LOCAL
double * local_know_vector = new double[locR]();
double * local_matrix = new double[locR*locC]();
MKL_INT* ipiv = new MKL_INT [locC*locR*block+1000000](); // heavily oversized; LOCr(A)+MB_A (here locR+block) suffices per the pdgesv docs
B[2] = 1;
B[3] = 0;
B[4] = 0;
B[5] = 0;
A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;
A[9] = -19;
A[10] = 3;
A[11] = 1;
A[12] = 12;
A[13] = 1;
A[14] = 16;
A[15] = 1;
A[16] = 3;
A[17] = 11;
A[18] = -19;
A[19] = -3;
A[20] = 1;
A[21] = 12;
A[22] = 1;
A[23] = 16;
A[24] = 1;
A[25] = 3;
A[26] = 11;
A[27] = -19;
A[28] = -3;
A[29] = -1;
A[30] = 12;
A[31] = 1;
A[32] = 16;
A[33] = 1;
A[34] = 3;
A[35] = 11;
A[36] = -19;
A[37] = -3;
A[38] = -1;
A[39] = -12;
A[40] = 1;
A[41] = 16;
A[42] = 1;
A[43] = 3;
A[44] = 11;
A[45] = -19;
A[46] = -3;
A[47] = -1;
A[48] = -12;
A[49] = -1;
A[50] = 16;
A[51] = 1;
A[52] = 3;
A[53] = 11;
A[54] = -19;
A[55] = -3;
A[56] = -1;
A[57] = -12;
A[58] = -1;
A[59] = -16;
A[60] = 1;
A[61] = 3;
A[62] = 11;
A[63] = -19;
A[64] = -3;
A[65] = -1;
A[66] = -12;
A[67] = -1;
A[68] = -16;
A[69] = -1;
A[70] = 3;
A[71] = 11;
A[72] = -19;
A[73] = -3;
A[74] = -1;
A[75] = -12;
A[76] = -1;
A[77] = -16;
A[78] = -1;
A[79] = -3;
A[80] = 11;
MKL_INT* descA = new MKL_INT[9]();
MKL_INT* descB = new MKL_INT[9]();
descA[0] = 1; // descriptor type
descA[1] = ictxt; // blacs context
descA[2] = matrix_size; // global number of rows
descA[3] = matrix_size; // global number of columns
descA[4] = block; // row block size
descA[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
descA[6] = 0; // initial process row(DEFINED 0)
descA[7] = 0; // initial process column (DEFINED 0)
descA[8] = locR; // leading dimension of local array
descB[0] = 1; // descriptor type
descB[1] = ictxt; // blacs context
descB[2] = matrix_size; // global number of rows
descB[3] = 1; // global number of columns
descB[4] = block; // row block size
descB[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
descB[6] = 0; // initial process row(DEFINED 0)
descB[7] = 0; // initial process column (DEFINED 0)
descB[8] = locR; // leading dimension of local array
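/* alternative (a sketch): descinit_ from mkl_scalapack.h fills and validates
   a descriptor in one call --
   MKL_INT dinfo = 0;
   descinit_(descA, &matrix_size, &matrix_size, &block, &block,
             &izero, &izero, &ictxt, &locR, &dinfo);
   a negative dinfo flags the (-dinfo)-th argument as illegal */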
int il=0, jl=0;
for(i=1; i< matrix_size+1; i++)
{
for(j=1; j< matrix_size+1; j++)
{
int pi = p_of_i(i,block,nprow);
int li = l_of_i(i,block,nprow);
int xi = x_of_i(i,block,nprow);
//printf("i = %d, j = %d, pi = %d, li = %d\n",i,j,pi,li);;fflush(stdout);
int pj = p_of_i(j,block,npcol);
int lj = l_of_i(j,block,npcol);
int xj = x_of_i(j,block,npcol);
//printf("i = %d, j = %d, pj = %d, lj = %d, xj = %d\n",i,j,pj,lj,xj);;fflush(stdout);
if( (pi == myrow) && (pj == mycol))
{
il = li*block+xi;
jl = lj*block+xj;
local_matrix[getIndex(il-1, jl-1, locC)] = A[getIndex(i-1,j-1,matrix_size)];
}
if( (pi == myrow) &&(mycol==0) )
{
local_know_vector[il-1] = B[i-1];
}
}
}
////STARTING PDGESV
pdgesv_(&matrix_size, &nrhs, local_matrix, &myone, &myone, descA, ipiv, local_know_vector, &myone, &myone, descB, &info);
if(rank==0)
{
if(info != 0) cout <<"PDGESV problem! Info "<<info<<endl;
}
for(i=0; i< locR; i++)
{
cout<<"**\n"<<"rank "<<rank<<" answer: "<<local_know_vector<<endl;
}
if(NULL!=descA) {delete [] descA; descA=NULL;}
if(NULL!=descB) {delete [] descB; descB=NULL;}
if(NULL!=local_know_vector) {delete [] local_know_vector; local_know_vector=NULL;}
if(NULL!=local_matrix) {delete [] local_matrix; local_matrix=NULL;}
if(NULL!=Acpy) {delete [] Acpy; Acpy=NULL;}
if(NULL!=Bcpy) {delete [] Bcpy; Bcpy=NULL;}
if(NULL!=A) {delete [] A; A=NULL;}
if(NULL!=B) {delete [] B; B=NULL;}
Cblacs_gridexit(ictxt);
return 0;
}
void find_nps(MKL_INT np, MKL_INT &nprow, MKL_INT & npcol)
{
MKL_INT min_nprow=100000;
MKL_INT min_npcol=100000;
nprow = np;
npcol = np;
while(1) {
npcol--;
if(np%2==0 ) {
if(npcol ==1){
nprow --;
npcol = nprow;
}
}else {
if(npcol ==0){
nprow --;
npcol = nprow;
}
}
if(nprow*npcol == np) {
min_npcol = npcol;
if(nprow < min_nprow) min_nprow = nprow;
}
if(nprow ==1 ) break;
}
nprow = min_nprow;
npcol = min_npcol;
}
---
Hi
I've corrected the example to properly define the process grid and initialize MPI.
Now it works fine for any number of MPI processes; the corrected code is below.
Also, the article https://software.intel.com/en-us/articles/using-cluster-mkl-pblasscalapack-fortran-routine-in-your-c-program/ has pdgemv sample code: https://software.intel.com/sites/default/files/article/165948/pdgemv.c
#include <mpi.h>
#include <iostream>
#include <vector>
#include <cmath>
#include <cstdlib>
//#include "Scalapack.h"
#include <mkl.h>
#include <mkl_scalapack.h>
#include "mkl_lapacke.h"
#include <mkl_cblas.h>
#define mat(matriz,coluna,i,j) (matriz[i*coluna+j])
#define p_of_i(i,bs,p) ( MKL_INT((i-1)/bs)%p)
#define l_of_i(i,bs,p) ( MKL_INT((i-1)/(p*bs)))
#define x_of_i(i,bs,p) (((i-1)%bs)+1)
//#define numroc_ NUMROC
using namespace std;
extern "C"
{
/* BLACS C interface */
void Cblacs_pinfo(int* mypnum, int* nprocs);
void Cblacs_get( MKL_INT context, MKL_INT request, MKL_INT* value);
int Cblacs_gridinit( MKL_INT* context, char * order, MKL_INT np_row, MKL_INT np_col);
void Cblacs_gridinfo( MKL_INT context, MKL_INT* np_row, MKL_INT* np_col, MKL_INT* my_row,
MKL_INT* my_col);
int numroc_( MKL_INT *n, MKL_INT *nb, MKL_INT *iproc, MKL_INT *isrcproc, MKL_INT *nprocs);
void Cblacs_gridexit(MKL_INT ictxt);
void Cblacs_barrier(MKL_INT ictxt, char * order);
}
void find_nps(MKL_INT np, MKL_INT &nprow, MKL_INT & npcol);
int getIndex(MKL_INT row, MKL_INT col,MKL_INT NCOLS) {return row*NCOLS+col;}
/*
CTEST_Scalapack::CTEST_Scalapack(void)
{
}
CTEST_Scalapack::~CTEST_Scalapack(void)
{
}
*/
int main(int argc, char ** argv)
{
int nprocs = 0;//MPI::COMM_WORLD.Get_size();
int rank = 0;//MPI::COMM_WORLD.Get_rank();
MPI_Init(&argc,&argv);
MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
MPI_Comm_rank(MPI_COMM_WORLD,&rank);
std::cout<<"Returned: "<<" ";
std::cout << "Hello World! I am " << rank << " of " << nprocs <<
std::endl;
srand(1);
MKL_INT myrow=0;
MKL_INT mycol=0;
MKL_INT ictxt=0;
MKL_INT nprow=0,npcol=0;
MKL_INT BLOCK_SIZE =2; // tricky: 64 would be a typical block size, but it cannot exceed the matrix dimension
MKL_INT locR=0, locC=0;
MKL_INT block = BLOCK_SIZE;
MKL_INT izero = 0;
MKL_INT matrix_size = 9;
MKL_INT myone = 1;
MKL_INT nrhs = 1;
MKL_INT info=0;
int i=0,j=0;
double mone=(-1.e0),pone=(1.e0);
double AnormF=0.e0, XnormF=0.e0, RnormF=0.e0, BnormF=0.e0, residF=0.e0,eps=0.e0;
find_nps(nprocs,nprow,npcol);
Cblacs_pinfo( &rank, &nprocs ) ;
Cblacs_get(-1, 0, &ictxt);
Cblacs_gridinit(&ictxt, "Row", nprow, npcol);
Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);
locR = numroc_(&matrix_size, &block, &myrow, &izero, &nprow);
locC = numroc_(&matrix_size, &block, &mycol, &izero, &npcol);
////GLOBAL
double * A = new double[matrix_size*matrix_size]();
double * B = new double[matrix_size]();
double * Acpy = new double[matrix_size*matrix_size]();
double * Bcpy = new double[matrix_size]();
//LOCAL
double * local_know_vector = new double[locR]();
double * local_matrix = new double[locR*locC]();
MKL_INT* ipiv = new MKL_INT [locC*locR*block+1000000](); // heavily oversized; LOCr(A)+MB_A (here locR+block) suffices per the pdgesv docs
B[0] = 0;
B[1] = 0;
B[2] = 1;
B[3] = 0;
B[4] = 0;
B[5] = 0;
B[6] = 0;
B[7] = 0;
A[0] = 19;
A[1] = 3;
A[2] = 1;
A[3] = 12;
A[4] = 1;
A[5] = 16;
A[6] = 1;
A[7] = 3;
A[8] = 11;
A[9] = -19;
A[10] = 3;
A[11] = 1;
A[12] = 12;
A[13] = 1;
A[14] = 16;
A[15] = 1;
A[16] = 3;
A[17] = 11;
A[18] = -19;
A[19] = -3;
A[20] = 1;
A[21] = 12;
A[22] = 1;
A[23] = 16;
A[24] = 1;
A[25] = 3;
A[26] = 11;
A[27] = -19;
A[28] = -3;
A[29] = -1;
A[30] = 12;
A[31] = 1;
A[32] = 16;
A[33] = 1;
A[34] = 3;
A[35] = 11;
A[36] = -19;
A[37] = -3;
A[38] = -1;
A[39] = -12;
A[40] = 1;
A[41] = 16;
A[42] = 1;
A[43] = 3;
A[44] = 11;
A[45] = -19;
A[46] = -3;
A[47] = -1;
A[48] = -12;
A[49] = -1;
A[50] = 16;
A[51] = 1;
A[52] = 3;
A[53] = 11;
A[54] = -19;
A[55] = -3;
A[56] = -1;
A[57] = -12;
A[58] = -1;
A[59] = -16;
A[60] = 1;
A[61] = 3;
A[62] = 11;
A[63] = -19;
A[64] = -3;
A[65] = -1;
A[66] = -12;
A[67] = -1;
A[68] = -16;
A[69] = -1;
A[70] = 3;
A[71] = 11;
A[72] = -19;
A[73] = -3;
A[74] = -1;
A[75] = -12;
A[76] = -1;
A[77] = -16;
A[78] = -1;
A[79] = -3;
A[80] = 11;
MKL_INT* descA = new MKL_INT[9]();
MKL_INT* descB = new MKL_INT[9]();
descA[0] = 1; // descriptor type
descA[1] = ictxt; // blacs context
descA[2] = matrix_size; // global number of rows
descA[3] = matrix_size; // global number of columns
descA[4] = block; // row block size
descA[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
descA[6] = 0; // initial process row(DEFINED 0)
descA[7] = 0; // initial process column (DEFINED 0)
descA[8] = locR; // leading dimension of local array
descB[0] = 1; // descriptor type
descB[1] = ictxt; // blacs context
descB[2] = matrix_size; // global number of rows
descB[3] = 1; // global number of columns
descB[4] = block; // row block size
descB[5] = block; // column block size (DEFINED EQUAL THAN ROW BLOCK SIZE)
descB[6] = 0; // initial process row(DEFINED 0)
descB[7] = 0; // initial process column (DEFINED 0)
descB[8] = locR; // leading dimension of local array
int il=0, jl=0;
for(i=1; i< matrix_size+1; i++)
{
for(j=1; j< matrix_size+1; j++)
{
int pi = p_of_i(i,block,nprow);
int li = l_of_i(i,block,nprow);
int xi = x_of_i(i,block,nprow);
//printf("i = %d, j = %d, pi = %d, li = %d\n",i,j,pi,li);;fflush(stdout);
int pj = p_of_i(j,block,npcol);
int lj = l_of_i(j,block,npcol);
int xj = x_of_i(j,block,npcol);
//printf("i = %d, j = %d, pj = %d, lj = %d, xj = %d\n",i,j,pj,lj,xj);;fflush(stdout);
if( (pi == myrow) && (pj == mycol))
{
il = li*block+xi;
jl = lj*block+xj;
local_matrix[getIndex(il-1, jl-1, locC)] = A[getIndex(i-1,j-1,matrix_size)];
}
if( (pi == myrow) &&(mycol==0) )
{
local_know_vector[il-1] = B[i-1];
}
}
}
// the initialization code above has an issue; in our sample we use the following instead:
for(i=0;i<locR;++i) for(j=0;j<locC;++j) {
int gi = i%block + block*myrow + (i/block)*block*nprow;
int gj = j%block + block*mycol + (j/block)*block*npcol;
local_matrix[i + locR*j] = A[gi*matrix_size + gj]; // note: col-major <- row-major
local_know_vector[i] = B[gi];
}
////STARTING PDGESV
pdgesv_(&matrix_size, &nrhs, local_matrix, &myone, &myone, descA, ipiv, local_know_vector, &myone, &myone, descB, &info);
if(rank==0)
{
if(info != 0) cout <<"PDGESV problem! Info "<<info<<endl;
}
for (i = 0; i < locR; i++)
{
int gi = i%block + block*myrow + (i/block)*block*nprow;
if (mycol == 0) printf("res[ %i ]=%lg\n",gi,local_know_vector[i]);
}
// for(i=0; i< locR; i++)
// {
// cout<<"**\n"<<"rank "<<rank<<" answer: "<<local_know_vector[i]<<endl;
// }
if(NULL!=descA) {delete [] descA; descA=NULL;}
if(NULL!=descB) {delete [] descB; descB=NULL;}
if(NULL!=local_know_vector) {delete [] local_know_vector; local_know_vector=NULL;}
if(NULL!=local_matrix) {delete [] local_matrix; local_matrix=NULL;}
if(NULL!=Acpy) {delete [] Acpy; Acpy=NULL;}
if(NULL!=Bcpy) {delete [] Bcpy; Bcpy=NULL;}
if(NULL!=A) {delete [] A; A=NULL;}
if(NULL!=B) {delete [] B; B=NULL;}
Cblacs_gridexit(ictxt);
return 0;
}
void find_nps(MKL_INT np, MKL_INT &nprow, MKL_INT & npcol)
{
#if 1
nprow = (int)sqrt( np );
npcol = np / nprow;
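// note: when np has no near-square factorization (e.g. np prime),
// npcol = np/nprow truncates, nprow*npcol < np, and the leftover
// ranks fall outside the BLACS grid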
return;
#else
MKL_INT min_nprow=100000;
MKL_INT min_npcol=100000;
nprow = np;
npcol = np;
while(1) {
npcol--;
if(np%2==0 ) {
if(npcol ==1){
nprow --;
npcol = nprow;
}
}else {
if(npcol ==0){
nprow --;
npcol = nprow;
}
}
if(nprow*npcol == np) {
min_npcol = npcol;
if(nprow < min_nprow) min_nprow = nprow;
}
if(nprow ==1 ) break;
}
nprow = min_nprow;
npcol = min_npcol;
#endif
}
---
Hi Ying,
I ran your code on the NERSC Cori supercomputer, but I got this error:
Returned: Hello World! I am 0 of 2
Returned: Hello World! I am 1 of 2
Rank 0 [Wed Jan 13 12:23:10 2016] [c0-0c0s8n3] Fatal error in MPI_Send: Invalid tag, error stack:
MPI_Send(186): MPI_Send(buf=0x139d640, count=2, MPI_INT, dest=1, tag=5000000, comm=0x84000001) failed
MPI_Send(111): Invalid tag, value is 5000000
Rank 1 [Wed Jan 13 12:23:10 2016] [c0-0c0s9n0] Fatal error in MPI_Recv: Invalid tag, error stack:
MPI_Recv(199): MPI_Recv(buf=0x139b5c0, count=2, MPI_INT, src=0, tag=5000000, comm=0x84000001, status=0x7fffffff5618) failed
MPI_Recv(118): Invalid tag, value is 5000000
srun: error: nid00035: task 0: Aborted
srun: Terminating job step 929053.0
srun: Job step aborted: Waiting up to 32 seconds for job step to finish.
srun: error: nid00036: task 1: Aborted
Could you help me?
---
Hi Yang,
Could you also show your compile command and run command, MPI version, MKL version, etc.?
There are some MPI samples in the MKL install directory; could you please try them first and see if MKL works fine on the supercomputer?
Best Regards,
Ying
---
Hi Ying,
I'm using the NERSC Cori supercomputer; all the information is at:
http://www.nersc.gov/users/computational-systems/cori/
The compile command is:
source /opt/intel/bin/compilervars.sh intel64
source /opt/intel/impi/5.0.2.044/bin64/mpivars.sh
CC -o execute pdgesv.cpp -mkl:cluster
The run command is sbatch test.sl
where test.sl is
#!/bin/bash -l
#SBATCH --partition debug
#SBATCH --nodes 2
#SBATCH --time=00:03:00
cd $SLURM_SUBMIT_DIR
srun -n 2 ./execute;
---
Hi Yang,
You are probably using Cray MPI, which does not support tags with large values. MKL used to use such tags internally. The issue has since been fixed, so with a recent MKL version (e.g. MKL 11.3.1) the test should pass.
Alternatively, it should work with Intel MPI.
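A quick way to check the tag limit of the MPI you are running (a sketch using the standard MPI_TAG_UB attribute query):
int *tag_ub = NULL, flag = 0;
MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &tag_ub, &flag);
if (flag) printf("MPI_TAG_UB = %d\n", *tag_ub); // the tag 5000000 from the trace must not exceed this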
Thanks
Dima
---
If I change the number of processes, the solution of the linear system changes. Also, the solution from this code differs from the MATLAB solution. Let us solve the linear system Ax = b.
A is
19 3 1 12 1 16 1 3 11
-19 3 1 12 1 16 1 3 11
-19 -3 1 12 1 16 1 3 11
-19 -3 -1 12 1 16 1 3 11
-19 -3 -1 -12 1 16 1 3 11
-19 -3 -1 -12 -1 16 1 3 11
-19 -3 -1 -12 -1 -16 1 3 11
-19 -3 -1 -12 -1 -16 -1 3 11
-19 -3 -1 -12 -1 -16 -1 -3 11
b is
0
0
1
0
0
0
0
0
0
If we use MATLAB, the solution is:
0
-0.1667
0.5000
0
0
0
0
0
0
If we use this code for one process, the solution is:
answer[0]: 0.000000
answer[1]: 0.000000
answer[2]: 0.500000
answer[3]: -0.500000
answer[4]: -0.000000
answer[5]: -0.000000
answer[6]: 0.000000
answer[7]: -0.000000
answer[8]: -0.000000
If we use this code for two processes, the solution is:
answer[0]: 0.003434
answer[1]: 0.016138
answer[2]: 0.019397
answer[3]: 0.033449
answer[4]: -0.025858
answer[5]: 0.013383
answer[6]: 0.022018
answer[7]: -0.060255
answer[8]: 0.025895
If we use this code for three processes, the solution is:
answer[0]: -0.023551
answer[1]: 0.017566
answer[2]: 0.004775
answer[3]: 0.004377
answer[4]: 0.023704
answer[5]: 0.027967
answer[6]: -0.023534
answer[7]: 0.017509
answer[8]: 0.000398
Can you tell me what went wrong? How can I get the correct solution with this code?
---
The matrix must be distributed correctly. For example, consider the distribution of rows. Let N be the global number of rows, LOCR the local number of rows as obtained from a call to numroc(), B the block size, p my process row, and P the number of process rows. Then for locn = 0...LOCR-1 the global index n = 0...N-1 is computed as:
n = locn % B        // offset within the current block (block number locn/B)
  + B*p             // offset of that block within the current cycle of the block-cyclic distribution
  + P*B*(locn/B);   // offset of the current cycle on the grid of P process rows
Once you know (i,j) for (loci,locj), you can initialize local_matrix[loci + locR*locj] = A[i + matrix_size*j]. Please note that ScaLAPACK supports only column-major ordering of local matrices; that is, local matrix element (i,j) must be at offset i + locR*j, not at i*locC + j.
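In code, the row mapping looks like this (a sketch; loc2glob is a hypothetical helper name):
/* global row index for local row locn; B = block size,
   p = my process row, P = number of process rows */
static int loc2glob(int locn, int B, int p, int P)
{
    return locn % B + B*p + P*B*(locn/B);
}
/* e.g. B = 2, P = 2, p = 1: local row 3 -> 3%2 + 2*1 + 2*2*(3/2) = 1 + 2 + 4 = 7 */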
I hope this will help you.
Thanks
Dima
---
Hi Dima,
Thank you very much for the correction.
The proper code for initializing the local matrix should be inserted before the call to pdgesv (I will re-edit the code above):
for(i=0;i<locR;++i) for(j=0;j<locC;++j) {
int gi = i%block + block*myrow + (i/block)*block*nprow;
int gj = j%block + block*mycol + (j/block)*block*npcol;
local_matrix[i + locR*j] = A[gi*matrix_size + gj]; // note: col-major <- row-major
local_know_vector[i] = B[gi];
}
The solution can be gathered onto one rank, or simply printed in place:
for (i = 0; i < locR; i++)
{
int gi = i%block + block*myrow + (i/block)*block*nprow;
if (mycol == 0) printf("res[ %i ]=%lg\n",gi,local_know_vector[i]);
}
Here are a few runs:
$ mpirun -np 1 --prepend-rank ./a.out | sort -k2
[0] res[ 0 ]=0
[0] res[ 1 ]=-0.166667
[0] res[ 2 ]=0.5
[0] res[ 3 ]=0
[0] res[ 4 ]=0
[0] res[ 5 ]=0
[0] res[ 6 ]=0
[0] res[ 7 ]=0
[0] res[ 8 ]=0
$ mpirun -np 4 --prepend-rank ./a.out | sort -k2
[0] res[ 0 ]=0
[0] res[ 1 ]=-0.166667
[2] res[ 2 ]=0.5
[2] res[ 3 ]=0
[0] res[ 4 ]=0
[0] res[ 5 ]=0
[2] res[ 6 ]=0
[2] res[ 7 ]=0
[0] res[ 8 ]=0
Thanks
Ying
---
Thank you, Dima and Ying, it works very well :-)