Hi all,
I would like to use PZGESVD from ScaLAPACK to solve my problem in a distributed way. Before calling PZGESVD, the matrix has to be distributed across all participating processes, so I first want to test the usage of PDGEMR2D.
However, I am having trouble with PDGEMR2D: it produces an error whose cause I cannot figure out.
The error is:
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
??MR2D:Bad submatrix:i=-1,j=-1,m=50,n=10,M=50,N=10
Assertion failed in c:\bt\479\private\mpich2\src\pm\smpd\smpd_handle_command.cpp(640): proc != 0
unable to read the cmd header on the left child context, Other MPI error, error stack:
ReadFailed(1298): An existing connection was forcibly closed by the remote host. (errno 10054).
Aborting: mpiexec on TEMFPC1005 failed to communicate with smpd on TEMFPC1005
Other MPI error, error stack:
ReadFailed(1298): An existing connection was forcibly closed by the remote host. (errno 10054)
The code is:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <iostream>
#include <iomanip>
#include <string>
#include <fstream>
#include <sstream>
#include <complex>
#include <algorithm>
#include <vector>
//#define MKL_Complex8 std::complex<float>
//#define MKL_Complex16 std::complex<double>
#include <mkl_blacs.h>
#include <mkl_scalapack.h>
#include <mkl_pblas.h>
#include <mkl.h>
#include "petsc.h"
#define MAX(a,b)((a)<(b)?(b):(a))
#define MIN(a,b)((a)>(b)?(b):(a))
using namespace std;
static char help[] = "Test SCALAPACK";
int main(int argc, char **argv)
{
    PetscErrorCode ierr;
    const MKL_INT intmkl_negone = -1, intmkl_zero = 0;
    MKL_INT intmkl_rank, intmkl_size, intmkl_info, intmkl_ctxt, intmkl_nProcRows, intmkl_nProcCols, intmkl_myRow, intmkl_myCol;
    MKL_INT intmkl_MA, intmkl_NA, intmkl_MBA, intmkl_NBA, intmkl_lldA, intmkl_nRowProc, intmkl_nColProc;
    MKL_INT intmkl_MB, intmkl_NB, intmkl_MBB, intmkl_NBB, intmkl_lldB;
    MKL_INT descA[9], descB[9];
    PetscInt int_size, int_rank, int_numLocalRowMatA, int_numGlobalRowMatA, int_numGlobalColMatA;
    Mat mat_A;
    double *doubleAr_A, *doubleAr_B;

    /* MPI initialization */
    ierr = PetscInitialize(&argc, &argv, (char*)0, help); CHKERRQ(ierr);
    MPI_Comm_size(PETSC_COMM_WORLD, &int_size);
    MPI_Comm_rank(PETSC_COMM_WORLD, &int_rank);

    /* Generate a random matrix */
    int_numLocalRowMatA = 5;
    int_numGlobalRowMatA = int_numLocalRowMatA*int_size;
    int_numGlobalColMatA = 10;

    /* initialize blacs */
    BLACS_PINFO(&intmkl_rank, &intmkl_size);
    intmkl_nProcRows = intmkl_size;
    intmkl_nProcCols = 1;
    BLACS_GET(&intmkl_negone, &intmkl_zero, &intmkl_ctxt);
    BLACS_GRIDINIT(&intmkl_ctxt, "C", &intmkl_nProcRows, &intmkl_nProcCols);
    BLACS_GRIDINFO(&intmkl_ctxt, &intmkl_nProcRows, &intmkl_nProcCols, &intmkl_myRow, &intmkl_myCol);

    /* compute precise length of local pieces and allocate array on each process for parts of distributed matrices */
    intmkl_MA = (MKL_INT)int_numGlobalRowMatA;
    intmkl_NA = (MKL_INT)int_numGlobalColMatA;
    intmkl_MBA = (MKL_INT)int_numLocalRowMatA;
    intmkl_NBA = (MKL_INT)int_numGlobalColMatA;
    intmkl_nRowProc = NUMROC(&intmkl_MA, &intmkl_MBA, &intmkl_myRow, &intmkl_zero, &intmkl_nProcRows);
    intmkl_nColProc = NUMROC(&intmkl_NA, &intmkl_NBA, &intmkl_myCol, &intmkl_zero, &intmkl_nProcCols);
    intmkl_lldA = MAX(1, intmkl_nRowProc);
    DESCINIT(descA, &intmkl_MA, &intmkl_NA, &intmkl_MBA, &intmkl_NBA, &intmkl_zero, &intmkl_zero, &intmkl_ctxt, &intmkl_lldA, &intmkl_info);
    std::cout << "MA = " << intmkl_MA << "; NA = " << intmkl_NA << "; intmkl_nRowProc = " << intmkl_nRowProc << "; intmkl_nColProc = " << intmkl_nColProc << "; lldA = " << intmkl_lldA << std::endl;
    doubleAr_A = (double*)mkl_calloc(intmkl_nRowProc*intmkl_nColProc, sizeof(double), 64);
    for (int int_cnt1 = 0; int_cnt1 < intmkl_nRowProc*intmkl_nColProc; int_cnt1++) doubleAr_A[int_cnt1] = 1.0;

    /* compute precise length of local pieces and allocate array on each process for parts of distributed matrices */
    intmkl_MB = (MKL_INT)int_numGlobalRowMatA;
    intmkl_NB = (MKL_INT)int_numGlobalColMatA;
    intmkl_MBB = (MKL_INT)int_numLocalRowMatA;
    intmkl_NBB = (MKL_INT)int_numGlobalColMatA;
    intmkl_nRowProc = NUMROC(&intmkl_MB, &intmkl_MBB, &intmkl_myRow, &intmkl_zero, &intmkl_nProcRows);
    intmkl_nColProc = NUMROC(&intmkl_NB, &intmkl_NBB, &intmkl_myCol, &intmkl_zero, &intmkl_nProcCols);
    intmkl_lldB = MAX(1, intmkl_nRowProc);
    DESCINIT(descB, &intmkl_MB, &intmkl_NB, &intmkl_MBB, &intmkl_NBB, &intmkl_zero, &intmkl_zero, &intmkl_ctxt, &intmkl_lldB, &intmkl_info);
    std::cout << "MB = " << intmkl_MB << "; NB = " << intmkl_NB << "; intmkl_nRowProc = " << intmkl_nRowProc << "; intmkl_nColProc = " << intmkl_nColProc << "; lldB = " << intmkl_lldB << std::endl;
    doubleAr_B = (double*)mkl_calloc(intmkl_nRowProc*intmkl_nColProc, sizeof(double), 64);

    /* copy value from matrix A to matrix B */
    PDGEMR2D(&intmkl_MA, &intmkl_NA, doubleAr_A, &intmkl_zero, &intmkl_zero, descA, doubleAr_B, &intmkl_zero, &intmkl_zero, descB, &intmkl_ctxt);

    /* destroy variables */
    mkl_free(doubleAr_A);
    mkl_free(doubleAr_B);

    /* finalize blacs */
    BLACS_GRIDEXIT(&intmkl_ctxt);

    /* finalize petsc */
    PetscFinalize();
}
I would appreciate any advice or comments on how to solve this problem.
Thanks a lot,
I have solved the problem. It was caused by the wrong values of ia, ja, ib and jb in the PDGEMR2D call: these are 1-based global indices, but I was passing 0 (intmkl_zero), which is what produces the "Bad submatrix: i=-1, j=-1" messages.
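For reference, a minimal sketch of the corrected call, assuming the rest of the program above stays unchanged (intmkl_one is a new constant defined alongside intmkl_zero; it is not in the original code):

    const MKL_INT intmkl_one = 1;  /* ScaLAPACK global row/column indices start at 1 */
    /* copy the full global matrix A into B, i.e. ia = ja = ib = jb = 1 */
    PDGEMR2D(&intmkl_MA, &intmkl_NA, doubleAr_A, &intmkl_one, &intmkl_one, descA,
             doubleAr_B, &intmkl_one, &intmkl_one, descB, &intmkl_ctxt);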
Thanks
Thanks for letting us know.