Intel® MPI Library

MKL ScaLAPACK + MVAPICH + 100 lines of code = CRASH

amolins
Beginner
The following code reliably produces a crash with MKL 10.2 update 2 (both the sequential and the threaded version) and the latest revision of MVAPICH, on two different clusters. Can anybody tell me what the problem is? It does not always crash, but it does crash when the right number of MPI processes and matrix sizes are selected.

A

/*
 * crash.cpp - crashes with ICC 11.1, MKL 10.2, MVAPICH 1.0 on linux 64-bit,
 * linked with either the serial or the threaded libraries,
 * when run as: mpirun -np 36 crash 5000 10
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include "mpi.h"
#include "mkl_scalapack.h"

extern "C" {
/* BLACS C interface */
void Cblacs_get(int context, int request, int* value);
int  Cblacs_gridinit(int* context, char* order, int np_row, int np_col);
void Cblacs_gridinfo(int context, int* np_row, int* np_col, int* my_row, int* my_col);
int  numroc_(int *n, int *nb, int *iproc, int *isrcproc, int *nprocs);
/* PBLAS */
void pdgemm_(char *TRANSA, char *TRANSB, int *M, int *N, int *K, double *ALPHA,
             double *A, int *IA, int *JA, int *DESCA,
             double *B, int *IB, int *JB, int *DESCB,
             double *BETA, double *C, int *IC, int *JC, int *DESCC);
}

#define BLOCK_SIZE 65

int main(int argc, char* argv[])
{
    int iam, nprocs;
    MPI_Init(&argc, &argv); /* starts MPI */
    MPI_Comm_rank(MPI_COMM_WORLD, &iam);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

    // dismiss the processes that are not part of the grid
    int blacs_pgrid_size = (int)floor(sqrt((double)nprocs));
    if (iam >= blacs_pgrid_size*blacs_pgrid_size) {
        printf("Bye bye world from process %d of %d. BLACS had no place for me...\n", iam, nprocs);
        MPI_Finalize();
        return 0; // excluded processes must stop here, or they fall through into the BLACS calls below
    }

    // start BLACS with a square processor grid
    if (iam == 0)
        printf("starting BLACS...");
    int ictxt, nprow, npcol, myrow, mycol;
    Cblacs_get(-1, 0, &ictxt);
    Cblacs_gridinit(&ictxt, (char*)"C", blacs_pgrid_size, blacs_pgrid_size);
    Cblacs_gridinfo(ictxt, &nprow, &npcol, &myrow, &mycol);
    if (iam == 0)
        printf("done.\n");

    double timing;
    int m, n, k, lm, ln, nbm, nbn, rounds;
    int myzero = 0, myone = 1;
    sscanf(argv[1], "%d", &m);
    n = m;
    k = m;
    sscanf(argv[2], "%d", &rounds);
    nbm = BLOCK_SIZE;
    nbn = BLOCK_SIZE;
    // numroc_ gives the number of rows/columns of the global matrix stored on this process
    lm = numroc_(&m, &nbm, &myrow, &myzero, &nprow);
    ln = numroc_(&n, &nbn, &mycol, &myzero, &npcol);

    int info;
    int *ipiv = new int[lm+nbm+10000000]; // adding a "little" bit of extra space just in case
    char ta = 'N', tb = 'T';
    double alpha = 1.0, beta = 0.0;
    double* test1data = new double[lm*ln];
    double* test2data = new double[lm*ln];
    double* test3data = new double[lm*ln];

    // fill the local block of test1 with small pseudo-random values
    for (int i = 0; i < lm*ln; i++)
        test1data[i] = (double)(rand()%100)/10000.0;

    int *test1desc = new int[9];
    int *test2desc = new int[9];
    int *test3desc = new int[9];

    test1desc[0] = 1;     // descriptor type
    test1desc[1] = ictxt; // BLACS context
    test1desc[2] = m;     // global number of rows
    test1desc[3] = n;     // global number of columns
    test1desc[4] = nbm;   // row block size
    test1desc[5] = nbn;   // column block size (defined equal to the row block size)
    test1desc[6] = 0;     // first process row (defined 0)
    test1desc[7] = 0;     // first process column (defined 0)
    test1desc[8] = lm;    // leading dimension of the local array
    memcpy(test2desc, test1desc, 9*sizeof(int));
    memcpy(test3desc, test1desc, 9*sizeof(int));

    for (int iter = 0; iter < rounds; iter++)
    {
        if (iam == 0)
            printf("iter %i - ", iter);

        // test2 = test1
        memcpy(test2data, test1data, lm*ln*sizeof(double));

        // test3 = test1 * test2^T (tb == 'T')
        timing = MPI_Wtime();
        pdgemm_(&ta, &tb, &m, &n, &k,
                &alpha,
                test1data, &myone, &myone, test1desc,
                test2data, &myone, &myone, test2desc,
                &beta,
                test3data, &myone, &myone, test3desc);
        if (iam == 0)
            printf(" PDGEMM = %f |", MPI_Wtime()-timing);

        // test3 = LU(test3)
        timing = MPI_Wtime();
        pdgetrf_(&m, &n, test3data, &myone, &myone, test3desc, ipiv, &info);
        if (iam == 0)
            printf(" PDGETRF = %f.\n", MPI_Wtime()-timing);
    }

    delete[] ipiv;
    // each dynamically allocated array needs its own delete[]
    delete[] test1data;
    delete[] test2data;
    delete[] test3data;
    delete[] test1desc;
    delete[] test2desc;
    delete[] test3desc;

    MPI_Finalize();
    return 0;
}
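For reference, the descriptors can also be built with ScaLAPACK's descinit_, which validates its arguments and reports an inconsistent descriptor through its info parameter instead of failing later inside PDGEMM/PDGETRF. Below is a minimal sketch of the equivalent call for test1desc; the extern declaration is my assumption of the standard Fortran interface (mkl_scalapack.h may already declare it):

extern "C" void descinit_(int *desc, int *m, int *n, int *mb, int *nb,
                          int *irsrc, int *icsrc, int *ictxt, int *lld, int *info);

int desc[9];
int lld = (lm > 1 ? lm : 1); // the local leading dimension must be at least 1
int dinfo;
descinit_(desc, &m, &n, &nbm, &nbn, &myzero, &myzero, &ictxt, &lld, &dinfo);
if (dinfo != 0)
    printf("descinit_ rejected argument %d on rank %d\n", -dinfo, iam);

If dinfo comes back as -i, the i-th argument had an illegal value, which is a cheap way to rule out a malformed descriptor before blaming pdgemm_ or pdgetrf_.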

mfactor
Beginner
Hello,

Just tested your example using gcc 4.3.2 and mkl 10.2, with openmpi 1.3.3. No segfaults.
mfactor
Beginner
Also tested with icc 11.1, mkl 10.2, and openmpi 1.3.3. No problems.
