// _________________________________________________________________________ // | cg_mkl_csr_intel.c | // | test the CG method with the MKL function for the CSR format | // | yyyy-mm-dd | // | 2018-03-16 | // | Edoardo Coronado | // |_________________________________________________________________________| #include #include #include #include #include #include #ifndef BM #define BM { printf( "\nFile: %s Line: %d.\n", __FILE__, __LINE__ ); } #endif #ifndef ABORT #define ABORT { printf( "\nFile: %s Line: %d execution is aborted.\n", __FILE__, __LINE__ ); exit( EXIT_FAILURE ); } #endif #ifndef ALLOC #define ALLOC alloc_if(1) #endif #ifndef REUSE #define REUSE alloc_if(0) #endif #ifndef FREE #define FREE free_if(1) #endif #ifndef RETAIN #define RETAIN free_if(0) #endif #ifndef WARM_ITE #define WARM_ITE 0 #endif #ifndef RUN_ITE #define RUN_ITE 1 #endif typedef struct { int nnz; int nrows; double * val; int * col; int * row; } str_mtx_csr; typedef struct { int rmin; double rave; int rmax; double rsd; double rsdp; int * rl; } str_mtx_sta; typedef struct { char solName[18]; int solNumIte; double solNrmRes; double solErrVal; int solErrPos; double solTime; } str_sol; // Read matrix from formated file in CSR storage scheme // return none // @param in matFileName: file where the matrix is stored // @param out mat: structure containing the arrays of the matrix static void matrix_reading( const char * matFileName, str_mtx_csr * mat ) { if ( strstr( matFileName, ".txt" ) != NULL ) { FILE * fh; fh = fopen( matFileName, "r" ); if ( fh == NULL ) { printf( "matrix_reading is unable to open .csr file\n" ); exit( EXIT_FAILURE ); } if ( fscanf( fh, "%d %d", &(mat->nrows), &(mat->nnz) ) != 2 ) ABORT; mat->val = (double *) malloc( mat->nnz * sizeof(double) ); mat->col = (int *) malloc( mat->nnz * sizeof(int) ); mat->row = (int *) malloc( ( mat->nrows + 1) * sizeof(int) ); int i; for ( i = 0; i < ( mat->nnz ); i++ ) if ( fscanf( fh, "%lf %d\n", &( mat->val[i] ), &( mat->col[i] ) ) != 2 ) ABORT; for ( i = 0; i < ( mat->nrows + 1 ); i++ ) if ( fscanf( fh, "%d", &(mat->row[i]) ) != 1 ) ABORT; fclose( fh ); } return; } // Initialize array // Initialize array with a multiple of 'value' // return none // @param in lenArray: length of the array // @param in value: value used to initialize array // @param inout array: array to be initialized static void init_array( const int lenArray, const double value, double * array ) { int i = 0; for ( ; i < lenArray; i++ ) array[i] = value * (double) i; return; } // Set array // Set all array's element to a fixed 'value' // return none // @param in lenArray: length of the array // @param in value: value used to set array // @param inout array: array to be initialized static void set_array( const int lenArray, const double value, double * array ) { int i = 0; for ( ; i < lenArray; i++ ) array[i] = value; return; } // Calculate error in norm infinity // return none // @param in lenArray: length of the arrays // @param in refArray: array containing the reference values // @param in tarArray: array containing the values to be compared // @param out ei: structure where the error value and position will be saved static void error_infinity( const int lenArray, const double * refArray, const double * tarArray, str_sol * sol ) { double d = 0.0, m = 1e-100; int i = 0, p = 0; for ( ; i < lenArray; i++ ) { d = fabs( refArray[i] - tarArray[i] ); if ( d > m ) { m = d; p = i; } } sol->solErrVal = m; sol->solErrPos = p; return; } // CG solver for system A * x = b // A is a sparse matrix stored in CSR format. // @return none // @param in numThrds: number of threads for offload parallel region // @param in maxIteSol: maximum number of iterations for solver // @param in tolSol: tolerance for solver // @param in mat: matrix in CSR format // @param in b: vector right hand side // @param out x: vector incognite // @param out sol: array to store number of iterations and the residual's norm // CG Algorithm. // IF ( ||b||2 == 0 ) => x=0 => exit // Compute r = - A * x0 + b // rho = (r . r) // ||r||2 = rho ^ 0.5 // IF ( ||r||2 == 0 ) => exit ELSE eps = tol * ||r||2 // p = r // FOR i = 0, 1, 2, ..., maxI DO // v = A * p // sigma = (v . p) // alpha = rho / sigma // x = alpha * p + x // rn = -alpha * v + r // rhon = (rn . rn) // if ( ||rn||2 < eps ) => exit // beta = rhon / rho // r = rn // p = beta * p + r // rho = rhon // End FOR static void cg_csr_mkl_off( const int numThrds, const int maxIteSol, const double tolSol, const str_mtx_csr mat, const double * b, double * x, str_sol * sol ) { // function's name strcpy( sol->solName, "cg_csr_mkl_off" ); // variables for MKL functions int nrows = mat.nrows; int nnz = mat.nnz; int incx = 1, incy = 1; char transa = 'N'; // variables for the CG algorithm int ind = 0; double nrm_b = 0.0, MUNO = -1.0, rho = 0.0, nrm_r = 0.0, eps = 0.0, sigma = 0.0, alpha = 0.0, MALPHA = 0.0, rhon = 0.0, beta = 0.0, UNO = 1.0; double * Ax = (double *) calloc( mat.nrows, sizeof(double) ); double * r = (double *) calloc( mat.nrows, sizeof(double) ); double * p = (double *) calloc( mat.nrows, sizeof(double) ); double * v = (double *) calloc( mat.nrows, sizeof(double) ); // begin the CG algorithm nrm_b = dnrm2( &nrows, b, &incx ); // nrm_b = || b ||2 if ( nrm_b == 0.0 ) // TEST nrm_b IF ( nrm_b == 0) => x = 0 & EXIT { set_array( nrows, 0.0 , x ); sol->solNumIte = 0; sol->solNrmRes = 0.0; printf( "cg_csr_mkl: x = 0.\n" ); return; } BM; #pragma offload target( mic: 0 ) \ in( transa, nrows ) \ in( mat.val: length(nnz) ALLOC RETAIN ) \ in( mat.row: length(nrows+1) ALLOC RETAIN ) \ in( mat.col: length(nnz) ALLOC RETAIN ) \ in( x: length(nrows) ALLOC FREE ) \ out( Ax: length(nrows) ALLOC FREE ) \ num_threads( numThrds ) { mkl_cspblas_dcsrgemv( &transa, &nrows, mat.val, mat.row, mat.col, x, Ax ); // Ax = A * x } BM; dcopy( &nrows, b, &incx, r, &incy ); // r = b daxpy( &nrows, &MUNO, Ax, &incx, r, &incy ); // r = -Ax + r ( r = -Ax + b ) rho = ddot( &nrows, r, &incx, r, &incy ); // rho = ( r . r ) nrm_r = sqrt( rho ); // nrm_r = rho ^ 0.5 if ( nrm_r == 0.0 ) // TEST nrm_r IF ( nrm_r == 0) => EXIT ELSE eps = tol * nrm_r { sol->solNumIte = 0; sol->solNrmRes = 0.0; printf( "cg_csr_mkl: x = x.\n" ); return; } eps = tolSol * nrm_r; // eps = tol * nrm_r dcopy( &nrows, r, &incx, p, &incy ); // p = r for ( ; ind < maxIteSol; ind++ ) { BM; #pragma offload target( mic: 0 ) \ in( transa, nrows ) \ nocopy( mat.val: length(nnz) REUSE RETAIN ) \ nocopy( mat.row: length(nrows+1) REUSE RETAIN ) \ nocopy( mat.col: length(nnz) REUSE RETAIN ) \ in( p: length(nrows) ALLOC FREE ) \ out( v: length(nrows) ALLOC FREE ) \ num_threads( numThrds ) { mkl_cspblas_dcsrgemv( &transa, &nrows, mat.val, mat.row, mat.col, p, v ); // v = A * p } BM; sigma = ddot( &nrows, v, &incx, p, &incy ); // sigma = ( v . p ) alpha = rho / sigma; // alpha = rho / sigma daxpy( &nrows, &alpha, p, &incx, x, &incy ); // x = alpha * p + x MALPHA = -alpha; daxpy( &nrows, &MALPHA, v, &incx, r, &incy ); // r = - alpha * v + r rhon = ddot( &nrows, r, &incx, r, &incy ); // rhon = ( rn . rn ) nrm_r = sqrt( rhon ); // nrm_r = rhon ^ 0.5 if ( nrm_r < eps ) // TEST nrm_r IF ( nrm_r < eps ) => EXIT { sol->solNumIte = ind; sol->solNrmRes = nrm_r; BM; #pragma offload_transfer target( mic: 0) \ nocopy( mat.val: length(nnz) REUSE FREE ) \ nocopy( mat.row: length(nrows+1) REUSE FREE ) \ nocopy( mat.col: length(nnz) REUSE FREE ) {} BM; return; } beta = rhon / rho; // beta = rhon / rho dscal( &nrows, &beta, p, &incx ); // p = beta * p daxpy( &nrows, &UNO, r, &incx, p, &incy ); // p = r + p rho = rhon; // UPDATE rho } sol->solNumIte = maxIteSol; sol->solNrmRes = nrm_r; free( Ax ); free( r ); free( p ); free( v ); BM; #pragma offload_transfer target( mic: 0) \ nocopy( mat.val: length(nnz) REUSE FREE ) \ nocopy( mat.row: length(nrows+1) REUSE FREE ) \ nocopy( mat.col: length(nnz) REUSE FREE ) {} BM; return; } int main( int argc, const char **argv ) { // --------------------------------------------------------------------------------------------------------------------------------------------------------------- if ( argc != 4 ) { printf( "\tUse:\t%s \n", argv[0] ); exit( EXIT_FAILURE ); } // --------------------------------------------------------------------------------------------------------------------------------------------------------------- char mtxFileName[48]; strcpy( mtxFileName, argv[1] ); int maxIteSol = atoi( argv[2] ); double tolSol = atof( argv[3] ); int maxThrdsDevice = 0; // --------------------------------------------------------------------------------------------------------------------------------------------------------------- printf( "Input arguments\n" ); printf( "mtxFileName: %s\n", mtxFileName ); printf( "maxIteSol: %d\n", maxIteSol ); printf( "tolSol: %lg\n", tolSol ); printf( "WARM_ITE: %d\n", (int) WARM_ITE ); printf( "RUN_ITE: %d\n", (int) RUN_ITE ); // --------------------------------------------------------------------------------------------------------------------------------------------------------------- str_mtx_csr mat; matrix_reading( mtxFileName, &mat ); // --------------------------------------------------------------------------------------------------------------------------------------------------------------- #pragma offload target(mic:0) { #pragma omp parallel #pragma omp master maxThrdsDevice = omp_get_max_threads(); } printf( "Maximum threads in parallel region\n" ); printf( "maxThdsDevice: %d\n", maxThrdsDevice ); // --------------------------------------------------------------------------------------------------------------------------------------------------------------- char transa = 'N'; double t1 = 0.0, t2 = 0.0, tr[WARM_ITE+RUN_ITE]; double * xr = (double *) calloc( mat.nrows, sizeof(double) ); double * b = (double *) calloc( mat.nrows, sizeof(double) ); double * x = (double *) calloc( mat.nrows, sizeof(double) ); str_sol sol[1]; int ITE; init_array( mat.nrows, 1.0, xr ); mkl_cspblas_dcsrgemv( &transa , &(mat.nrows), mat.val, mat.row, mat.col, xr, b ); for ( ITE = 0; ITE < ( WARM_ITE + RUN_ITE ); ITE++ ) { t1 = dsecnd(); cg_csr_mkl_off( maxThrdsDevice, maxIteSol, tolSol, mat, b, x, &sol[0] ); t2 = dsecnd(); tr[ITE] = t2 - t1; set_array( mat.nrows, 0.0, x ); } for ( ITE = WARM_ITE; ITE < ( WARM_ITE + RUN_ITE ); ITE++ ) sol[0].solTime = sol[0].solTime + tr[ITE]; sol[0].solTime = sol[0].solTime / (double) RUN_ITE; error_infinity( mat.nrows, xr, x, &sol[0] ); printf( "Solver's results\n" ); printf( "%18s %7s %8s %9s %11s\t %10s\n", "Solver", "solNumIte", "solNrmRes", "solErrVal", "solErrPos", "solTime" ); printf( "%18s: %7d %8.1lg %9.1lg %11d\t %10.8lf\n", sol[0].solName, sol[0].solNumIte, sol[0].solNrmRes, sol[0].solErrVal, sol[0].solErrPos, sol[0].solTime ); // --------------------------------------------------------------------------------------------------------------------------------------------------------------- free( xr ); free( b ); free( x ); exit( EXIT_SUCCESS ); }