#include "matrices.h" void print_mat(real *A, int noRowsToPrint, int noColsToPrint, int n, int m){ for (int i = 0; i < noRowsToPrint; ++i){ for (int j = 0; j < noColsToPrint; ++j){ printf("%.3e\t", A[i * m + j]); } printf("\n"); } } void fprint_mat(real *A, int noRowsToPrint, int noColsToPrint, int n, int m, FILE *deb){ for (int i = 0; i < noRowsToPrint; ++i){ for (int j = 0; j < noColsToPrint; ++j){ fprintf(deb, "%.3e\t", A[i * m + j]); } fprintf(deb, "\n"); } } void mat_gemtm(const Matrix *A, const Matrix* B, Matrix* C, const real alpha, const real beta) { cblas_gemm(CblasRowMajor, CblasTrans, CblasNoTrans, A->num_cols, B->num_cols, A->num_rows, alpha, A->A, A->num_cols, B->A, B->num_cols, beta, C->A, C->num_cols); } void submat_gemtm(const SubMatrix *A, const SubMatrix *B, SubMatrix *C, const real alpha, const real beta) { const real* SA = A->M->A + SUBMATIDX(A, 0, 0); const real* SB = B->M->A + SUBMATIDX(B, 0, 0); real* SC = C->M->A + SUBMATIDX(C, 0, 0); cblas_gemm(CblasRowMajor, CblasTrans, CblasNoTrans, A->num_cols, B->num_cols, A->num_rows, alpha, SA, A->M->num_cols, SB, B->M->num_cols, beta, SC, C->M->num_cols); } void mat_syrk(const Matrix *A, Matrix *C, const real alpha, const real beta) { cblas_syrk(CblasRowMajor, CblasLower, CblasTrans, C->num_rows, A->num_rows, alpha, A->A, A->num_cols, beta, C->A, C->num_cols); } void submat_syrk(const SubMatrix *A, SubMatrix *C, const real alpha, const real beta) { const real* SA = A->M->A + SUBMATIDX(A, 0, 0); real* SC = C->M->A + SUBMATIDX(C, 0, 0); cblas_syrk(CblasRowMajor, CblasLower, CblasTrans, C->num_rows, A->num_rows, alpha, SA, A->M->num_cols, beta, SC, C->M->num_cols); } real mat_err(const Matrix* A, const Matrix *B) { if (A->num_rows != B->num_rows || A->num_cols != B->num_cols) { fprintf(stderr, "AtA::mat_err::MATRIX_SIZE: Incompatible matrix sizes (%lu-by-%lu and %lu-by-%lu).\n", A->num_rows, A->num_cols, B->num_rows, B->num_cols); return -1.0; } register integer i; real Result = 0; for (i = 0; i < A->num_rows * A->num_cols; i++) { real dij = A->A[i] - B->A[i]; Result += dij * dij; } return sqrt(Result); } void scalapack_gemtm(SubMatrix* SA, SubMatrix* SB, SubMatrix* SC, int size, int* IDs, MKL_INT icontxtFather) { size_t N = SB->num_rows; size_t M = SA->num_cols; size_t K = SB->num_cols; //MPI_Comm_rank(COMM, &myid); for (int i = 0; i < size; ++i) printf("%d\n", IDs[i]); //printf("there are %d processes in this communicator\n\n", size); real* A = SA->M->A + SUBMATIDX(SA, 0, 0); real* B = SB->M->A + SUBMATIDX(SB, 0, 0); real* C = SC->M->A + SUBMATIDX(SC, 0, 0); // real* A = (real*)malloc(N * M * sizeof(real)); // real* B = (real*)malloc(N * K * sizeof(real)); // real *C = (real*)calloc(K * M, sizeof(real)); long long icontxt; long long what = 0; long long npcol;// qui npcol = 1; long long nprow;// qui nprow = size; long long myrow, mycol; int dims[] = {0, 0}; MPI_Dims_create(size, 2, dims); nprow = dims[0]; npcol = dims[1]; printf("grid size %dx%d\n", nprow, npcol); long long negone = -1; long long one = 1; long long zero = 0; long long n = (long long)N; long long m = (long long)M; long long k = (long long)K; MKL_INT descA[9], descB[9], descC[9], descSubA[9], descSubB[9], descSubC[9]; // MKL_INT *imap = (MKL_INT*)calloc(nprow * npcol, sizeof(MKL_INT)); // for (int i = 0; i < nprow * npcol; i++){ // imap[i] = IDs[i]; // } MKL_INT imap[size]; for (int i = 0; i < size; i++){ imap[i] = (MKL_INT)IDs[i]; printf("imap[%d] = %d\n", i, imap[i]); } icontxt = imap[0]; //sys2blacs_handle_ (NULL); printf("before get**\n"); blacs_get(&negone, &what, &icontxt); printf("before gridmap**\n"); blacs_gridmap(&icontxt, imap, &nprow, &nprow, &npcol); //blacs_gridinit( &icontxt, "R", &nprow, &npcol ); //printf("icontxt %d grid size %dx%d myid %d\n", icontxt, nprow, npcol, myid); printf("before gridinfo**\n"); blacs_gridinfo( &icontxt, &nprow, &npcol, &myrow, &mycol ); printf("GRID DONE**\n"); MKL_INT np, nrp, mp, mrp, kp, krp, nb = 1, mb = 1; //printf("grid size %dx%d\n", nprow, npcol); np = numroc_( &n, &nb, &mycol, &zero, &npcol ); nrp = numroc_( &n, &nb, &myrow, &zero, &nprow ); mp = numroc_( &m, &mb, &mycol, &zero, &npcol ); mrp = numroc_( &m, &mb, &myrow, &zero, &nprow ); kp = numroc_( &k, &mb, &mycol, &zero, &npcol ); krp = numroc_( &k, &mb, &myrow, &zero, &nprow ); // Compute parameters for MKL long long lldA = (SA->M->num_cols > 1) ? SA->M->num_cols : 1; long long lldB = (SB->M->num_cols > 1) ? SB->M->num_cols : 1; long long lldC = (SC->M->num_cols > 1) ? SC->M->num_cols : 1; long long lldSubA = (mrp > 1) ? mrp : 1; long long lldSubB = (krp > 1) ? krp : 1; long long lldSubC = (krp > 1) ? krp : 1; MKL_INT info; real dzero = 0; real done = 1; real *subA = (real*)calloc(np * mrp, sizeof(real)); real *subB = (real*)calloc(np * krp, sizeof(real)); real *subC = (real*)calloc(mp * krp, sizeof(real)); // Matrix descriptors descinit_(descA, &m, &n, &m, &n, &zero, &zero, &icontxt, &lldA, &info); descinit_(descB, &k, &n, &k, &n, &zero, &zero, &icontxt, &lldB, &info); descinit_(descC, &k, &m, &k, &m, &zero, &zero, &icontxt, &lldC, &info); // Distributed matrix descriptors descinit_(descSubA, &m, &n, &nb, &mb, &zero, &zero, &icontxt, &lldSubA, &info); descinit_(descSubB, &k, &n, &nb, &mb, &zero, &zero, &icontxt, &lldSubB, &info); descinit_(descSubC, &k, &m, &mb, &mb, &zero, &zero, &icontxt, &lldSubC, &info); // Distribute A and B pdgeadd_("N", &m, &n, &done, A, &one, &one, descA, &dzero, subA, &one, &one, descSubA); pdgeadd_("N", &k, &n, &done, B, &one, &one, descB, &dzero, subB, &one, &one, descSubB); // Compute product pdgemm_("N","T", &k, &m, &n, &done, subB, &one, &one, descSubB, subA, &one, &one, descSubA, &dzero, subC, &one, &one, descSubC); printf("in SCALAPACK, multiplication done\n"); // Retrieve C pdgeadd_("N", &k, &m, &done, subC, &one, &one, descSubC, &dzero, C, &one, &one, descC); //printf("Comm: %d %d, we are done id %d\n\n", IDs[0], IDs[1], myid); free(subA); free(subB); free(subC); //blacs_freebuff(&icontxt, &zero); blacs_gridexit(&icontxt); blacs_exit(&one); return; }