#include #include #include #include "mkl_types.h" #include "mkl_cblas.h" #include "mkl_service.h" #include "matmult.h" int main() { test_cgemm(); return 0; } int test_cgemm() { MKL_Complex8 *A, *B, *C; MKL_INT m = 51200; MKL_INT n = 8; MKL_INT k = 40; int i, mult_it; int mult_rep = 200; MKL_Complex8 alpha, beta; struct timeval tv1, tv2; double mult_time; mkl_set_num_threads(mkl_get_max_threads()); alpha.real = (float)1.0; alpha.imag = (float)0.0; beta.real = (float)0.0; beta.imag = (float)0.0; A = (MKL_Complex8 *)mkl_malloc( m*k*sizeof( MKL_Complex8 ), 64 ); B = (MKL_Complex8 *)mkl_malloc( k*n*sizeof( MKL_Complex8 ), 64 ); C = (MKL_Complex8 *)mkl_malloc( m*n*sizeof( MKL_Complex8 ), 64 ); for (i = 0; i < (m*k); i++) { A[i].real = (float)(i+1); A[i].imag = (float)(-i+1); } for (i = 0; i < (k*n); i++) { B[i].real = (float)(-2*i-1); B[i].imag = (float)(i+1); } for (i = 0; i < (m*n); i++) { C[i].real = 0.0; C[i].imag = 0.0; } gettimeofday(&tv1, NULL); for (mult_it = 0; mult_it < mult_rep; mult_it++) { cblas_cgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, m, n, k, &alpha, A, m, B, k, &beta, C, m); } gettimeofday(&tv2, NULL); mult_time = ((double) (tv2.tv_usec - tv1.tv_usec) / 1000000 + (double) (tv2.tv_sec - tv1.tv_sec)) / (double) (mult_rep); mkl_free(A); mkl_free(B); mkl_free(C); printf("Elapsed time per multiplication: %f ms\n", mult_time*1000); return 0; }