#include #include #include #include #include /////////////////////////////////////////////////////////////////////////////// #define FPUTYPE double #define MX_SIZE 100 #define NUM_OF_THREADS 32 char g_szThreadToPU[] = "KMP_AFFINITY=compact,1,0,granularity=fine"; char g_MKL_DYNAMIC[] = "MKL_DYNAMIC=false"; /////////////////////////////////////////////////////////////////////////////// int main( int argc, char *argv[] ) { __attribute__( ( aligned( 64 ) ) ) FPUTYPE *pA = NULL; __attribute__( ( aligned( 64 ) ) ) FPUTYPE *pB = NULL; __attribute__( ( aligned( 64 ) ) ) FPUTYPE *pC = NULL; __attribute__( ( aligned( 64 ) ) ) FPUTYPE *pD = NULL; size_t m, n, p; size_t i, j, k; double time0, timeElapsed; int iError = 0; iError = putenv( &g_MKL_DYNAMIC[0]); if( iError != 0 ) { printf( "Failed to set Environment variable(s): %s\n", &g_MKL_DYNAMIC[0] ); return ( int )0; } iError = putenv( &g_szThreadToPU[0] ); if( iError != 0 ) { printf( "Failed to set Environment variable(s): %s\n", &g_szThreadToPU[0] ); return ( int )0; } omp_set_num_threads( NUM_OF_THREADS ); m = p = n = MX_SIZE; pA = ( FPUTYPE * )_mm_malloc( (m*p) * sizeof( FPUTYPE ) , 64); pB = ( FPUTYPE * )_mm_malloc( (p*n) * sizeof( FPUTYPE ) , 64); pC = ( FPUTYPE * )_mm_malloc( (m*n) * sizeof( FPUTYPE ) , 64); pD = ( FPUTYPE * )_mm_malloc( (m*n) * sizeof( FPUTYPE ) , 64); if( pA == NULL || pB == NULL || pC == NULL || pD == NULL ) { printf( "ERROR: Can't allocate memory for matrices\n" ); _mm_free( pA ); _mm_free( pB ); _mm_free( pC ); _mm_free( pD ); return ( int )0; } printf( "Initializing matrix data\n" ); time0 = dsecnd(); for( i = 0; i < ( m*p ); i += 1 ) pA[i] = ( FPUTYPE )1.0; for( i = 0; i < ( p*n ); i += 1 ) pB[i] = ( FPUTYPE )1.0; for( i = 0; i < ( m*n ); i += 1 ){ pC[i] = ( FPUTYPE )0.0; pD[i] = ( FPUTYPE )0.0; } cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,m, n, p, 1, pA, p, pB, n, 0, pC, n); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,m, n, p, 1, pA, p, pB, n, 0, pD, n); timeElapsed = dsecnd() - time0; printf("time elapsed is %f\n",timeElapsed); /*for(i=0; i