// icc -mkl -std=c99 daxpby_bug.c #include #include #include int main(void) { const MKL_INT n = 1000000; const MKL_INT inc = 1; const double one = 1.0; double * x = MKL_malloc(n * sizeof(*x), 32); double * y = MKL_malloc(n * sizeof(*y), 32); for (int t = 1; t <= 8; t++) { double start, end, count; mkl_set_num_threads(t); // warmup dsecnd(); daxpy(&n, &one, x, &inc, y, &inc); daxpby(&n, &one, x, &inc, &one, y, &inc); start = end = dsecnd(); count = 0; do { daxpy(&n, &one, x, &inc, y, &inc); count++; end = dsecnd(); } while(end - start < 2.0); printf("Threads=%d; %f daxpy() calls/sec\n", t, count / (end - start)); start = end = dsecnd(); count = 0; do { daxpby(&n, &one, x, &inc, &one, y, &inc); count++; end = dsecnd(); } while(end - start < 2.0); printf("Threads=%d; %f daxpby() calls/sec\n", t, count / (end - start)); } }