/*
 * demo.c : testcase to show that icc 12.1.0 may generate incorrect code
 *          with -O2 -openmp
 *
 * Platform : Linux x86-64 (Debian 6.0)
 * Compiler : icc version 12.1.0 (l_ccompxe_intel64_2011.7.256)
 * Flags    : -O2 -openmp
 *
 * The bug shows only if compiled with both flags -O2 and -openmp together.
 *
 * The result of the application built with both flags is always wrong and
 * additionally varies with each run.
 *
 * Compiling with '-O2' or '-O0 -openmp' flags results in correct code.
 *
 * Define DEMO_NO_MAIN to leave out main(), so that calc() and its helpers
 * can be compiled into a separate harness. The default build is unaffected.
 */

#include <stdint.h>
#include <stdio.h>

#ifdef _OPENMP
#include <omp.h>
#endif

/*
 * A 64-bit word overlaid with four 13-bit fields, each followed by three
 * unnamed padding bits. The exact bit position of each field inside
 * 'number' is implementation-defined.
 */
typedef union {
    uint64_t number;
    struct {
        uint32_t one   : 13;
        uint32_t       :  3;
        uint32_t two   : 13;
        uint32_t       :  3;
        uint32_t three : 13;
        uint32_t       :  3;
        uint32_t four  : 13;
        uint32_t       :  3;
    } fields;
} MyUnion;

/* Returns the bitwise OR of the four bit-fields of h. */
static __inline uint32_t foo(MyUnion h)
{
    return h.fields.four | h.fields.three | h.fields.two | h.fields.one;
}

/*
 * ORs the raw 64-bit words of h0, h1 and h2 together and returns foo() of
 * the combined value.
 *
 * The shape of this function is part of the reproducer and must be kept:
 * the OR is deliberately done in two statements and foo() is deliberately
 * called rather than expanded by hand.
 */
static __inline uint32_t bar(MyUnion h0, MyUnion h1, MyUnion h2)
{
    MyUnion h;

    h.number = h0.number | h1.number;
    h.number = h.number | h2.number;
    // no bug if bitwise-or is written as
    //     h.number = h0.number | h1.number | h2.number;
    // no bug either, if foo() is 'inlined' here 'manually'
    return foo(h);
}

/*
 * Adds bar(t1, t2, t3) to a running sum n_iterations times and returns the
 * sum. With one = 1, two = 2 and three = 4 every call to bar() yields 7, so
 * the correct result is 7 * n_iterations.
 *
 * When built with OpenMP the loop is a statically scheduled parallel for
 * with a '+' reduction on sum.
 */
unsigned long calc(unsigned long n_iterations)
{
    MyUnion t1, t2, t3;
    unsigned long sum = 0;
    unsigned long k;

    /* Clear the whole word first so the untouched fields and padding are 0. */
    t1.number = 0;
    t1.fields.one = 1;
    t2.number = 0;
    t2.fields.two = 2;
    t3.number = 0;
    t3.fields.three = 4;

#ifdef _OPENMP
#pragma omp parallel for schedule(static) reduction(+:sum)
#endif
    for (k = n_iterations; k > 0; --k) {
        sum += bar(t1, t2, t3);
    }

    return sum;
}

#ifndef DEMO_NO_MAIN
/* Runs calc() for a fixed iteration count and prints actual vs. expected. */
int main(int argc, char **argv)
{
    unsigned long sum, n_iterations = 100000;

    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    sum = calc(n_iterations);
    printf("sum = %lu (should be %lu)\n", sum, 7 * n_iterations);

    return 0;
}
#endif /* DEMO_NO_MAIN */