Prashanthns

11-29-2010
No gain using intrinsics

#include <stdio.h>

#include <xmmintrin.h>

/*
 * Micro-benchmark comparing a plain C 4-element dot product against an
 * SSE-intrinsics version. The dot product of F and array is accumulated
 * into result 100 * 100000 times.
 *
 * Change "#if 0" to "#if 1" below to time the plain C variant instead of
 * the SSE variant.
 *
 * Returns 0 after printing the accumulated result.
 */
int main(void)
{
    static const float F[4] = { 0.12499890589348f,
                                0.11994905818636f,
                                0.10575346285725f,
                                0.08503254418207f };
    float array[4], result = 0.0f;
    int j, m;

    /* Each element is the sum of two cyclically adjacent coefficients. */
    array[0] = F[0] + F[1];
    array[1] = F[1] + F[2];
    array[2] = F[2] + F[3];
    array[3] = F[3] + F[0];

    for (m = 0; m < 100; m++)
    {
        for (j = 0; j < 100000; j++)
        {
#if 0
            /* C implementation - takes 0.120secs */
            result += (F[0]*array[0] + F[1]*array[1] + F[2]*array[2] + F[3]*array[3]);
#else
            __m128 reg0, reg1;
            float tmp[4];

            /* Unaligned loads: neither F nor array is declared 16-byte aligned. */
            reg0 = _mm_loadu_ps(F);
            reg1 = _mm_loadu_ps(array);
            reg0 = _mm_mul_ps(reg0, reg1);

            /* using scalar addition - takes 0.124secs */
            /* The four products are stored back to memory and summed with scalar adds. */
            _mm_storeu_ps(tmp, reg0);
            result += (tmp[0] + tmp[1] + tmp[2] + tmp[3]);
#endif /* the original listing never closed this conditional */
        }
    }

    /*
     * Consume the result: if it is never used, an optimizing compiler is
     * free to remove the whole loop, which makes any timing meaningless.
     */
    printf("result = %f\n", result);

    return 0;
}

I converted the filtering into SIMD intrinsics, but it is not giving any gain. Is there any particular reason for not getting a gain here? I also tried writing hand assembly, which was of no benefit either!

