- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I am trying to optimize a simple loop with SIMD SSE instructions. The code compute a 2D field and calculate heat propagation trough time.
Loops use only simple operations, and reference loop is really simple.
The following program contains 3 loops : loop 0 is the reference, loop 1 is a decomposition of the operations, and loop 2 is the optimize loop. However, the second loop is slightly faster than the first one, and the optimize loop is really slow. I am using x64 gcc 4.7 and icc 11 compilers, sames results with -O2, -O3, -msse2/-xSSE2.
Did I make something wrong ?
With my best regards.
Moomba
[csharp]
#include <stdio.h>
#include <time.h>
#include<emmintrin.h>
#define n1 102
#define n2 102
#define niter 20000
double U0[n1][n2];
double U1[n1][n2];
double U2[n1][n2]; /* buffer*/
double *cU0[n1][n2];
/* simd */
__m128d vU0[n1][n2];
__m128d vU1[n1][n2];
__m128d vU2[n1][n2]; /* buffer*/
int i,j,t;
double Dx,Dy,Lx,Ly,InvDxDx,InvDyDy,Dt,alpha,totaltime,Stab,DtAlpha,DxDx,DyDy;
clock_t time0,time1;
FILE *f1;
int main()
{
/* ---- GENERAL ---- */
alpha = 0.4;
totaltime = 1.0;
Dt = totaltime/((niter-1)*1.0);
Lx = 1.0;
Ly = 1.0;
Dx = Lx/((n1-1)*1.0);
Dy = Ly/((n2-1)*1.0);
InvDxDx = 1.0/(Dx*Dx);
InvDyDy = 1.0/(Dy*Dy);
DxDx = Dx*Dx;
DyDy = Dy*Dy;
Stab = alpha*Dt*(InvDxDx+InvDyDy);
DtAlpha = Dt*alpha;
/* Stability if result <= 0.5 */
printf("Stability factor : %f \n",Stab);
/* +----------------+ */
/* | LOOP 0 | */
/* +----------------+ */
/* Init */
for( i = 0; i < n1; i++)
{
for( j = 0; j < n2; j++)
{
U0
U1
}
}
for( i = 0; i < n1; i++)
{
U0[0] = 1.0;
U1[0] = 1.0;
}
printf("Init OK \n");
/* Core */
time0=clock();
for( t = 0; t < niter; t++)
{
/* even */
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j++)
{
U1
}
}
/* odd */
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j++)
{
U0
}
}
}
time1=clock();
printf("Loop 0, total time : %f \n", (double) time1-time0);
f1 = fopen ("out0.dat", "wt");
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j++)
{
fprintf (f1, "%d\t%d\t%f\n", i, j, U0
}
}
/* +----------------+ */
/* | LOOP 1 | */
/* +----------------+ */
/* Init */
for( i = 0; i < n1; i++)
{
for( j = 0; j < n2; j++)
{
U0
U1
}
}
for( i = 0; i < n1; i++)
{
U0[0] = 1.0;
U1[0] = 1.0;
}
printf("Init OK \n");
/* Core */
time0=clock();
for( t = 0; t < niter; t++)
{
/* even */
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j++)
{
/* U1
U1
U1
U1
U1
U2
U2
U2
U2
U1
U1
U1
}
}
/* odd */
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j++)
{
U0
U0
U0
U0
U2
U2
U2
U2
U0
U0
U0
}
}
}
time1=clock();
printf("Loop 1, total time : %f \n", (double) time1-time0);
/* End */
f1 = fopen ("out1.dat", "wt");
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j++)
{
fprintf (f1, "%d\t%d\t%f\n", i, j, U0
}
}
/* +----------------+ */
/* | LOOP 3 | */
/* +----------------+ */
/* Init */
for( i = 0; i < n1; i++)
{
for( j = 0; j < n2; j++)
{
vU0
vU1
}
}
for( i = 0; i < n1; i++)
{
vU0[0] = _mm_set1_pd(1.0);
vU1[0] = _mm_set1_pd(1.0);
}
printf("Init OK \n");
/* Core */
time0=clock();
for( t = 0; t < niter; t++)
{
/* even */
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j+=2)
{
/* U1
__m128d va = _mm_set1_pd(-2.0); /* U1
vU1
vU1
vU1
__m128d vb = _mm_set1_pd(InvDxDx); /* U1
vU1
vU2
vU2
vU2
__m128d vc = _mm_set1_pd(InvDyDy); /* U2
vU2
vU1
__m128d vd = _mm_set1_pd(DtAlpha); /* U1
vU1
vU1
}
}
/* odd */
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j++)
{
__m128d va = _mm_set1_pd(-2.0); /* U0
vU0
vU0
vU0
__m128d vb = _mm_set1_pd(InvDxDx); /* U0
vU0
vU2
vU2
vU2
__m128d vc = _mm_set1_pd(InvDyDy); /* U2
vU2
vU0
__m128d vd = _mm_set1_pd(DtAlpha); /* U0
vU0
vU0
}
}
}
time1=clock();
printf("Loop 3, total time : %f \n", (double) time1-time0);
/* End */
f1 = fopen ("out3.dat", "wt");
for( i = 1; i < n1-1; i++)
{
for( j = 1; j < n2-1; j+=2)
{
fprintf (f1, "%d\t%d\t%f\n", i, j, vU0
}
}
}
[/csharp]
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page