Intel® ISA Extensions
Use hardware-based isolation and memory encryption to provide more code protection in your solutions.
1116 Discussions

I don't understand why SSE is slower than ANSI C

Yavorski__Nick
Beginner
574 Views
Hi, I wrote a very simple C program a few days ago. I tried to optimize it with SSE, but I found that the SSE version is slower than the plain C version. This program is very important for my work, so I would be very grateful if you could help me make the code faster.
 
code:
 
#include <math.h>
#include <stdio.h>
#include <time.h>

#include <xmmintrin.h>
/* 3-component float vector; 16-byte alignment is requested for its storage. */
#if defined(_MSC_VER)
typedef __declspec(align(16)) float vec3_t[3];
#else
typedef float vec3_t[3] __attribute__((aligned(16)));
#endif

/*
 * Normalize a 3-component vector in place using SSE intrinsics.
 *
 * vec: the three components x, y, z; overwritten with vec / |vec|.
 *
 * Only vec[0..2] are read and written. A vec3_t holds three floats, so a
 * full 16-byte vector load/store through the pointer would touch four bytes
 * past the end of the array.
 *
 * A zero-length input divides by zero and yields non-finite components.
 */
static inline void vec_normalize_sse(vec3_t vec)
{
    float out[4];
    __m128 v, sq, sum, len;

    /* Lanes are [x, y, z, 0]; the fourth lane is padding and never stored. */
    v = _mm_set_ps(0.0f, vec[2], vec[1], vec[0]);
    sq = _mm_mul_ps(v, v);

    /* Rotate y*y and then z*z into lane 0 so lane 0 becomes (x*x + y*y) + z*z. */
    sum = _mm_add_ps(sq, _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(3, 2, 0, 1)));
    sum = _mm_add_ps(sum, _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(3, 0, 1, 2)));

    /* One scalar square root, then broadcast the length to every lane. */
    len = _mm_sqrt_ss(sum);
    len = _mm_shuffle_ps(len, len, _MM_SHUFFLE(0, 0, 0, 0));

    v = _mm_div_ps(v, len);

    /* Spill to a local 4-float buffer and copy back just the three components. */
    _mm_storeu_ps(out, v);
    vec[0] = out[0];
    vec[1] = out[1];
    vec[2] = out[2];
}
/*
 * Normalize a 3-component vector in place using scalar arithmetic.
 *
 * vec: the three components x, y, z; each is multiplied by 1 / |vec|.
 *
 * Declared static inline: in C99/C11 a plain `inline` definition provides no
 * external definition, so a call that is not inlined would fail to link.
 *
 * A zero-length input divides by zero and yields non-finite components.
 */
static inline void vec_normalize_c(vec3_t vec)
{
    float inv_len;

    inv_len = vec[0] * vec[0] + vec[1] * vec[1] + vec[2] * vec[2];
    /* sqrtf stays in single precision; no round trip through double. */
    inv_len = sqrtf(inv_len);
    inv_len = 1.0f / inv_len;

    vec[0] *= inv_len;
    vec[1] *= inv_len;
    vec[2] *= inv_len;
}
/*
 * Benchmark driver: runs vec_normalize_sse and then vec_normalize_c `count`
 * times each on the same starting vector (1, 2, 3), nudging every component
 * by 0.1 before each call, and prints the elapsed clock() ticks together
 * with the final vector for each variant.
 *
 * Returns 0.
 */
int main(void)
{
    const int count = 1000000;
    vec3_t vec;
    clock_t s, e;
    int i;

    vec[0] = 1.0f;
    vec[1] = 2.0f;
    vec[2] = 3.0f;
    s = clock();
    for (i = 0; i < count; i++) {
        vec[0] += 0.1f;
        vec[1] += 0.1f;
        vec[2] += 0.1f;
        vec_normalize_sse(vec);
    }
    e = clock();
    /* clock_t has no printf conversion of its own; print the tick delta as long. */
    printf("sse = %ld, %f, %f, %f\n", (long)(e - s), vec[0], vec[1], vec[2]);

    vec[0] = 1.0f;
    vec[1] = 2.0f;
    vec[2] = 3.0f;
    s = clock();
    for (i = 0; i < count; i++) {
        vec[0] += 0.1f;
        vec[1] += 0.1f;
        vec[2] += 0.1f;
        vec_normalize_c(vec);
    }
    e = clock();
    printf("c = %ld, %f, %f, %f\n", (long)(e - s), vec[0], vec[1], vec[2]);

    /* Wait for Enter before exiting so the output stays visible. */
    getchar();
    return 0;
}
 
I have written more about my work elsewhere: I work at https://writer4sale.com/, and this program helps us with writing various articles and other texts.
0 Kudos
0 Replies
Reply