/*
 * Element-wise addition of two float arrays using AVX intrinsics.
 *
 * NOTE(review): the original file had five #include directives whose header
 * names were lost. Only the headers the code below needs are restored here;
 * re-add any others the project relied on.
 */
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>

/* Number of 32-bit floats held by one 256-bit AVX register. */
enum { AVX_FLOAT_LANES = 8 };

/* Byte alignment requested for the demo buffers (one AVX register). */
enum { BUFFER_ALIGNMENT = 32 };

/**
 * Compute c[i] = a[i] + b[i] for every i in [0, size).
 *
 * Full groups of AVX_FLOAT_LANES elements are processed with unaligned AVX
 * loads/stores, so the pointers do not need any particular alignment. The
 * remaining (size % AVX_FLOAT_LANES) elements are handled one at a time so
 * that no access ever goes past index size - 1.
 *
 * @param a    first input array, at least `size` elements
 * @param b    second input array, at least `size` elements
 * @param c    output array, at least `size` elements
 * @param size number of elements to process; values <= 0 do nothing
 */
void add(float *a, float *b, float *c, int size)
{
    /* Largest multiple of the lane count that fits in `size`. */
    const int vec_end = size - (size % AVX_FLOAT_LANES);
    int i = 0;

    /* Vector part: each iteration consumes a whole register of elements. */
    for (; i < vec_end; i += AVX_FLOAT_LANES) {
        __m256 va = _mm256_loadu_ps(&a[i]);
        __m256 vb = _mm256_loadu_ps(&b[i]);
        _mm256_storeu_ps(&c[i], _mm256_add_ps(va, vb));
    }

    /* Scalar tail: fewer than AVX_FLOAT_LANES elements are left. */
    for (; i < size; i++) {
        c[i] = a[i] + b[i];
    }
}

/**
 * Demo driver: fills two 128-element arrays with 0..127, adds them with
 * add(), and prints each (a, b, c) triple on its own line.
 *
 * @return EXIT_SUCCESS on success, EXIT_FAILURE if a buffer allocation fails.
 */
int main(void)
{
    const int size = 128;
    int status = EXIT_FAILURE;

    float *a = _mm_malloc(sizeof *a * (size_t)size, BUFFER_ALIGNMENT);
    float *b = _mm_malloc(sizeof *b * (size_t)size, BUFFER_ALIGNMENT);
    float *c = _mm_malloc(sizeof *c * (size_t)size, BUFFER_ALIGNMENT);

    if (a != NULL && b != NULL && c != NULL) {
        for (int i = 0; i < size; i++) {
            a[i] = (float)i;
            b[i] = (float)i;
            c[i] = 0.0f;
        }

        add(a, b, c, size);

        for (int i = 0; i < size; i++) {
            printf("%f %f %f \n", a[i], b[i], c[i]);
        }
        status = EXIT_SUCCESS;
    } else {
        fprintf(stderr, "failed to allocate %d-element float buffers\n", size);
    }

    /* Memory from _mm_malloc must be released with _mm_free, not free().
     * The NULL guards avoid relying on how _mm_free treats a null pointer. */
    if (c != NULL) {
        _mm_free(c);
    }
    if (b != NULL) {
        _mm_free(b);
    }
    if (a != NULL) {
        _mm_free(a);
    }

    return status;
}