/*
 * Small demonstration of variable-index permutes on packed floats:
 * one 8-element case using AVX2 and one 16-element case using AVX-512F.
 * Each case fills a data vector with 0..N-1 and an index vector with
 * N-1..0, prints both, then prints the permuted result.
 *
 * Requires a compiler/CPU with AVX2 and AVX-512F enabled.
 */
#include <stdio.h>
#include <stdlib.h>
#include <immintrin.h>

/*
 * Alignment specifier usable on MSVC/ICC (__declspec) as well as
 * GCC/Clang (__attribute__). The aligned load/store intrinsics used
 * below require 32-byte (256-bit) or 64-byte (512-bit) alignment.
 */
#if defined(_MSC_VER) || defined(__INTEL_COMPILER)
#define SIMD_ALIGN(n) __declspec(align(n))
#else
#define SIMD_ALIGN(n) __attribute__((aligned(n)))
#endif

/*
 * printvNx(p, v): print label `p`, the source text of expression `v`,
 * and then every element of vector `v`, highest element first.
 *
 * These stay macros (rather than functions) because `#v` stringifies the
 * caller's expression. They are wrapped in do { } while (0) so that each
 * expands to exactly one statement.
 */

/* Print the 16 32-bit integers held in a __m512i. */
#define printv16i(p, v)                                                 \
    do {                                                                \
        SIMD_ALIGN(64) int printv_buf_[16];                             \
        int printv_k_;                                                  \
        _mm512_store_si512((__m512i *)printv_buf_, (v));                \
        printf("%8s:%8s = ", (p), #v);                                  \
        for (printv_k_ = 15; printv_k_ >= 0; printv_k_--) {             \
            printf("%4d ", printv_buf_[printv_k_]);                     \
        }                                                               \
        printf("\n");                                                   \
    } while (0)

/* Print the 16 floats held in a __m512. */
#define printv16f(p, v)                                                 \
    do {                                                                \
        SIMD_ALIGN(64) float printv_buf_[16];                           \
        int printv_k_;                                                  \
        _mm512_store_ps(printv_buf_, (v));                              \
        printf("%8s:%8s = ", (p), #v);                                  \
        for (printv_k_ = 15; printv_k_ >= 0; printv_k_--) {             \
            printf("%3.3f ", printv_buf_[printv_k_]);                   \
        }                                                               \
        printf("\n");                                                   \
    } while (0)

/* Print the 8 32-bit integers held in a __m256i. */
#define printv8i(p, v)                                                  \
    do {                                                                \
        SIMD_ALIGN(32) int printv_buf_[8];                              \
        int printv_k_;                                                  \
        _mm256_store_si256((__m256i *)printv_buf_, (v));                \
        printf("%8s:%8s = ", (p), #v);                                  \
        for (printv_k_ = 7; printv_k_ >= 0; printv_k_--) {              \
            printf("%4d ", printv_buf_[printv_k_]);                     \
        }                                                               \
        printf("\n");                                                   \
    } while (0)

/* Print the 8 floats held in a __m256. */
#define printv8f(p, v)                                                  \
    do {                                                                \
        SIMD_ALIGN(32) float printv_buf_[8];                            \
        int printv_k_;                                                  \
        _mm256_store_ps(printv_buf_, (v));                              \
        printf("%8s:%8s = ", (p), #v);                                  \
        for (printv_k_ = 7; printv_k_ >= 0; printv_k_--) {              \
            printf("%3.3f ", printv_buf_[printv_k_]);                   \
        }                                                               \
        printf("\n");                                                   \
    } while (0)

/*
 * Runs the 256-bit and then the 512-bit permute demonstration and prints
 * the index, data and permuted vectors for each. Command-line arguments
 * are not used. Returns 0.
 */
int main(int argc, char **argv)
{
    /* 64-byte alignment satisfies both the 256-bit and 512-bit loads. */
    SIMD_ALIGN(64) float data[16];
    SIMD_ALIGN(64) int idx[16];

    (void)argc;
    (void)argv;

    /* 8-element case: only the first 8 slots of each array are used. */
    {
        for (int i = 0; i < 8; i++) {
            data[i] = (float)i;
            idx[i] = 7 - i;
        }

        /* Vector names are kept as iv/dv/pv: the print macros emit them. */
        __m256i iv = _mm256_load_si256((const __m256i *)idx);
        __m256 dv = _mm256_load_ps(data);
        printv8i("iv", iv);
        printv8f("dv", dv);

        __m256 pv = _mm256_permutevar8x32_ps(dv, iv);
        printv8f("pv", pv);
    }

    /* 16-element case. */
    {
        for (int i = 0; i < 16; i++) {
            data[i] = (float)i;
            idx[i] = 15 - i;
        }

        __m512i iv = _mm512_load_si512((const __m512i *)idx);
        __m512 dv = _mm512_load_ps(data);
        printv16i("iv", iv);
        printv16f("dv", dv);

        /*
         * _mm512_permutexvar_ps is the full-width counterpart of
         * _mm256_permutevar8x32_ps: each index selects from all 16
         * elements. Note the operand order is (index, data).
         * The previously used _mm512_permutevar_ps only permutes within
         * each 128-bit lane (it looks at the low 2 bits of every index),
         * so indices 15..0 would not reverse the whole vector.
         */
        __m512 pv = _mm512_permutexvar_ps(iv, dv);
        printv16f("pv", pv);
    }

    return 0;
}