Community
cancel
Showing results for 
Search instead for 
Did you mean: 
Raymond_S_
Beginner
117 Views

Difference between vs2010 and vs2015

Dear all, I wrote some code to test FMA instructions and compiled it in the VS 2010 IDE (using the Intel C++ toolset, of course), but no FMA instructions were generated. Then I compiled the same code in VS 2015, using the same Intel toolset and the same project configuration, and it generated FMA instructions successfully. I use Intel Parallel Studio XE 2016 Cluster Edition.

Is there any difference between vs2010 and vs2015?

Below is the code:

#include "stdafx.h"
#include <Windows.h>
#include <immintrin.h>
 
bool fma_test()
{
	__m256 mmammbmmc;
	float a[8], b[8], c[8];
	for (int i = 0; i < 8; ++i)
	{
		a[i] = i;
		b[i] = i;
		c[i] = i;
	}
 
	mma = _mm256_load_ps(a);
	mmb = _mm256_load_ps(b);
	mmc = _mm256_load_ps(c);
	__m256 ret = _mm256_fmadd_ps(mmammbmmc);
	if(ret.m256_f32[7] == 56.0)
		return true;
	return false;
}
 
/*
 * Entry point: runs fma_test() and prints "true" or "false" depending on its
 * result, then runs the shell command "pause" before exiting with status 0.
 */
int main()
{
	const char *verdict = fma_test() ? "true" : "false";
	printf("%s", verdict);
	system("pause");
	return 0;
}

Below is the disassembly in VS2010 and VS2015:

VS2010:

000000013FD30FFC  add         byte ptr [rax],al  
000000013FD30FFE  add         byte ptr [rax],al  
--- D:\sl\XR\MoFangG\xfma\xfma.cpp ---------------------------------------------
        return true;
    return false;
}

int main()
{
000000013FD31000  sub         rsp,78h  
000000013FD31004  mov         edx,9D9FFEh  
000000013FD31009  mov         qword ptr [rsp+60h],r13  
000000013FD3100E  lea         r13,[rsp+3Fh]  
000000013FD31013  mov         ecx,3  
000000013FD31018  and         r13,0FFFFFFFFFFFFFFE0h  
000000013FD3101C  mov         rax,qword ptr [__security_cookie (13FD36000h)]  
000000013FD31023  xor         rax,rsp  
000000013FD31026  mov         qword ptr [rsp+70h],rax  
000000013FD3102B  call        __intel_new_feature_proc_init (13FD318F0h)  
000000013FD31030  vstmxcsr    dword ptr [rsp+68h]  
000000013FD31036  or          dword ptr [rsp+68h],8040h  
000000013FD3103E  vldmxcsr    dword ptr [rsp+68h]  
// xfma.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <Windows.h>
#include <immintrin.h>

bool fma_test()
{
    __m256 mma, mmb, mmc;
    float a[8], b[8], c[8];
    for (int i = 0; i < 8; ++i)
    {
        a[i] = i;
000000013FD31044  vcvtdq2ps   ymm0,ymmword ptr [__xi_z+30h (13FD33220h)]  
        b[i] = i;
        c[i] = i;
    }

    mma = _mm256_load_ps(a);
    mmb = _mm256_load_ps(b);
    mmc = _mm256_load_ps(c);
    __m256 ret = _mm256_fmadd_ps(mma, mmb, mmc);
000000013FD3104C  db          c4h  
000000013FD3104D  loop        main+0CCh (13FD310CCh)  
000000013FD3104F  test        al,0C0h  
000000013FD31051  vmovups     ymmword ptr [rbp],ymm0  
    if(ret.m256_f32[7] == 56.0)
000000013FD31057  vmovss      xmm1,dword ptr [rbp+1Ch]  
000000013FD3105D  vucomiss    xmm1,dword ptr [__xi_z+50h (13FD33240h)]  
000000013FD31065  jp          main+69h (13FD31069h)  
000000013FD31067  je          main+9Fh (13FD3109Fh)  
    }
    else
        printf("false");
000000013FD31069  lea         rcx,[__xi_z+54h (13FD33244h)]  
000000013FD31070  vzeroupper  
000000013FD31073  call        qword ptr [__imp_printf (13FD33180h)]  
    system("pause");
000000013FD31079  lea         rcx,[__xi_z+5Ah (13FD3324Ah)]  
000000013FD31080  call        qword ptr [__imp_system (13FD330E0h)]  
    return 0;
000000013FD31086  mov         rcx,qword ptr [rsp+70h]  
000000013FD3108B  xor         rcx,rsp  
000000013FD3108E  call        __security_check_cookie (13FD310D0h)  
000000013FD31093  mov         r13,qword ptr [rsp+60h]  
000000013FD31098  xor         eax,eax  
000000013FD3109A  add         rsp,78h  
000000013FD3109E  ret  
    if (fma_test())
    {
        printf("true");
000000013FD3109F  lea         rcx,[__xi_z+60h (13FD33250h)]  
000000013FD310A6  vzeroupper  
000000013FD310A9  call        qword ptr [__imp_printf (13FD33180h)]  
000000013FD310AF  jmp         main+79h (13FD31079h)  
000000013FD310B1  nop         dword ptr [rax+rax]  
000000013FD310B9  nop         dword ptr [rax]  
--- No source file -------------------------------------------------------------
000000013FD310C0  int         3  
000000013FD310C1  int         3  
000000013FD310C2  int         3  
000000013FD310C3  int         3  
000000013FD310C4  int         3  
000000013FD310C5  int         3  
000000013FD310C6  nop         word ptr [rax+rax]  
__security_check_cookie:
000000013FD310D0  cmp         rcx,qword ptr [__security_cookie (13FD36000h)]  
000000013FD310D7  jne         ReportFailure (13FD310EAh)  
000000013FD310D9  rol         rcx,10h  
000000013FD310DD  test        cx,0FFFFh  
000000013FD310E2  jne         RestoreRcx (13FD310E6h)  
000000013FD310E4  rep ret  
RestoreRcx:
000000013FD310E6  ror         rcx,10h  
ReportFailure:
000000013FD310EA  jmp         __report_gsfailure (13FD31440h)  
000000013FD310EF  int         3  
__GSHandlerCheckCommon:
000000013FD310F0  push        rbx  
000000013FD310F2  sub         rsp,20h  
000000013FD310F6  mov         r11d,dword ptr [r8]  
000000013FD310F9  mov         rbx,rdx  
000000013FD310FC  mov         r9,rcx  
000000013FD310FF  and         r11d,0FFFFFFF8h  
000000013FD31103  test        byte ptr [r8],4  
000000013FD31107  mov         r10,rcx  
000000013FD3110A  je          __GSHandlerCheckCommon+2Fh (13FD3111Fh)  
000000013FD3110C  mov         eax,dword ptr [r8+8]  
000000013FD31110  movsxd      r10,dword ptr [r8+4]  
000000013FD31114  neg         eax  
000000013FD31116  add         r10,rcx  
000000013FD31119  movsxd      rcx,eax  
000000013FD3111C  and         r10,rcx  
000000013FD3111F  movsxd      rax,r11d  
000000013FD31122  mov         rdx,qword ptr [rax+r10]  
000000013FD31126  mov         rax,qword ptr [rbx+10h]  
000000013FD3112A  mov         ecx,dword ptr [rax+8]  
000000013FD3112D  add         rcx,qword ptr [rbx+8]  
000000013FD31131  test        byte ptr [rcx+3],0Fh  
000000013FD31135  je          __GSHandlerCheckCommon+53h (13FD31143h)  
000000013FD31137  movzx       eax,byte ptr [rcx+3]  
000000013FD3113B  and         eax,0FFFFFFF0h  
000000013FD3113E  cdqe  
000000013FD31140  add         r9,rax  
000000013FD31143  xor         r9,rdx  
000000013FD31146  mov         rcx,r9  
000000013FD31149  add         rsp,20h  
000000013FD3114D  pop         rbx  
000000013FD3114E  jmp         __security_check_cookie (13FD310D0h)  
000000013FD31153  int         3  
__GSHandlerCheck:
000000013FD31154  sub         rsp,28h  
000000013FD31158  mov         r8,qword ptr [r9+38h]  
000000013FD3115C  mov         rcx,rdx  
000000013FD3115F  mov         rdx,r9  
000000013FD31162  call        __GSHandlerCheckCommon (13FD310F0h)  
000000013FD31167  mov         eax,1  
000000013FD3116C  add         rsp,28h  
000000013FD31170  ret  
000000013FD31171  int         3  
000000013FD31172  int         3  
000000013FD31173  int         3  
--- f:\dd\vctools\crt_bld\self_64_amd64\crt\src\crtexe.c -----------------------

VS2015

        c[i] = i;
000000013F0B1098  vmovaps     xmmword ptr [rsp+0B0h],xmm0  
    }

    mma = _mm256_load_ps(a);
000000013F0B10A1  vmovaps     ymm1,ymmword ptr [rsp+70h]  
    mmb = _mm256_load_ps(b);
000000013F0B10A7  vmovaps     ymm0,ymmword ptr [rsp+90h]  
        c[i] = i;
000000013F0B10B0  vmovaps     xmmword ptr [rsp+0C0h],xmm5  
    mmc = _mm256_load_ps(c);
    __m256 ret = _mm256_fmadd_ps(mma, mmb, mmc);
000000013F0B10B9  vfmadd213ps ymm1,ymm0,ymmword ptr [rsp+0B0h]  
000000013F0B10C3  vmovaps     ymmword ptr [r13],ymm1  
    if(ret.m256_f32[7] == 56.0)
000000013F0B10C9  vmovss      xmm2,dword ptr [r13+1Ch]  
    if (fma_test())
    {
        printf("true");
    }
    else
    {
        printf("false");
000000013F0B10CF  vucomiss    xmm2,dword ptr [__xt_z+18h (013F0B4290h)]  
    if (fma_test())

 

 

0 Kudos
0 Replies
Reply