Community
cancel
Showing results for 
Search instead for 
Did you mean: 
Highlighted
Beginner
28 Views

Difference between vs2010 and vs2015

Dear all, I wrote some code to test FMA instructions and compiled it in the VS 2010 IDE (using the Intel C++ toolset, of course), but no FMA instructions were generated. Then I compiled the same code in VS 2015, using the same Intel toolset and the same project configuration, and it generated FMA instructions successfully. I use Intel Parallel Studio XE 2016 Cluster Edition.

Is there any difference between vs2010 and vs2015?

Below is code:

#include "stdafx.h"
#include <Windows.h>
#include <immintrin.h>
 
bool fma_test()
{
	__m256 mma, mmb, mmc;
	float a[8], b[8], c[8];
	for (int i = 0; i < 8; ++i)
	{
		a = i;
		b = i;
		c = i;
	}
 
	mma = _mm256_load_ps(a);
	mmb = _mm256_load_ps(b);
	mmc = _mm256_load_ps(c);
	__m256 ret = _mm256_fmadd_ps(mma, mmb, mmc);
	if(ret.m256_f32[7] == 56.0)
		return true;
	return false;
}
 
// Entry point: runs the FMA self-check, prints "true" when it passes and
// "false" when it does not, then pauses the console before exiting with 0.
int main()
{
	const bool fma_ok = fma_test();
	if (!fma_ok)
	{
		printf("false");
	}
	else
	{
		printf("true");
	}

	// Keep the console window open until a key is pressed.
	system("pause");
	return 0;
}

Below is disassembly in VS2010 and 2015:

VS2010:

000000013FD30FFC  add         byte ptr [rax],al  
000000013FD30FFE  add         byte ptr [rax],al  
--- D:\sl\XR\MoFangG\xfma\xfma.cpp ---------------------------------------------
        return true;
    return false;
}

int main()
{
000000013FD31000  sub         rsp,78h  
000000013FD31004  mov         edx,9D9FFEh  
000000013FD31009  mov         qword ptr [rsp+60h],r13  
000000013FD3100E  lea         r13,[rsp+3Fh]  
000000013FD31013  mov         ecx,3  
000000013FD31018  and         r13,0FFFFFFFFFFFFFFE0h  
000000013FD3101C  mov         rax,qword ptr [__security_cookie (13FD36000h)]  
000000013FD31023  xor         rax,rsp  
000000013FD31026  mov         qword ptr [rsp+70h],rax  
000000013FD3102B  call        __intel_new_feature_proc_init (13FD318F0h)  
000000013FD31030  vstmxcsr    dword ptr [rsp+68h]  
000000013FD31036  or          dword ptr [rsp+68h],8040h  
000000013FD3103E  vldmxcsr    dword ptr [rsp+68h]  
// xfma.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <Windows.h>
#include <immintrin.h>

bool fma_test()
{
    __m256 mma, mmb, mmc;
    float a[8], b[8], c[8];
    for (int i = 0; i < 8; ++i)
    {
        a = i;
000000013FD31044  vcvtdq2ps   ymm0,ymmword ptr [__xi_z+30h (13FD33220h)]  
        b = i;
        c = i;
    }

    mma = _mm256_load_ps(a);
    mmb = _mm256_load_ps(b);
    mmc = _mm256_load_ps(c);
    __m256 ret = _mm256_fmadd_ps(mma, mmb, mmc);
000000013FD3104C  db          c4h  
000000013FD3104D  loop        main+0CCh (13FD310CCh)  
000000013FD3104F  test        al,0C0h  
000000013FD31051  vmovups     ymmword ptr [rbp],ymm0  
    if(ret.m256_f32[7] == 56.0)
000000013FD31057  vmovss      xmm1,dword ptr [rbp+1Ch]  
000000013FD3105D  vucomiss    xmm1,dword ptr [__xi_z+50h (13FD33240h)]  
000000013FD31065  jp          main+69h (13FD31069h)  
000000013FD31067  je          main+9Fh (13FD3109Fh)  
    }
    else
        printf("false");
000000013FD31069  lea         rcx,[__xi_z+54h (13FD33244h)]  
000000013FD31070  vzeroupper  
000000013FD31073  call        qword ptr [__imp_printf (13FD33180h)]  
    system("pause");
000000013FD31079  lea         rcx,[__xi_z+5Ah (13FD3324Ah)]  
000000013FD31080  call        qword ptr [__imp_system (13FD330E0h)]  
    return 0;
000000013FD31086  mov         rcx,qword ptr [rsp+70h]  
000000013FD3108B  xor         rcx,rsp  
000000013FD3108E  call        __security_check_cookie (13FD310D0h)  
000000013FD31093  mov         r13,qword ptr [rsp+60h]  
000000013FD31098  xor         eax,eax  
000000013FD3109A  add         rsp,78h  
000000013FD3109E  ret  
    if (fma_test())
    {
        printf("true");
000000013FD3109F  lea         rcx,[__xi_z+60h (13FD33250h)]  
000000013FD310A6  vzeroupper  
000000013FD310A9  call        qword ptr [__imp_printf (13FD33180h)]  
000000013FD310AF  jmp         main+79h (13FD31079h)  
000000013FD310B1  nop         dword ptr [rax+rax]  
000000013FD310B9  nop         dword ptr [rax]  
--- No source file -------------------------------------------------------------
000000013FD310C0  int         3  
000000013FD310C1  int         3  
000000013FD310C2  int         3  
000000013FD310C3  int         3  
000000013FD310C4  int         3  
000000013FD310C5  int         3  
000000013FD310C6  nop         word ptr [rax+rax]  
__security_check_cookie:
000000013FD310D0  cmp         rcx,qword ptr [__security_cookie (13FD36000h)]  
000000013FD310D7  jne         ReportFailure (13FD310EAh)  
000000013FD310D9  rol         rcx,10h  
000000013FD310DD  test        cx,0FFFFh  
000000013FD310E2  jne         RestoreRcx (13FD310E6h)  
000000013FD310E4  rep ret  
RestoreRcx:
000000013FD310E6  ror         rcx,10h  
ReportFailure:
000000013FD310EA  jmp         __report_gsfailure (13FD31440h)  
000000013FD310EF  int         3  
__GSHandlerCheckCommon:
000000013FD310F0  push        rbx  
000000013FD310F2  sub         rsp,20h  
000000013FD310F6  mov         r11d,dword ptr [r8]  
000000013FD310F9  mov         rbx,rdx  
000000013FD310FC  mov         r9,rcx  
000000013FD310FF  and         r11d,0FFFFFFF8h  
000000013FD31103  test        byte ptr [r8],4  
000000013FD31107  mov         r10,rcx  
000000013FD3110A  je          __GSHandlerCheckCommon+2Fh (13FD3111Fh)  
000000013FD3110C  mov         eax,dword ptr [r8+8]  
000000013FD31110  movsxd      r10,dword ptr [r8+4]  
000000013FD31114  neg         eax  
000000013FD31116  add         r10,rcx  
000000013FD31119  movsxd      rcx,eax  
000000013FD3111C  and         r10,rcx  
000000013FD3111F  movsxd      rax,r11d  
000000013FD31122  mov         rdx,qword ptr [rax+r10]  
000000013FD31126  mov         rax,qword ptr [rbx+10h]  
000000013FD3112A  mov         ecx,dword ptr [rax+8]  
000000013FD3112D  add         rcx,qword ptr [rbx+8]  
000000013FD31131  test        byte ptr [rcx+3],0Fh  
000000013FD31135  je          __GSHandlerCheckCommon+53h (13FD31143h)  
000000013FD31137  movzx       eax,byte ptr [rcx+3]  
000000013FD3113B  and         eax,0FFFFFFF0h  
000000013FD3113E  cdqe  
000000013FD31140  add         r9,rax  
000000013FD31143  xor         r9,rdx  
000000013FD31146  mov         rcx,r9  
000000013FD31149  add         rsp,20h  
000000013FD3114D  pop         rbx  
000000013FD3114E  jmp         __security_check_cookie (13FD310D0h)  
000000013FD31153  int         3  
__GSHandlerCheck:
000000013FD31154  sub         rsp,28h  
000000013FD31158  mov         r8,qword ptr [r9+38h]  
000000013FD3115C  mov         rcx,rdx  
000000013FD3115F  mov         rdx,r9  
000000013FD31162  call        __GSHandlerCheckCommon (13FD310F0h)  
000000013FD31167  mov         eax,1  
000000013FD3116C  add         rsp,28h  
000000013FD31170  ret  
000000013FD31171  int         3  
000000013FD31172  int         3  
000000013FD31173  int         3  
--- f:\dd\vctools\crt_bld\self_64_amd64\crt\src\crtexe.c -----------------------

VS2015

        c = i;
000000013F0B1098  vmovaps     xmmword ptr [rsp+0B0h],xmm0  
    }

    mma = _mm256_load_ps(a);
000000013F0B10A1  vmovaps     ymm1,ymmword ptr [rsp+70h]  
    mmb = _mm256_load_ps(b);
000000013F0B10A7  vmovaps     ymm0,ymmword ptr [rsp+90h]  
        c = i;
000000013F0B10B0  vmovaps     xmmword ptr [rsp+0C0h],xmm5  
    mmc = _mm256_load_ps(c);
    __m256 ret = _mm256_fmadd_ps(mma, mmb, mmc);
000000013F0B10B9  vfmadd213ps ymm1,ymm0,ymmword ptr [rsp+0B0h]  
000000013F0B10C3  vmovaps     ymmword ptr [r13],ymm1  
    if(ret.m256_f32[7] == 56.0)
000000013F0B10C9  vmovss      xmm2,dword ptr [r13+1Ch]  
    if (fma_test())
    {
        printf("true");
    }
    else
    {
        printf("false");
000000013F0B10CF  vucomiss    xmm2,dword ptr [__xt_z+18h (013F0B4290h)]  
    if (fma_test())

 

0 Kudos
7 Replies
Highlighted
Black Belt
28 Views

Noting that full avx2 support was introduced in a vs2013 update, this may not be entirely surprising.  If you did use icl, this might be influenced by your arch setting and icl version.

0 Kudos
Highlighted
Employee
28 Views

Tim's correct, Raymond. AVX2, including the new FMA3 instructions, was introduced in the 13.0 initial release, and improved support for 3rd gen (-axCORE-AVX-I, -axCORE_AVX2) etc. was introduced (along with range-based for loops, C++11, etc.) in updates. So it could be related to the arch and icl version per se. You should try the subsequent update releases and find out. That said, it's a very old version. I'd suggest you upgrade to the latest 16.0 update 2 release that's out.

Regards,
Kittur

0 Kudos
Highlighted
Beginner
28 Views

Dear Tim and Kittur Ganesh, as I said, I used the same project configuration for VS2010 and VS2015: the arch is X64-Release in both, and the icl version is 2016 XE update 2 in both. Everything is the same for VS2010 and VS2015: same project configuration (I checked several times), same computer (i5-4590 with 8GB RAM), same ICL. This confuses me very much.

Please check out the attachment files(projects and source codes for VS2010 and VS2015).

Kittur Ganesh (Intel) wrote:

Tim's correct, Raymond. AVX2, including the new FMA3 instructions, was introduced in the 13.0 initial release, and improved support for 3rd gen (-axCORE-AVX-I, -axCORE_AVX2) etc. was introduced (along with range-based for loops, C++11, etc.) in updates. So it could be related to the arch and icl version per se. You should try the subsequent update releases and find out. That said, it's a very old version. I'd suggest you upgrade to the latest 16.0 update 2 release that's out.

Regards,
Kittur

0 Kudos
Highlighted
Employee
28 Views

Thanks Raymond, for the attachments and will investigate accordingly, appreciate much.

Kittur

0 Kudos
Highlighted
Employee
28 Views

Hi, Raymond

Tim was right. AVX is not supported in Visual Studio 2010; support started in Visual Studio 2013.

The disassembly is parsed by Visual Studio 2010 debugger, which cannot show FMA instructions correctly.

Hope that resolves your confusion.

Thanks.

0 Kudos
Highlighted
28 Views

Yuan,

Why would the MS VS selection affect the same Intel PS XE (16.0.2) on the same CPU and O/S?

While I can accept that the Disassembly window of the VS debugger might not disassemble an FMA instruction, I do not see that as a compelling reason not to generate the instruction sequence on that version of the IDE. Most users do not use the disassembly window.

Jim Dempsey

0 Kudos
Highlighted
Valued Contributor II
28 Views

>>...Everything is same for VS2010 and VS2015, same project configuration(I checked for several times), same computer >>(i5-4590 with 8GB RAM), same ICL. This confused me very much. Try to use Vtune to see assembler codes with FMA instructions.
0 Kudos