Here are a couple of functions and a macro wrapper for measuring time intervals as accurately as possible using the RDTSC instruction:
//*** Version 1 - Win32 ***//
// HrtClock (Version 1, Win32): reads the processor's time-stamp counter.
//
// The body consists of just two instructions: RDTSC, which loads the 64-bit
// counter into EDX:EAX, and RET, which returns to the caller straight away.
// The function itself never stores or converts the value.
//
// The template parameter T appears neither in the parameter list nor in the
// return type, so it cannot be deduced from a call.
//
// NOTE(review): _RTdeclspec_naked, RTuint64 and RTvoid are declared elsewhere;
// _RTdeclspec_naked presumably expands to __declspec( naked ), which would
// explain the hand-written 'ret' - confirm against the project headers.
template < class T > inline _RTdeclspec_naked RTuint64 HrtClock( RTvoid )
{
// Debug trace, disabled.
//printf( "[ HrtClock ]\\n" );
_asm
{
// Load the time-stamp counter into EDX:EAX.
rdtsc
// Return immediately, leaving the counter in EDX:EAX.
ret
}
};
//*** Version 2 - Win32 ***//
// HrtClock (Version 2, Win32): non-template variant that reads the
// processor's time-stamp counter.
//
// The body executes RDTSC, which loads the 64-bit counter into EDX:EAX, and
// then RET; no other code touches the value.
//
// NOTE(review): _RTdeclspec_naked and RTuint64 are declared elsewhere;
// _RTdeclspec_naked presumably expands to __declspec( naked ), which would
// explain the hand-written 'ret' - confirm against the project headers.
_RTdeclspec_naked RTuint64 HrtClock( void )
{
// Debug trace, disabled.
//printf( "[ HrtClock ]\\n" );
_asm
{
// Load the time-stamp counter into EDX:EAX.
rdtsc
// Return immediately, leaving the counter in EDX:EAX.
ret
}
}
//*** Version 3 - Win32 ***//
// HrtClock (Version 3, Win32): reads the processor's time-stamp counter and
// stores it through an out-parameter.
//
// puiClock - destination for the counter value; it is dereferenced
//            unconditionally, so it must point to writable storage.
//
// The counter is first assembled in the local 'v' (EAX into the first dword,
// EDX into the dword at offset 4) and then copied to *puiClock.
void HrtClock( RTuint64 *puiClock )
{
// Debug trace, disabled.
//printf( "[ HrtClock ]\\n" );
RTuint64 v;
_asm
{
// Save the two registers that the counter read overwrites.
push eax
push edx
// Bytes 0x0F 0x31 are the RDTSC opcode, emitted directly instead of
// using the mnemonic.
_emit 0x0f
_emit 0x31
// Low half of the counter (EAX) goes into the first dword of 'v'.
mov dword ptr v, eax
// High half of the counter (EDX) goes into the second dword of 'v'.
mov dword ptr v+4, edx
// Restore the saved registers in reverse order.
pop edx
pop eax
}
*puiClock = v;
}
//*** Version 4 - Linux32 ( with GCC or MinGW ) ***//
/*
 * HrtClock (Version 4, GCC / MinGW): reads the processor's time-stamp counter
 * with RDTSC and returns it converted to RTclock_t.
 *
 * RDTSC delivers a 64-bit value in EDX:EAX. Both halves are captured and
 * combined before the conversion, so the result is as wide as RTclock_t
 * allows; when RTclock_t is narrower than 64 bits only the low-order bits
 * survive the cast and the value wraps accordingly.
 *
 * Notes on the inline assembly:
 *  - EAX and EDX are both declared as outputs because RDTSC writes both; with
 *    only "=a" declared the compiler is free to keep a live value in EDX and
 *    have it silently overwritten.
 *  - No other register is touched, so no clobber list is needed.
 *  - __volatile__ keeps the compiler from merging or hoisting the read: the
 *    statement has no inputs, so otherwise two calls may be treated as
 *    producing the same value.
 */
RTclock_t HrtClock( RTvoid )
{
//printf( "[ HrtClock ]\\n" );
	unsigned int uiLow;		/* EAX: low 32 bits of the counter  */
	unsigned int uiHigh;	/* EDX: high 32 bits of the counter */

	__asm__ __volatile__
	(
		"rdtsc"
		: "=a" ( uiLow ), "=d" ( uiHigh )
	);

	return ( RTclock_t )( ( ( unsigned long long )uiHigh << 32 ) | uiLow );
}
//*** Version 5 - Linux32 ( with GCC or MinGW ) ***//
/*
 * HrtClock( Value ) (Version 5, GCC / MinGW, 32-bit x86): stores the
 * processor's time-stamp counter in Value.
 *
 * Value - a 64-bit lvalue; it is bound to the "=A" output constraint, which
 *         on 32-bit x86 designates the EDX:EAX register pair that RDTSC
 *         fills. On x86-64 "=A" does not designate that pair, so this macro
 *         is for 32-bit builds only.
 *
 * The bytes 0x0F 0x31 are the RDTSC opcode, emitted directly instead of using
 * the mnemonic. __volatile__ prevents the compiler from merging or moving the
 * read. The macro is a GNU statement expression and yields no value.
 *
 * Every continuation line ends in exactly one backslash and the final line
 * has none, so the definition ends at the closing parenthesis.
 */
#define HrtClock( Value ) \
( \
{ \
	__asm__ __volatile__ \
	( \
		".byte 0x0f; .byte 0x31" \
		: "=A" ( Value ) \
	); \
} \
)
链接已复制
MinGW C/C++ compilers
This is what Intel's documentation says about RDTSC instruction in an 'Instruction Set Reference' Volume 2B:
...
Loads the current value of the processor's time-stamp counter ( a 64-bit MSR ) into the EDX:EAX
registers.
...
Found issues are as follows:
1. On 32-bit platforms the 'clock_t' type is declared as a 32-bit type 'long' ( 4 bytes ); I verified this with
all the 32-bit C/C++ compilers I use:
...
typedef long clock_t;
...
2. "=A" has to be used instead of "=a"
3. A pair of registers 'edx' and 'ecx' is used instead of 'edx' and 'eax', and if you change:
...
"mov %%edx, %%ecx;" : "=A" ( ctValue )
...
to
...
"mov %%edx, %%eax;" : "=A" ( ctValue )
...
it doesn't work! So, the registers 'edx' and 'ecx' have to be used anyway.
4. Finally, a working C/C++ code looks like this:
/*
 * HrtClock (GCC / MinGW): returns the processor's 64-bit time-stamp counter
 * read with RDTSC.
 *
 * RDTSC delivers the counter in EDX:EAX. The two halves are captured through
 * explicit "=a" / "=d" outputs and combined, which behaves the same on 32-bit
 * and 64-bit x86 (the "=A" constraint designates the EDX:EAX pair only on
 * 32-bit targets).
 *
 * Notes on the inline assembly:
 *  - Nothing besides EAX and EDX is written, so no clobber list is needed and
 *    ECX is left untouched.
 *  - __volatile__ keeps the compiler from merging or hoisting the read: the
 *    statement has no inputs, so otherwise two calls may be treated as
 *    producing the same value.
 */
inline RTuint64 HrtClock( RTvoid )
{
	unsigned int uiLow;		/* EAX: low 32 bits of the counter  */
	unsigned int uiHigh;	/* EDX: high 32 bits of the counter */

	__asm__ __volatile__
	(
		"rdtsc"
		: "=a" ( uiLow ), "=d" ( uiHigh )
	);

	return ( ( RTuint64 )uiHigh << 32 ) | uiLow;
}
Here are some assembler codes generated by a 'g++' C/C++ compiler.
The left example doesn't work. The right example works.
... ...
.stabn 68,0,284,LM5832-__Z8HrtClockv .stabn 68,0,284,LM5832-__Z8HrtClockv
LM5832: LM5832:
/APP /APP
rdtsc;mov %edx, %eax; rdtsc;mov %edx, %ecx;
/NO_APP /NO_APP
movl %eax, -8(%ebp) movl %eax, -8(%ebp)
movl %edx, -4(%ebp) movl %edx, -4(%ebp)
.stabn 68,0,287,LM5833-__Z8HrtClockv .stabn 68,0,287,LM5833-__Z8HrtClockv
LM5833: LM5833:
movl -8(%ebp), %eax movl -8(%ebp), %eax
movl -4(%ebp), %edx movl -4(%ebp), %edx
LBE895: LBE895:
LBE894: LBE894:
.stabn 68,0,288,LM5834-__Z8HrtClockv .stabn 68,0,288,LM5834-__Z8HrtClockv
LM5834: LM5834:
leave leave
ret ret
... ...
It is not clear why the left example doesn't work and it always returns 0.
It is worth mentioning the problem with RDTSC and out of order execution:
http://en.wikipedia.org/wiki/Time_Stamp_Counter
Starting with the Pentium Pro, Intel processors have supported out-of-order execution, where instructions are not necessarily performed in the order they appear in the executable. This can cause RDTSC to be executed later than expected, producing a misleading cycle count.[3] This problem can be solved by executing a serializing instruction, such as CPUID, to force every preceding instruction to complete before allowing the program to continue, or by using the RDTSCP instruction, which is a serializing variant of the RDTSC instruction (starting from Core i7[4] and starting from AMD Athlon 64 X2 CPUs with AM2 Socket (Windsor & Brisbane)).
Unfortunately, I can't use the RDTSCP instruction.
I had a problem with incorrect values returned from the 'HrtClock' function that uses the RDTSC instruction. It
was related to the declaration of the return value as 'clock_t' ( long, 32-bit ) instead of 'uint64' ( 64-bit ).
I wanted to inform you that there is possibly a bug in the MinGW C/C++ compiler related to the '%%edx, %%ecx' declaration.
Best regards,
Sergey
[bash]unsigned long long int rdtsc( )
{
[/bash] ....[bash]#elif defined(__GNUC__)
#if defined i386
long long a;
asm volatile("rdtsc":"=A" (a));
return a;
#elif defined __x86_64
unsigned int _hi,_lo;
asm volatile("rdtsc":"=a"(_lo),"=d"(_hi));
return ((unsigned long long int)_hi << 32) | _lo;
....
[/bash] Evidently, tick count differences should be taken in 64-bit unsigned arithmetic. The left shift operator displays correctly if you go to edit mode.
>>...Evidently, tick count differences should be taken in 64-bit unsigned arithmetic...
Always! When I tried to use 'clock_t', which is the 32-bit type 'long' on a 32-bit platform, I sometimes had absolutely
incorrect values returned from my 'HrtClock' function.
Best regards,
Sergey
I decided to use
__asm__ volatile( "rdtsc;" : "=A" ( uiValue ) ); ( version B )
instead of
__asm__( "rdtsc;"
"mov %%edx, %%ecx;" : "=A" ( uiValue ) );( version A )
because 'version B' doesn't have 'mov %%edx, %%ecx'.
In terms of the assembler code generated by the MinGW C/C++ compiler, I haven't found any problems
with version B:
...
.stabn 68,0,244,LM2990-__Z8HrtClockv
LM2990:
pushl %ebp
movl %esp, %ebp
subl $8, %esp
LBB548:
LBB549:
.stabn 68,0,258,LM2991-__Z8HrtClockv
LM2991:
/APP
rdtsc;
/NO_APP
movl %eax, -8(%ebp)
movl %edx, -4(%ebp)
.stabn 68,0,260,LM2992-__Z8HrtClockv
LM2992:
movl -8(%ebp), %eax
movl -4(%ebp), %edx
LBE549:
LBE548:
.stabn 68,0,261,LM2993-__Z8HrtClockv
LM2993:
leave
ret
.stabs "uiValue:(93,22)",128,0,249,-8
...
and it works. No runtime issues or problems detected so far.
Best regards,
Sergey
This is a follow up. All three widely used Visual Studios 2005, 2008 and 2010 have a declaration of '__rdtsc' intrinsic function
in 'intrin.h' header file:
...
__MACHINEI( unsigned __int64 __rdtsc( void ) )
...