Hi,
I wrote a small test program to analyze the code generated for load/store operations with the different memory orders of the new std::atomic type.
[cpp]
#include <atomic>
#include <cstddef> // size_t
std::atomic<size_t> v(42);
__declspec(noinline) size_t load_relaxed() { return v.load(std::memory_order_relaxed); }
__declspec(noinline) size_t load_acquire() { return v.load(std::memory_order_acquire); }
__declspec(noinline) size_t load_consume() { return v.load(std::memory_order_consume); }
__declspec(noinline) size_t load_seq_cst() { return v.load(std::memory_order_seq_cst); }
__declspec(noinline) void store_relaxed(size_t arg) { v.store(arg, std::memory_order_relaxed); }
__declspec(noinline) void store_release(size_t arg) { v.store(arg, std::memory_order_release); }
__declspec(noinline) void store_seq_cst(size_t arg) { v.store(arg, std::memory_order_seq_cst); }
int main(int argc, char* argv[])
{
size_t x = 0;
x += load_relaxed();
x += load_acquire();
x += load_consume();
x += load_seq_cst();
store_relaxed(x);
store_release(x);
store_seq_cst(x);
return (int)x;
}
[/cpp]
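(In case anyone wants to reproduce this: I built an optimized x64 binary and looked at the disassembly in the debugger. A command along these lines should work with both compilers; the file name is just an example, and /FAs additionally writes an assembly listing:)
[plain]icl /O2 /FAs atomic_test.cpp
cl /O2 /FAs atomic_test.cpp[/plain]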
The results with Intel Composer XE 2013 look as follows.
With the Intel atomic header (__USE_INTEL_ATOMICs):
[plain]v.load(std::memory_order_relaxed);
lea rax,[v (013FE33020h)]
mov rax,qword ptr [rax][/plain]
[plain]v.load(std::memory_order_acquire);
lea rax,[v (013FE33020h)]
mov rax,qword ptr [rax]
lfence[/plain]
[plain]v.load(std::memory_order_seq_cst);
lea rax,[v (013FE33020h)]
mfence
mov rax,qword ptr [rax]
mfence[/plain]
[plain]v.store(arg, std::memory_order_relaxed);
lea rdx,[v (013FE33020h)]
mov qword ptr [rdx],rax[/plain]
[plain]v.store(arg, std::memory_order_release);
lea rdx,[v (013FE33020h)]
sfence
mov qword ptr [rdx],rax[/plain]
[plain]v.store(arg, std::memory_order_seq_cst);
lea rdx,[v (013FE33020h)]
xchg rax,qword ptr [rdx][/plain]
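For comparison, the commonly cited minimal x86-64 mapping for the C++11 atomics (e.g. the "C/C++11 mappings to processors" collected by the Cambridge group) needs no standalone fences at all, as long as the seq_cst store carries the synchronization:
[plain]load  relaxed/consume/acquire/seq_cst : mov  (plain load)
store relaxed/release                  : mov  (plain store)
store seq_cst                          : xchg (or mov + mfence)[/plain]
Measured against that, the lfence after the acquire load and the sfence/mfence around the other operations look redundant.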
With the Microsoft atomic header:
[plain]v.load(std::memory_order_relaxed);
v.load(std::memory_order_acquire);
v.load(std::memory_order_seq_cst);
lea rdi,[v (013FA93020h)]
mov rax,qword ptr [rdi]
retry:
mov rdx,rax
or rdx,rcx
lock cmpxchg qword ptr [rdi],rdx
jne retry (013FA91081h)[/plain]
[plain]v.store(arg, std::memory_order_relaxed);
v.store(arg, std::memory_order_release);
mov qword ptr [v (013FA93020h)],rcx[/plain]
[plain]v.store(arg, std::memory_order_seq_cst);
lea rcx,[v (013FA93020h)]
xchg rax,qword ptr [rcx][/plain]
The code generated for the atomic loads with the Microsoft header is something I will have to report to Microsoft: every load, even a relaxed one, is compiled into a lock cmpxchg loop, which is a catastrophe from a performance point of view.
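To make the cost concrete, here is a minimal (untested) benchmark sketch: thread and iteration counts are arbitrary, and it assumes the seq_cst load compiles to the lock cmpxchg loop shown above. With an RMW-based load every reader writes the cache line, so the line ping-pongs between cores instead of staying in the shared state:
[cpp]
#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>
#include <vector>

std::atomic<size_t> v(42);
const size_t kIters = 10000000; // arbitrary iteration count

int main()
{
    const int kThreads = 4; // arbitrary reader count
    std::vector<std::thread> pool;
    auto t0 = std::chrono::steady_clock::now();
    for (int t = 0; t < kThreads; ++t) {
        pool.emplace_back([] {
            size_t sink = 0;
            for (size_t i = 0; i < kIters; ++i)
                sink += v.load(std::memory_order_seq_cst); // hot read path
            volatile size_t keep = sink; // prevent the loop from being optimized away
            (void)keep;
        });
    }
    for (auto& th : pool)
        th.join();
    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                  std::chrono::steady_clock::now() - t0).count();
    std::printf("%d threads x %llu seq_cst loads: %lld ms\n",
                kThreads, (unsigned long long)kIters, (long long)ms);
    return 0;
}
[/cpp]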
But what I don't understand is why the code generated with the Intel header contains all those lfence/sfence instructions.
In particular: why does v.store(arg, std::memory_order_release) require an sfence before the write operation? Write operations are guaranteed to be executed in program order on x86 anyway, right?
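For context, my understanding of what memory_order_release has to guarantee is the classic message-passing pattern sketched below: the flag store must not be reordered before the payload store. On x86, plain stores already become visible in program order (only non-temporal stores such as movnti and write-combining memory are weakly ordered, which is what sfence was introduced for), so I would expect a compiler-level barrier plus a plain mov to be sufficient:
[cpp]
#include <atomic>
#include <cstdio>
#include <thread>

int payload = 0;                      // ordinary, non-atomic data
std::atomic<bool> ready(false);

void producer()
{
    payload = 123;                                // (1) plain store
    ready.store(true, std::memory_order_release); // (2) must not be reordered before (1)
}

void consumer()
{
    while (!ready.load(std::memory_order_acquire)) // synchronizes-with (2)
        ;
    std::printf("payload = %d\n", payload);        // guaranteed to print 123
}

int main()
{
    std::thread a(producer);
    std::thread b(consumer);
    a.join();
    b.join();
    return 0;
}
[/cpp]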
Thanks,
Manuel