- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I am trying to use Intel fast number generator. I added GetRandom(unsigned int low, unsigned int high) method - to get next random number and and srand_sse() to set up seed value from time function. All else is the same as in article. You will see implimentation below. There is one big mystery for me.
Why
__declspec(align(16)) static __m128i cur_seed;
isn't static and changes it value in unpredictable way when the program current execution point moves to other method(i seed this from vs debugger). As you see from code i seted up starting seed value with srand_sse() from time function, but it isn't working(random sequences are the same) if i call GetRandom(..) out of scope were i called srand_sse(), and works if i call it both in one scope... i can't undestand what is going here and how to make seed value works for every place. Does it changes because __m128i refers registers valus? But how can it be static in that case?
//FastRandom.h
#pragma once
#include "emmintrin.h"
#include <time.h>
//define this if you wish to return values similar to the standard rand();
//#define COMPATABILITY
namespace Brans
{
static unsigned short curRandIndex = 4;
static unsigned int randoms[4];
__declspec(align(16)) static __m128i cur_seed;
// uncoment this if you are using intel compiler
// for MS CL the vectorizer is on by default and jumps in if you
// compile with /O2 ...
//#pragma intel optimization_parameter target_arch=avx
//__declspec(cpu_dispatch(core_2nd_gen_avx, core_i7_sse4_2, core_2_duo_ssse3, generic )
inline void rand_sse(unsigned int* result)
{
__declspec(align(16)) __m128i cur_seed_split;
__declspec(align(16)) __m128i multiplier;
__declspec(align(16)) __m128i adder;
__declspec(align(16)) __m128i mod_mask;
__declspec(align(16)) __m128i sra_mask;
__declspec(align(16)) __m128i sseresult;
__declspec(align(16)) static const unsigned int mult[4] =
{ 214013, 17405, 214013, 69069 };
__declspec(align(16)) static const unsigned int gadd[4] =
{ 2531011, 10395331, 13737667, 1 };
__declspec(align(16)) static const unsigned int mask[4] =
{ 0xFFFFFFFF, 0, 0xFFFFFFFF, 0 };
__declspec(align(16)) static const unsigned int masklo[4] =
{ 0x00007FFF, 0x00007FFF, 0x00007FFF, 0x00007FFF };
adder = _mm_load_si128((__m128i*) gadd);
multiplier = _mm_load_si128((__m128i*) mult);
mod_mask = _mm_load_si128((__m128i*) mask);
sra_mask = _mm_load_si128((__m128i*) masklo);
cur_seed_split = _mm_shuffle_epi32(cur_seed, _MM_SHUFFLE(2, 3, 0, 1));
cur_seed = _mm_mul_epu32(cur_seed, multiplier);
multiplier = _mm_shuffle_epi32(multiplier, _MM_SHUFFLE(2, 3, 0, 1));
cur_seed_split = _mm_mul_epu32(cur_seed_split, multiplier);
cur_seed = _mm_and_si128(cur_seed, mod_mask);
cur_seed_split = _mm_and_si128(cur_seed_split, mod_mask);
cur_seed_split = _mm_shuffle_epi32(cur_seed_split, _MM_SHUFFLE(2, 3, 0, 1));
cur_seed = _mm_or_si128(cur_seed, cur_seed_split);
cur_seed = _mm_add_epi32(cur_seed, adder);
#ifdef COMPATABILITY
// Add the lines below if you wish to reduce your results to 16-bit vals...
sseresult = _mm_srai_epi32(cur_seed, 16);
sseresult = _mm_and_si128(sseresult, sra_mask);
_mm_storeu_si128((__m128i*) result, sseresult);
return;
#endif
_mm_storeu_si128((__m128i*) result, cur_seed);
return;
}
inline void srand_sse(unsigned int seed)
{
cur_seed = _mm_set_epi32(seed, seed + 1, seed, seed + 1);
}
inline void srand_sse()
{
unsigned int seed = (unsigned int)time(0);
cur_seed = _mm_set_epi32(seed, seed + 1, seed, seed + 1);
}
inline unsigned int GetRandom(unsigned int low, unsigned int high)
{
if (curRandIndex < 4)
{
unsigned int res = randoms[curRandIndex];
curRandIndex++;
return res % (high - low + 1) + low;
}
else
{
curRandIndex = 0;
rand_sse(randoms);
return GetRandom(low, high);
}
}
};
//RandomProvider.h
#pragma once
#include <random>
#include "FastRandom.h"
namespace Brans
{
using namespace std;
class RandomValuesProvider //: public RandomProviderBase
{
public:
RandomValuesProvider(int upperLimit);
int GetNextValue();
private:
int _upperLimit;
};
}
//RandomProvider.cpp:
#include "stdafx.h"
#include "RandomProvider.h"
namespace Brans
{
int RandomValuesProvider::GetNextValue()
{
int tmp = GetRandom(1, _upperLimit);
return tmp;
}
RandomValuesProvider::RandomValuesProvider(int upperLimit) : _upperLimit(upperLimit)
{}
}
// Runner.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <iostream>
#include "RandomProvider.h"
using namespace Brans;
int _tmain(int argc, _TCHAR* argv[])
{
srand_sse(); //Setting up starting seed value
RandomValuesProvider _conProvider(29);
int theSameValue = _conProvider.GetNextValue();
cout << "Bad random value is :" << theSameValue << endl;
/*uncomment this to have not just correct theDifferentValue, but also to fix theSameValue and make it each time different(with random seed). Fantastic!
int theDifferentValue = GetRandom(1, 29);
cout << "Good random value is :" << theDifferentValue << endl;*/
return 0;
}
[VS 2013 Solution download link(very small])2
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Can you provide details?
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Sorry but your comment is not showing up.
pat
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
>>>Does it changes because __m128i refers registers valus>>>
IIRC __m128i is of type union and its loaded into XMM registers.If I am not wrong when declared this way _m128i curr_seed at the compile time the address of the variable curr_seed is obtained by using LEA instruction.Following the access pattern to the content of register which holds the address of curr_seed variable could maybe shed some light on what could have probably gone wrong during the program execution.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello Pavel,
When you run the code from the sample article, does it work properly?
Pat
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page