Software Tuning, Performance Optimization & Platform Monitoring
Discussion regarding monitoring and software tuning methodologies, Performance Monitoring Unit (PMU) of Intel microprocessors, and platform updating.

Intel fast number generator isn't works as expected

Pavel_K_
Beginner
473 Views

I am trying to use Intel fast number generator. I added GetRandom(unsigned int low, unsigned int high) method - to get next random number and and srand_sse() to set up seed value from time function. All else is the same as in article. You will see implimentation below. There is one big mystery for me. 

Why

__declspec(align(16)) static __m128i cur_seed;

isn't static and changes it value in unpredictable way when the program current execution point moves to other method(i seed this from vs debugger). As you see from code i seted up starting seed value with srand_sse() from time function, but it isn't working(random sequences are the same) if i call GetRandom(..) out of scope were i called srand_sse(), and works if i call it both in one scope... i can't undestand what is going here and how to make seed value works for every place. Does it changes because __m128i refers registers valus? But how can it be static in that case?

 

//FastRandom.h
#pragma once
#include "emmintrin.h"
#include <time.h>
//define this if you wish to return values similar to the standard rand();
//#define COMPATABILITY

namespace Brans
{
        static unsigned short curRandIndex = 4;
        static unsigned int randoms[4];
        __declspec(align(16)) static __m128i cur_seed;

        // uncoment this if you are using intel compiler
        // for MS CL the vectorizer is on by default and jumps in if you
        // compile with /O2 ...
        //#pragma intel optimization_parameter target_arch=avx
        //__declspec(cpu_dispatch(core_2nd_gen_avx, core_i7_sse4_2, core_2_duo_ssse3, generic )
        inline void rand_sse(unsigned int* result)
        {
            __declspec(align(16)) __m128i cur_seed_split;

            __declspec(align(16)) __m128i multiplier;

            __declspec(align(16)) __m128i adder;

            __declspec(align(16)) __m128i mod_mask;

            __declspec(align(16)) __m128i sra_mask;

            __declspec(align(16)) __m128i sseresult;

            __declspec(align(16)) static const unsigned int mult[4] =

            { 214013, 17405, 214013, 69069 };

            __declspec(align(16)) static const unsigned int gadd[4] =

            { 2531011, 10395331, 13737667, 1 };

            __declspec(align(16)) static const unsigned int mask[4] =

            { 0xFFFFFFFF, 0, 0xFFFFFFFF, 0 };

            __declspec(align(16)) static const unsigned int masklo[4] =

            { 0x00007FFF, 0x00007FFF, 0x00007FFF, 0x00007FFF };



            adder = _mm_load_si128((__m128i*) gadd);

            multiplier = _mm_load_si128((__m128i*) mult);

            mod_mask = _mm_load_si128((__m128i*) mask);

            sra_mask = _mm_load_si128((__m128i*) masklo);

            cur_seed_split = _mm_shuffle_epi32(cur_seed, _MM_SHUFFLE(2, 3, 0, 1));



            cur_seed = _mm_mul_epu32(cur_seed, multiplier);

            multiplier = _mm_shuffle_epi32(multiplier, _MM_SHUFFLE(2, 3, 0, 1));

            cur_seed_split = _mm_mul_epu32(cur_seed_split, multiplier);


            cur_seed = _mm_and_si128(cur_seed, mod_mask);

            cur_seed_split = _mm_and_si128(cur_seed_split, mod_mask);

            cur_seed_split = _mm_shuffle_epi32(cur_seed_split, _MM_SHUFFLE(2, 3, 0, 1));

            cur_seed = _mm_or_si128(cur_seed, cur_seed_split);

            cur_seed = _mm_add_epi32(cur_seed, adder);


#ifdef COMPATABILITY



            // Add the lines below if you wish to reduce your results to 16-bit vals...

            sseresult = _mm_srai_epi32(cur_seed, 16);

            sseresult = _mm_and_si128(sseresult, sra_mask);

            _mm_storeu_si128((__m128i*) result, sseresult);

            return;

#endif


            _mm_storeu_si128((__m128i*) result, cur_seed);

            return;
        }

        inline void srand_sse(unsigned int seed)
        {
            cur_seed = _mm_set_epi32(seed, seed + 1, seed, seed + 1);
        }

        inline void srand_sse()
        {
            unsigned int seed = (unsigned int)time(0);
            cur_seed = _mm_set_epi32(seed, seed + 1, seed, seed + 1);
        }

        inline unsigned int GetRandom(unsigned int low, unsigned int high)
        {
            if (curRandIndex < 4)
            {
                unsigned int res = randoms[curRandIndex];
                curRandIndex++;
                return res % (high - low + 1) + low;
            }
            else
            {
                curRandIndex = 0;
                rand_sse(randoms);
                return GetRandom(low, high);
            }
        }

    };

//RandomProvider.h
#pragma once
#include <random>
#include "FastRandom.h"

namespace Brans
{
    using namespace std;

    class RandomValuesProvider //: public RandomProviderBase
    {
    public:
        RandomValuesProvider(int upperLimit);
        int GetNextValue();
    private:
        int _upperLimit;
    };
}

//RandomProvider.cpp:
#include "stdafx.h"
#include "RandomProvider.h"

namespace Brans
{
    int RandomValuesProvider::GetNextValue()
    {
        int tmp = GetRandom(1, _upperLimit);
        return tmp;
    }

    RandomValuesProvider::RandomValuesProvider(int upperLimit) : _upperLimit(upperLimit)
    {}
}

// Runner.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <iostream>
#include "RandomProvider.h"
using namespace Brans;

    int _tmain(int argc, _TCHAR* argv[])
    {
        srand_sse(); //Setting up starting seed value
        RandomValuesProvider _conProvider(29);
        int theSameValue = _conProvider.GetNextValue();
        cout << "Bad random value is :" << theSameValue << endl;
        /*uncomment this to have not just correct theDifferentValue, but also to fix theSameValue and make it each time different(with random seed). Fantastic!
        int theDifferentValue = GetRandom(1, 29);
        cout << "Good random value is :" << theDifferentValue << endl;*/
        return 0;
    }

[VS 2013 Solution download link(very small])2

 

 

 

0 Kudos
4 Replies
Patrick_F_Intel1
Employee
473 Views

Can you provide details?

0 Kudos
Patrick_F_Intel1
Employee
473 Views

Sorry but your comment is not showing up.

pat

0 Kudos
Bernard
Valued Contributor I
473 Views

>>>Does it changes because __m128i refers registers valus>>>

IIRC __m128i is of type union and its loaded into XMM registers.If I am not wrong when declared this way _m128i curr_seed at the compile time the address of the variable curr_seed is obtained by using LEA instruction.Following the access pattern to the content of register which holds the address of curr_seed variable could maybe shed some light on what could have probably gone wrong during the program execution.

0 Kudos
Patrick_F_Intel1
Employee
473 Views

Hello Pavel,

When you run the code from the sample article, does it work properly?

Pat

0 Kudos
Reply