<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment in Intel® oneAPI Math Kernel Library</title>
    <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1571491#M35813</link>
    <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;//
//----- C++11 random number generation when not using OpenMP -------------
//

#ifndef _OPENMP

#include &amp;lt;random&amp;gt;           // C++11 random number generators
#include &amp;lt;functional&amp;gt;

/* some web references

   https://www.cplusplus.com/reference/random/
   https://stackoverflow.com/questions/14023880/c11-random-numbers-and-stdbind-i
nteract-in-unexpected-way/14023935
   https://stackoverflow.com/questions/20671573/c11-stdgenerate-and-stduniform-r
eal-distribution-called-two-times-gives-st

*/

// declare generator and output distributions

std::default_random_engine rng;
std::uniform_real_distribution&amp;lt;float&amp;gt; uniform(0.0f, 1.0f);
std::normal_distribution&amp;lt;float&amp;gt; normal(0.0f, 1.0f);

auto next_uniform = std::bind(std::ref(uniform), std::ref(rng));
auto next_normal = std::bind(std::ref(normal), std::ref(rng));

void rng_initialisation() {
    rng.seed(1234);
    uniform.reset();
    normal.reset();
}

void rng_termination() {
}

//------- MKL/VSL random number generation when using OpenMP -----------

#else

#include &amp;lt;mkl.h&amp;gt;
#include &amp;lt;mkl_vsl.h&amp;gt;
#include &amp;lt;memory.h&amp;gt;
#include &amp;lt;omp.h&amp;gt;
#include &amp;lt;stdio.h&amp;gt;

/* each OpenMP thread has its own VSL RNG and storage */

#define NRV 16384  // number of random variables
VSLStreamStatePtr stream;
float* uniforms, * normals;
int    uniforms_count, normals_count;
#pragma omp threadprivate(stream, uniforms,uniforms_count, \
                                  normals, normals_count)

//
// RNG routines
//

void rng_initialisation() {
    int tid = omp_get_thread_num();
    int status = vslNewStream(&amp;amp;stream, VSL_BRNG_MRG32K3A, 1337);
    if (status != VSL_STATUS_OK || stream == NULL) {
        printf("Stream initialization failed with status: %d\n", status);
        return; 
    }

    long long skip = ((long long)(tid + 1)) &amp;lt;&amp;lt; 48;
    status = vslSkipAheadStream(stream, skip);
    if (status != VSL_STATUS_OK) {
        printf("vslSkipAheadStream failed wih status: %d\n", status);
        return; 
    }

    uniforms = (float*)malloc(NRV * sizeof(float));
    normals = (float*)malloc(NRV * sizeof(float));
    if (uniforms == NULL || normals == NULL) {
        printf("Memory allocation failed.\n");
        return; 
    }

    uniforms_count = 0; // this means there are no random
    normals_count = 0; // numbers in the arrays currently
}

void rng_termination() {
    vslDeleteStream(&amp;amp;stream);
    free(uniforms);
    free(normals);
}

// Return the next uniform variate from this thread's buffer.
// The buffer and its counter are threadprivate, so each OpenMP thread
// consumes its own values. When the buffer is empty, NRV new values are
// requested from vsRngUniform with bounds 0.0f and 1.0f.
float next_uniform() {
    if (uniforms_count == 0) {
        vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD,
            stream, NRV, uniforms, 0.0f, 1.0f);
        // Track the uniform buffer here, not the normal one: the refill
        // above wrote into uniforms, so its own counter must be reset.
        uniforms_count = NRV;
    }
    // Hand out values from the end of the uniform buffer.
    return uniforms[--uniforms_count];
}

inline float next_normal() {
    if (normals_count == 0) {
        vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2,
            stream, NRV, normals, 0.0f, 1.0f);
        normals_count = NRV;
    }
    return normals[--normals_count];
}

#endif

//
// other header files needed for both versions
//

#include &amp;lt;stdio.h&amp;gt;
#include &amp;lt;stdlib.h&amp;gt;
#include &amp;lt;math.h&amp;gt;

//
// main code
//

int main(int argc, char** argv)
{
    float  T = 1.0f, X0 = 1.0f, mu = 0.05f, sigma = 0.2f, dt;
    double sum1 = 0.0, sum2 = 0.0;
    int    M = 200;      /* number of timesteps */
    int    N = 19600000;  /* total number of MC samples */

    dt = T / ((float)M);

    // initialise generator, with separate storage for each
    // thread when compiled for OpenMP
#pragma omp parallel
    rng_initialisation();

#ifdef _OPENMP
    double wtime = omp_get_wtime();
    omp_set_num_threads(8);
#endif

#pragma omp parallel for default(none) shared(T,X0,mu,sigma,dt,M,N) \
                                       reduction(+:sum1,sum2)
    for (int n = 0; n &amp;lt; N; n++) {
        float X = X0;

        for (int m = 0; m &amp;lt; M; m++) {
            float delW = sqrtf(dt) * next_normal();
            X = X + X * (mu * dt + sigma * delW);
        }

        sum1 += X;
        sum2 += X * X;
    }

    printf("Exact solution E[X_T] = %g\n", X0 * exp(mu * T));
    printf("Monte Carlo estimate  = %g +/- %g \n", sum1 / N,
        3.0 * sqrt((sum2 / N - (sum1 / N) * (sum1 / N)) / N));
    printf("\nReminder: Monte Carlo estimate has discretisation bias\n\n");
    float RNGs = ((float)N) * ((float)M);
    printf("Random Nums generated = %g\n", RNGs);

#ifdef _OPENMP
    wtime = omp_get_wtime() - wtime;
    printf("threads               = %d\n", omp_get_max_threads());
    printf("execution time        = %10.4g\n", wtime);
    printf("RNG/s                 = %10.4g\n\n", RNGs / wtime);
#endif

    // delete generator and storage
#pragma omp parallel 
    rng_termination();
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;First of all, thank you for your reply; it has helped narrow down the potential sources of the issue.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;The code I am using is this:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;//
//----- C++11 random number generation when not using OpenMP -------------
//

#ifndef _OPENMP

#include &amp;lt;random&amp;gt;           // C++11 random number generators
#include &amp;lt;functional&amp;gt;

/* some web references

   https://www.cplusplus.com/reference/random/
   https://stackoverflow.com/questions/14023880/c11-random-numbers-and-stdbind-i
nteract-in-unexpected-way/14023935
   https://stackoverflow.com/questions/20671573/c11-stdgenerate-and-stduniform-r
eal-distribution-called-two-times-gives-st

*/

// declare generator and output distributions

std::default_random_engine rng;
std::uniform_real_distribution&amp;lt;float&amp;gt; uniform(0.0f, 1.0f);
std::normal_distribution&amp;lt;float&amp;gt; normal(0.0f, 1.0f);

auto next_uniform = std::bind(std::ref(uniform), std::ref(rng));
auto next_normal = std::bind(std::ref(normal), std::ref(rng));

void rng_initialisation() {
    rng.seed(1234);
    uniform.reset();
    normal.reset();
}

void rng_termination() {
}

//------- MKL/VSL random number generation when using OpenMP -----------

#else

#include &amp;lt;mkl.h&amp;gt;
#include &amp;lt;mkl_vsl.h&amp;gt;
#include &amp;lt;memory.h&amp;gt;
#include &amp;lt;omp.h&amp;gt;
#include &amp;lt;stdio.h&amp;gt;

/* each OpenMP thread has its own VSL RNG and storage */

#define NRV 16384  // number of random variables
VSLStreamStatePtr stream;
float* uniforms, * normals;
int    uniforms_count, normals_count;
#pragma omp threadprivate(stream, uniforms,uniforms_count, \
                                  normals, normals_count)

//
// RNG routines
//

void rng_initialisation() {
    int tid = omp_get_thread_num();
    int status = vslNewStream(&amp;amp;stream, VSL_BRNG_MRG32K3A, 1337);
    if (status != VSL_STATUS_OK || stream == NULL) {
        printf("Stream initialization failed with status: %d\n", status);
        return; 
    }

    long long skip = ((long long)(tid + 1)) &amp;lt;&amp;lt; 48;
    status = vslSkipAheadStream(stream, skip);
    if (status != VSL_STATUS_OK) {
        printf("vslSkipAheadStream failed wih status: %d\n", status);
        return; 
    }

    uniforms = (float*)malloc(NRV * sizeof(float));
    normals = (float*)malloc(NRV * sizeof(float));
    if (uniforms == NULL || normals == NULL) {
        printf("Memory allocation failed.\n");
        return; 
    }

    uniforms_count = 0; // this means there are no random
    normals_count = 0; // numbers in the arrays currently
}

void rng_termination() {
    vslDeleteStream(&amp;amp;stream);
    free(uniforms);
    free(normals);
}

// Return the next uniform variate from this thread's buffer.
// The buffer and its counter are threadprivate, so each OpenMP thread
// consumes its own values. When the buffer is empty, NRV new values are
// requested from vsRngUniform with bounds 0.0f and 1.0f.
float next_uniform() {
    if (uniforms_count == 0) {
        vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD,
            stream, NRV, uniforms, 0.0f, 1.0f);
        // Track the uniform buffer here, not the normal one: the refill
        // above wrote into uniforms, so its own counter must be reset.
        uniforms_count = NRV;
    }
    // Hand out values from the end of the uniform buffer.
    return uniforms[--uniforms_count];
}

inline float next_normal() {
    if (normals_count == 0) {
        vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2,
            stream, NRV, normals, 0.0f, 1.0f);
        normals_count = NRV;
    }
    return normals[--normals_count];
}

#endif

//
// other header files needed for both versions
//

#include &amp;lt;stdio.h&amp;gt;
#include &amp;lt;stdlib.h&amp;gt;
#include &amp;lt;math.h&amp;gt;

//
// main code
//

int main(int argc, char** argv)
{
    float  T = 1.0f, X0 = 1.0f, mu = 0.05f, sigma = 0.2f, dt;
    double sum1 = 0.0, sum2 = 0.0;
    int    M = 200;      /* number of timesteps */
    int    N = 19600000;  /* total number of MC samples */

    dt = T / ((float)M);

    // initialise generator, with separate storage for each
    // thread when compiled for OpenMP
#pragma omp parallel
    rng_initialisation();

#ifdef _OPENMP
    double wtime = omp_get_wtime();
    omp_set_num_threads(8);
#endif

#pragma omp parallel for default(none) shared(T,X0,mu,sigma,dt,M,N) \
                                       reduction(+:sum1,sum2)
    for (int n = 0; n &amp;lt; N; n++) {
        float X = X0;

        for (int m = 0; m &amp;lt; M; m++) {
            float delW = sqrtf(dt) * next_normal();
            X = X + X * (mu * dt + sigma * delW);
        }

        sum1 += X;
        sum2 += X * X;
    }

    printf("Exact solution E[X_T] = %g\n", X0 * exp(mu * T));
    printf("Monte Carlo estimate  = %g +/- %g \n", sum1 / N,
        3.0 * sqrt((sum2 / N - (sum1 / N) * (sum1 / N)) / N));
    printf("\nReminder: Monte Carlo estimate has discretisation bias\n\n");
    float RNGs = ((float)N) * ((float)M);
    printf("Random Nums generated = %g\n", RNGs);

#ifdef _OPENMP
    wtime = omp_get_wtime() - wtime;
    printf("threads               = %d\n", omp_get_max_threads());
    printf("execution time        = %10.4g\n", wtime);
    printf("RNG/s                 = %10.4g\n\n", RNGs / wtime);
#endif

    // delete generator and storage
#pragma omp parallel 
    rng_termination();
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;HR /&gt;&lt;P&gt;When I use omp_set_num_threads(8), it works fine and uses all the threads in the expected time. However, when I change the number of the threads, to 4 for instance, it shows me the&amp;nbsp;&lt;SPAN&gt;“Access violation reading location” problem. The debugging points to this part of the code:&lt;/SPAN&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;inline float next_normal() {
    if (normals_count == 0) {
        vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2,
            stream, NRV, normals, 0.0f, 1.0f);
        normals_count = NRV;
    }
    return normals[--normals_count];
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Could the issue be related to a missing library, specifically &lt;/SPAN&gt;libiomp5md.lib&lt;SPAN&gt;? The Intel MKL Link Line Advisor recommended its use, yet it appears to be missing from the installed package.&lt;/SPAN&gt;&lt;/P&gt;</description>
    <pubDate>Mon, 12 Feb 2024 11:56:23 GMT</pubDate>
    <dc:creator>Munera</dc:creator>
    <dc:date>2024-02-12T11:56:23Z</dc:date>
    <item>
      <title>Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1569722#M35787</link>
      <description>&lt;P&gt;Hello, I am writing to seek assistance with a challenging issue I've encountered while developing a C++ application that utilizes Intel Math Kernel Library (MKL) and OpenMP for parallel processing and random number generation. My development environment is on Windows, using Microsoft Visual Studio, and the application behaves as expected when running with the maximum number of OpenMP threads (8 threads) or a single thread. However, when I adjust the thread count to any number other than 8 or 1, for example, 4 threads, the application fails at runtime with specific errors.&lt;/P&gt;&lt;P&gt;To be clearer, Changing the thread count to other values results in an unhandled exception: "Access violation reading location" and an Intel MKL error: "Parameter 2 was incorrect on entry to vsRngGaussian".&lt;/P&gt;&lt;P&gt;This issue is not observed in a Linux environment, indicating a possible platform-specific behavior. The application employs #pragma omp threadprivate for per-thread MKL VSLStreamStatePtr management and dynamic memory allocations for random number arrays.&lt;/P&gt;&lt;P&gt;Additionally, I've encountered a missing library issue (libiomp5md.lib) as suggested by the Intel MKL Link Line Advisor, which might be affecting the application's performance or the occurrence of runtime errors.&lt;/P&gt;&lt;P&gt;&lt;STRONG&gt;I am Seeking Guidance On:&lt;/STRONG&gt;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Any known compatibility issues between Intel MKL, OpenMP, and Windows that could lead to the described behavior.&lt;/LI&gt;&lt;LI&gt;Specific configuration or environmental settings required for stable operation of MKL and OpenMP on Windows, especially regarding dynamic thread count adjustments.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;&lt;STRONG&gt;Additional Note:&lt;/STRONG&gt;&lt;SPAN&gt; I have successfully used OpenMP in a Windows environment with other codebases, where changing the thread count to any number posed no issues. 
The problem specifically occurs when integrating MKL for random number generation, which leads me to believe the issue might be closely tied to the MKL and OpenMP interplay in this particular context.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Could you please provide insights or direct me to relevant documentation that might help resolve these issues?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 06 Feb 2024 11:26:51 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1569722#M35787</guid>
      <dc:creator>Munera</dc:creator>
      <dc:date>2024-02-06T11:26:51Z</dc:date>
    </item>
    <item>
      <title>Re: Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1571396#M35811</link>
      <description>&lt;P&gt;&amp;gt;&amp;gt; This issue is not observed in a Linux environment, indicating a possible platform-specific behavior.&lt;/P&gt;
&lt;P&gt;&amp;lt;&amp;lt; There is no platform-specific behavior in the RNG implementation.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;With regard to the “Access violation reading location” error – it would make sense to give us a reproducer of this problem so that we can investigate the behavior.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;gt;&amp;gt; The problem specifically occurs when integrating MKL for random number generation, which leads me to believe the issue might be closely tied to the MKL and OpenMP interplay in this context.&lt;/P&gt;
&lt;P&gt;There are no interoperability problems between OpenMP and MKL, or at least we are not aware of any.&lt;/P&gt;</description>
      <pubDate>Mon, 12 Feb 2024 05:43:10 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1571396#M35811</guid>
      <dc:creator>Gennady_F_Intel</dc:creator>
      <dc:date>2024-02-12T05:43:10Z</dc:date>
    </item>
    <item>
      <title>Re: Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1571491#M35813</link>
      <description>&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;//
//----- C++11 random number generation when not using OpenMP -------------
//

#ifndef _OPENMP

#include &amp;lt;random&amp;gt;           // C++11 random number generators
#include &amp;lt;functional&amp;gt;

/* some web references

   https://www.cplusplus.com/reference/random/
   https://stackoverflow.com/questions/14023880/c11-random-numbers-and-stdbind-i
nteract-in-unexpected-way/14023935
   https://stackoverflow.com/questions/20671573/c11-stdgenerate-and-stduniform-r
eal-distribution-called-two-times-gives-st

*/

// declare generator and output distributions

std::default_random_engine rng;
std::uniform_real_distribution&amp;lt;float&amp;gt; uniform(0.0f, 1.0f);
std::normal_distribution&amp;lt;float&amp;gt; normal(0.0f, 1.0f);

auto next_uniform = std::bind(std::ref(uniform), std::ref(rng));
auto next_normal = std::bind(std::ref(normal), std::ref(rng));

void rng_initialisation() {
    rng.seed(1234);
    uniform.reset();
    normal.reset();
}

void rng_termination() {
}

//------- MKL/VSL random number generation when using OpenMP -----------

#else

#include &amp;lt;mkl.h&amp;gt;
#include &amp;lt;mkl_vsl.h&amp;gt;
#include &amp;lt;memory.h&amp;gt;
#include &amp;lt;omp.h&amp;gt;
#include &amp;lt;stdio.h&amp;gt;

/* each OpenMP thread has its own VSL RNG and storage */

#define NRV 16384  // number of random variables
VSLStreamStatePtr stream;
float* uniforms, * normals;
int    uniforms_count, normals_count;
#pragma omp threadprivate(stream, uniforms,uniforms_count, \
                                  normals, normals_count)

//
// RNG routines
//

void rng_initialisation() {
    int tid = omp_get_thread_num();
    int status = vslNewStream(&amp;amp;stream, VSL_BRNG_MRG32K3A, 1337);
    if (status != VSL_STATUS_OK || stream == NULL) {
        printf("Stream initialization failed with status: %d\n", status);
        return; 
    }

    long long skip = ((long long)(tid + 1)) &amp;lt;&amp;lt; 48;
    status = vslSkipAheadStream(stream, skip);
    if (status != VSL_STATUS_OK) {
        printf("vslSkipAheadStream failed wih status: %d\n", status);
        return; 
    }

    uniforms = (float*)malloc(NRV * sizeof(float));
    normals = (float*)malloc(NRV * sizeof(float));
    if (uniforms == NULL || normals == NULL) {
        printf("Memory allocation failed.\n");
        return; 
    }

    uniforms_count = 0; // this means there are no random
    normals_count = 0; // numbers in the arrays currently
}

void rng_termination() {
    vslDeleteStream(&amp;amp;stream);
    free(uniforms);
    free(normals);
}

// Return the next uniform variate from this thread's buffer.
// The buffer and its counter are threadprivate, so each OpenMP thread
// consumes its own values. When the buffer is empty, NRV new values are
// requested from vsRngUniform with bounds 0.0f and 1.0f.
float next_uniform() {
    if (uniforms_count == 0) {
        vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD,
            stream, NRV, uniforms, 0.0f, 1.0f);
        // Track the uniform buffer here, not the normal one: the refill
        // above wrote into uniforms, so its own counter must be reset.
        uniforms_count = NRV;
    }
    // Hand out values from the end of the uniform buffer.
    return uniforms[--uniforms_count];
}

inline float next_normal() {
    if (normals_count == 0) {
        vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2,
            stream, NRV, normals, 0.0f, 1.0f);
        normals_count = NRV;
    }
    return normals[--normals_count];
}

#endif

//
// other header files needed for both versions
//

#include &amp;lt;stdio.h&amp;gt;
#include &amp;lt;stdlib.h&amp;gt;
#include &amp;lt;math.h&amp;gt;

//
// main code
//

int main(int argc, char** argv)
{
    float  T = 1.0f, X0 = 1.0f, mu = 0.05f, sigma = 0.2f, dt;
    double sum1 = 0.0, sum2 = 0.0;
    int    M = 200;      /* number of timesteps */
    int    N = 19600000;  /* total number of MC samples */

    dt = T / ((float)M);

    // initialise generator, with separate storage for each
    // thread when compiled for OpenMP
#pragma omp parallel
    rng_initialisation();

#ifdef _OPENMP
    double wtime = omp_get_wtime();
    omp_set_num_threads(8);
#endif

#pragma omp parallel for default(none) shared(T,X0,mu,sigma,dt,M,N) \
                                       reduction(+:sum1,sum2)
    for (int n = 0; n &amp;lt; N; n++) {
        float X = X0;

        for (int m = 0; m &amp;lt; M; m++) {
            float delW = sqrtf(dt) * next_normal();
            X = X + X * (mu * dt + sigma * delW);
        }

        sum1 += X;
        sum2 += X * X;
    }

    printf("Exact solution E[X_T] = %g\n", X0 * exp(mu * T));
    printf("Monte Carlo estimate  = %g +/- %g \n", sum1 / N,
        3.0 * sqrt((sum2 / N - (sum1 / N) * (sum1 / N)) / N));
    printf("\nReminder: Monte Carlo estimate has discretisation bias\n\n");
    float RNGs = ((float)N) * ((float)M);
    printf("Random Nums generated = %g\n", RNGs);

#ifdef _OPENMP
    wtime = omp_get_wtime() - wtime;
    printf("threads               = %d\n", omp_get_max_threads());
    printf("execution time        = %10.4g\n", wtime);
    printf("RNG/s                 = %10.4g\n\n", RNGs / wtime);
#endif

    // delete generator and storage
#pragma omp parallel 
    rng_termination();
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;First of all, thank you for your reply; it has helped narrow down the potential sources of the issue.&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;The code I am using is this:&lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;//
//----- C++11 random number generation when not using OpenMP -------------
//

#ifndef _OPENMP

#include &amp;lt;random&amp;gt;           // C++11 random number generators
#include &amp;lt;functional&amp;gt;

/* some web references

   https://www.cplusplus.com/reference/random/
   https://stackoverflow.com/questions/14023880/c11-random-numbers-and-stdbind-i
nteract-in-unexpected-way/14023935
   https://stackoverflow.com/questions/20671573/c11-stdgenerate-and-stduniform-r
eal-distribution-called-two-times-gives-st

*/

// declare generator and output distributions

std::default_random_engine rng;
std::uniform_real_distribution&amp;lt;float&amp;gt; uniform(0.0f, 1.0f);
std::normal_distribution&amp;lt;float&amp;gt; normal(0.0f, 1.0f);

auto next_uniform = std::bind(std::ref(uniform), std::ref(rng));
auto next_normal = std::bind(std::ref(normal), std::ref(rng));

void rng_initialisation() {
    rng.seed(1234);
    uniform.reset();
    normal.reset();
}

void rng_termination() {
}

//------- MKL/VSL random number generation when using OpenMP -----------

#else

#include &amp;lt;mkl.h&amp;gt;
#include &amp;lt;mkl_vsl.h&amp;gt;
#include &amp;lt;memory.h&amp;gt;
#include &amp;lt;omp.h&amp;gt;
#include &amp;lt;stdio.h&amp;gt;

/* each OpenMP thread has its own VSL RNG and storage */

#define NRV 16384  // number of random variables
VSLStreamStatePtr stream;
float* uniforms, * normals;
int    uniforms_count, normals_count;
#pragma omp threadprivate(stream, uniforms,uniforms_count, \
                                  normals, normals_count)

//
// RNG routines
//

void rng_initialisation() {
    int tid = omp_get_thread_num();
    int status = vslNewStream(&amp;amp;stream, VSL_BRNG_MRG32K3A, 1337);
    if (status != VSL_STATUS_OK || stream == NULL) {
        printf("Stream initialization failed with status: %d\n", status);
        return; 
    }

    long long skip = ((long long)(tid + 1)) &amp;lt;&amp;lt; 48;
    status = vslSkipAheadStream(stream, skip);
    if (status != VSL_STATUS_OK) {
        printf("vslSkipAheadStream failed wih status: %d\n", status);
        return; 
    }

    uniforms = (float*)malloc(NRV * sizeof(float));
    normals = (float*)malloc(NRV * sizeof(float));
    if (uniforms == NULL || normals == NULL) {
        printf("Memory allocation failed.\n");
        return; 
    }

    uniforms_count = 0; // this means there are no random
    normals_count = 0; // numbers in the arrays currently
}

void rng_termination() {
    vslDeleteStream(&amp;amp;stream);
    free(uniforms);
    free(normals);
}

// Return the next uniform variate from this thread's buffer.
// The buffer and its counter are threadprivate, so each OpenMP thread
// consumes its own values. When the buffer is empty, NRV new values are
// requested from vsRngUniform with bounds 0.0f and 1.0f.
float next_uniform() {
    if (uniforms_count == 0) {
        vsRngUniform(VSL_RNG_METHOD_UNIFORM_STD,
            stream, NRV, uniforms, 0.0f, 1.0f);
        // Track the uniform buffer here, not the normal one: the refill
        // above wrote into uniforms, so its own counter must be reset.
        uniforms_count = NRV;
    }
    // Hand out values from the end of the uniform buffer.
    return uniforms[--uniforms_count];
}

inline float next_normal() {
    if (normals_count == 0) {
        vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2,
            stream, NRV, normals, 0.0f, 1.0f);
        normals_count = NRV;
    }
    return normals[--normals_count];
}

#endif

//
// other header files needed for both versions
//

#include &amp;lt;stdio.h&amp;gt;
#include &amp;lt;stdlib.h&amp;gt;
#include &amp;lt;math.h&amp;gt;

//
// main code
//

int main(int argc, char** argv)
{
    float  T = 1.0f, X0 = 1.0f, mu = 0.05f, sigma = 0.2f, dt;
    double sum1 = 0.0, sum2 = 0.0;
    int    M = 200;      /* number of timesteps */
    int    N = 19600000;  /* total number of MC samples */

    dt = T / ((float)M);

    // initialise generator, with separate storage for each
    // thread when compiled for OpenMP
#pragma omp parallel
    rng_initialisation();

#ifdef _OPENMP
    double wtime = omp_get_wtime();
    omp_set_num_threads(8);
#endif

#pragma omp parallel for default(none) shared(T,X0,mu,sigma,dt,M,N) \
                                       reduction(+:sum1,sum2)
    for (int n = 0; n &amp;lt; N; n++) {
        float X = X0;

        for (int m = 0; m &amp;lt; M; m++) {
            float delW = sqrtf(dt) * next_normal();
            X = X + X * (mu * dt + sigma * delW);
        }

        sum1 += X;
        sum2 += X * X;
    }

    printf("Exact solution E[X_T] = %g\n", X0 * exp(mu * T));
    printf("Monte Carlo estimate  = %g +/- %g \n", sum1 / N,
        3.0 * sqrt((sum2 / N - (sum1 / N) * (sum1 / N)) / N));
    printf("\nReminder: Monte Carlo estimate has discretisation bias\n\n");
    float RNGs = ((float)N) * ((float)M);
    printf("Random Nums generated = %g\n", RNGs);

#ifdef _OPENMP
    wtime = omp_get_wtime() - wtime;
    printf("threads               = %d\n", omp_get_max_threads());
    printf("execution time        = %10.4g\n", wtime);
    printf("RNG/s                 = %10.4g\n\n", RNGs / wtime);
#endif

    // delete generator and storage
#pragma omp parallel 
    rng_termination();
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;HR /&gt;&lt;P&gt;When I use omp_set_num_threads(8), it works fine and uses all the threads in the expected time. However, when I change the number of the threads, to 4 for instance, it shows me the&amp;nbsp;&lt;SPAN&gt;“Access violation reading location” problem. The debugging points to this part of the code:&lt;/SPAN&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;LI-CODE lang="markup"&gt;inline float next_normal() {
    if (normals_count == 0) {
        vsRngGaussian(VSL_RNG_METHOD_GAUSSIAN_BOXMULLER2,
            stream, NRV, normals, 0.0f, 1.0f);
        normals_count = NRV;
    }
    return normals[--normals_count];
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Could the issue be related to a missing library, specifically &lt;/SPAN&gt;libiomp5md.lib&lt;SPAN&gt;? The Intel MKL Link Line Advisor recommended its use, yet it appears to be missing from the installed package.&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 12 Feb 2024 11:56:23 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1571491#M35813</guid>
      <dc:creator>Munera</dc:creator>
      <dc:date>2024-02-12T11:56:23Z</dc:date>
    </item>
    <item>
      <title>Re: Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1571492#M35814</link>
      <description>&lt;P&gt;ok, we will take a look at this example.&lt;/P&gt;
&lt;P&gt;In the meantime, there are two notes here:&lt;/P&gt;
&lt;P&gt;1. With regard to&amp;nbsp;&lt;SPAN&gt;libiomp5md.lib -- the standalone version of oneMKL contains libiomp*.dll/libs by default. You can check this package on the oneMKL product page by following the link:&amp;nbsp;&lt;A href="https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html" target="_blank"&gt;https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html&lt;/A&gt;&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;2. The forum thread allows anyone to attach files. It would be much more convenient to use this option instead of posting the whole code inline.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;--Gennady&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 12 Feb 2024 12:04:53 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1571492#M35814</guid>
      <dc:creator>Gennady_F_Intel</dc:creator>
      <dc:date>2024-02-12T12:04:53Z</dc:date>
    </item>
    <item>
      <title>Re:Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1572243#M35825</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;The reproducer works with different numbers of threads, be it 2, 3 or 4, and does not show any runtime error.&lt;/P&gt;&lt;P&gt;Could you please let me know the following:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Which processor are you using? &lt;/LI&gt;&lt;LI&gt;Which command are you using to compile the code?&lt;/LI&gt;&lt;/OL&gt;&lt;BR /&gt;</description>
      <pubDate>Wed, 14 Feb 2024 09:48:05 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1572243#M35825</guid>
      <dc:creator>Mahan</dc:creator>
      <dc:date>2024-02-14T09:48:05Z</dc:date>
    </item>
    <item>
      <title>Re:Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1572767#M35838</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Could you please provide me with the above-mentioned details?&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Fri, 16 Feb 2024 02:25:34 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1572767#M35838</guid>
      <dc:creator>Mahan</dc:creator>
      <dc:date>2024-02-16T02:25:34Z</dc:date>
    </item>
    <item>
      <title>Re: Re:Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1572853#M35840</link>
      <description>&lt;P&gt;Hi Mahan,&amp;nbsp;&lt;/P&gt;&lt;P&gt;&lt;SPAN&gt;Sorry, I was so busy I could &lt;/SPAN&gt;&lt;SPAN class=""&gt;not&lt;/SPAN&gt;&lt;SPAN&gt; reply &lt;/SPAN&gt;&lt;SPAN class=""&gt;to&lt;/SPAN&gt;&lt;SPAN&gt; you right away. Thank you &lt;/SPAN&gt;&lt;SPAN class=""&gt;for&lt;/SPAN&gt;&lt;SPAN&gt; your help &lt;/SPAN&gt;&lt;SPAN class=""&gt;and&lt;/SPAN&gt;&lt;SPAN&gt; fast response. Here are the details you requested: &lt;/SPAN&gt;&lt;/P&gt;&lt;P&gt;1. Processor: I am using an 11th Gen Intel(R) Core(TM) i3-1125G4 @ 2.00GHz, with 1997 Mhz, 4 Core(s), and 8 Logical Processor(s).&lt;/P&gt;&lt;P&gt;2. Command Used While Compiling: I compile the code using the "Build and Run" feature in Microsoft Visual Studio. This feature automates the compilation process, so I do not use a specific command line instruction manually.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Feb 2024 07:04:26 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1572853#M35840</guid>
      <dc:creator>Munera</dc:creator>
      <dc:date>2024-02-16T07:04:26Z</dc:date>
    </item>
    <item>
      <title>Re:Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1573418#M35850</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;Please make sure the following properties are correctly set for the project and the .cpp source file.&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;Intel oneAPI 2024 compiler &lt;/LI&gt;&lt;LI&gt;C++17 Standard&lt;/LI&gt;&lt;LI&gt;oneMKL with LP64&lt;/LI&gt;&lt;LI&gt;OpenMP&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;Please see the attached screenshots for reference.&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Mon, 19 Feb 2024 03:37:12 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1573418#M35850</guid>
      <dc:creator>Mahan</dc:creator>
      <dc:date>2024-02-19T03:37:12Z</dc:date>
    </item>
    <item>
      <title>Re: Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1573419#M35851</link>
      <description>&lt;P&gt;Hi&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;SPAN&gt;Please see the attached screen shots for reference&lt;/SPAN&gt;&lt;/P&gt;</description>
      <pubDate>Mon, 19 Feb 2024 03:42:03 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1573419#M35851</guid>
      <dc:creator>Mahan</dc:creator>
      <dc:date>2024-02-19T03:42:03Z</dc:date>
    </item>
    <item>
      <title>Re: Assistance Required: Runtime Errors with Intel MKL and OpenMP in Windows Environment</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1573574#M35853</link>
      <description>&lt;P&gt;Thank you so much! I adjusted those settings and the code worked with all threads.&lt;/P&gt;</description>
      <pubDate>Mon, 19 Feb 2024 14:36:39 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/Assistance-Required-Runtime-Errors-with-Intel-MKL-and-OpenMP-in/m-p/1573574#M35853</guid>
      <dc:creator>Munera</dc:creator>
      <dc:date>2024-02-19T14:36:39Z</dc:date>
    </item>
  </channel>
</rss>

