<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: MKL and arithmetic with 2^136279841 - 1 in Intel® oneAPI Math Kernel Library</title>
    <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/MKL-and-arithmetic-with-2-136279841-1/m-p/1647179#M36711</link>
    <description>&lt;P&gt;What about this code generated by ChatGPT 4o? Can someone test the speed of squaring and MOD before I install the MKL library in my Visual Studio 2022?&lt;/P&gt;&lt;LI-CODE lang="cpp"&gt;#include &amp;lt;iostream&amp;gt;
#include &amp;lt;immintrin.h&amp;gt;
#include &amp;lt;cstdint&amp;gt;
#include &amp;lt;windows.h&amp;gt;   // For VirtualAlloc and VirtualFree
#include &amp;lt;oneapi/mkl.hpp&amp;gt;

// Constants and definitions
constexpr size_t GB = 3;
constexpr size_t giga = 1024 * 1024 * 1024;
size_t num_bits = 136279841;
size_t num_uint64 = ((num_bits + 255) / 256) * 4; // Number of uint64_t elements

void print_bigint(const uint64_t* num, size_t len) {
    for (size_t i = len; i &amp;gt; 0; --i) {
        printf("%016llx", num[i-1]);
    }
    printf("\n");
}

int main() {
	// Allocate 3GB memory
	static const size_t size = GB * giga;
	uint64_t* ARRAY = static_cast&amp;lt;uint64_t*&amp;gt;(VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE));

	// Initialize the large number in ARRAY (example initialization)
	uint64_t* x = ARRAY;
	uint64_t* result = ARRAY + num_uint64;
	uint64_t* tmp = ARRAY + 3 * num_uint64;

	// Store the number 2^136279841 - 1 using _mm256_maskstore_epi64 in a loop
	__m256i ones = _mm256_set1_epi64x(-1);
	size_t i = 0;
	for (; i &amp;lt; (num_uint64)-4; i += 4) {
	    _mm256_store_si256((__m256i*) &amp;amp; x[i], ones);
	}

	// Handle remaining bits
	_mm256_maskstore_epi64((long long int*) &amp;amp; x[i], _mm256_setr_epi64x(-1, -1, -1, -1), _mm256_setr_epi64x(0x01FFFFFFFF, 0, 0, 0));

    try {
        // Create a oneAPI queue
        sycl::queue queue{sycl::default_selector{}};
        
        // Using MKL to square the number x
        oneapi::mkl::vm::mul(queue, num_uint64, x, x, result).wait();

        // Subtract 2 from result
        result[0] -= 2;

        // Compute result mod x
        uint64_t* mod_result = tmp;
        oneapi::mkl::vm::mod(queue, num_uint64, result, x, mod_result).wait();

        // Print the result
        std::cout &amp;lt;&amp;lt; "Result mod x: ";
        print_bigint(mod_result, num_uint64);

    } catch (const sycl::exception&amp;amp; e) {
        std::cerr &amp;lt;&amp;lt; "SYCL exception: " &amp;lt;&amp;lt; e.what() &amp;lt;&amp;lt; std::endl;
        return 1;
    } catch (const std::exception&amp;amp; e) {
        std::cerr &amp;lt;&amp;lt; "Exception: " &amp;lt;&amp;lt; e.what() &amp;lt;&amp;lt; std::endl;
        return 1;
    }

    VirtualFree(ARRAY, 0, MEM_RELEASE);
    return 0;
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 03 Dec 2024 20:18:00 GMT</pubDate>
    <dc:creator>richter__dan</dc:creator>
    <dc:date>2024-12-03T20:18:00Z</dc:date>
    <item>
      <title>MKL and arithmetic with 2^136279841 - 1</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/MKL-and-arithmetic-with-2-136279841-1/m-p/1646940#M36706</link>
      <description>&lt;P&gt;I need to calculate the square, i.e. x^2, of a very large number stored as a uint64_t array, then subtract 2 and calculate MOD(result, x).&lt;/P&gt;&lt;P&gt;Can you advise me whether oneAPI MKL is suitable for that purpose? What speed can I expect if x is 2^136279841 - 1? It's one step of the Lucas-Lehmer primality test.&lt;/P&gt;&lt;P&gt;What MKL commands do I need? Does the MKL library automatically call the best algorithm for the calculation (FFT/iFFT, Karatsuba, etc.)?&lt;BR /&gt;&lt;BR /&gt;&lt;/P&gt;&lt;PRE&gt;#include &amp;lt;iostream&amp;gt;
#include &amp;lt;immintrin.h&amp;gt;
#include &amp;lt;cstdint&amp;gt;
#include &amp;lt;windows.h&amp;gt;   // For VirtualAlloc and VirtualFree

// Constants and definitions
constexpr size_t GB = 3;
constexpr size_t giga = 1024 * 1024 * 1024;
size_t num_bits = 136279841;
size_t num_uint64 = ((num_bits + 255) / 256) * 4; // Number of uint64_t elements

// Allocate 3GB memory
static const size_t giga = 1024 * 1024 * 1024;
static const size_t size = GB * giga;
uint64_t* ARRAY = static_cast&amp;lt;uint64_t*&amp;gt;(VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE));

// Initialize the large number in ARRAY (example initialization)
uint64_t* x = ARRAY;
uint64_t* result = ARRAY + num_uint64;
uint64_t* tmp = ARRAY + 3 * num_uint64;

// Store the number 2^136279841 - 1 using _mm256_maskstore_epi64 in a loop
__m256i ones = _mm256_set1_epi64x(-1);
size_t i = 0;
for (; i &amp;lt; (num_uint64)-4; i += 4) {
    _mm256_store_si256((__m256i*) &amp;amp; x[i], ones);
}

// Handle remaining bits
_mm256_maskstore_epi64((long long int*) &amp;amp; x[i], _mm256_setr_epi64x(-1, -1, -1, -1), _mm256_setr_epi64x(0x01FFFFFFFF, 0, 0, 0));&lt;/PRE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 02 Dec 2024 21:44:53 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/MKL-and-arithmetic-with-2-136279841-1/m-p/1646940#M36706</guid>
      <dc:creator>richter__dan</dc:creator>
      <dc:date>2024-12-02T21:44:53Z</dc:date>
    </item>
    <item>
      <title>Re: MKL and arithmetic with 2^136279841 - 1</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/MKL-and-arithmetic-with-2-136279841-1/m-p/1647179#M36711</link>
      <description>&lt;P&gt;What about this code generated by ChatGPT 4o? Can someone test the speed of squaring and MOD before I install the MKL library in my Visual Studio 2022?&lt;/P&gt;&lt;LI-CODE lang="cpp"&gt;#include &amp;lt;iostream&amp;gt;
#include &amp;lt;immintrin.h&amp;gt;
#include &amp;lt;cstdint&amp;gt;
#include &amp;lt;windows.h&amp;gt;   // For VirtualAlloc and VirtualFree
#include &amp;lt;oneapi/mkl.hpp&amp;gt;

// Constants and definitions
constexpr size_t GB = 3;
constexpr size_t giga = 1024 * 1024 * 1024;
size_t num_bits = 136279841;
size_t num_uint64 = ((num_bits + 255) / 256) * 4; // Number of uint64_t elements

void print_bigint(const uint64_t* num, size_t len) {
    for (size_t i = len; i &amp;gt; 0; --i) {
        printf("%016llx", num[i-1]);
    }
    printf("\n");
}

int main() {
	// Allocate 3GB memory
	static const size_t size = GB * giga;
	uint64_t* ARRAY = static_cast&amp;lt;uint64_t*&amp;gt;(VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE));

	// Initialize the large number in ARRAY (example initialization)
	uint64_t* x = ARRAY;
	uint64_t* result = ARRAY + num_uint64;
	uint64_t* tmp = ARRAY + 3 * num_uint64;

	// Store the number 2^136279841 - 1 using _mm256_maskstore_epi64 in a loop
	__m256i ones = _mm256_set1_epi64x(-1);
	size_t i = 0;
	for (; i &amp;lt; (num_uint64)-4; i += 4) {
	    _mm256_store_si256((__m256i*) &amp;amp; x[i], ones);
	}

	// Handle remaining bits
	_mm256_maskstore_epi64((long long int*) &amp;amp; x[i], _mm256_setr_epi64x(-1, -1, -1, -1), _mm256_setr_epi64x(0x01FFFFFFFF, 0, 0, 0));

    try {
        // Create a oneAPI queue
        sycl::queue queue{sycl::default_selector{}};
        
        // Using MKL to square the number x
        oneapi::mkl::vm::mul(queue, num_uint64, x, x, result).wait();

        // Subtract 2 from result
        result[0] -= 2;

        // Compute result mod x
        uint64_t* mod_result = tmp;
        oneapi::mkl::vm::mod(queue, num_uint64, result, x, mod_result).wait();

        // Print the result
        std::cout &amp;lt;&amp;lt; "Result mod x: ";
        print_bigint(mod_result, num_uint64);

    } catch (const sycl::exception&amp;amp; e) {
        std::cerr &amp;lt;&amp;lt; "SYCL exception: " &amp;lt;&amp;lt; e.what() &amp;lt;&amp;lt; std::endl;
        return 1;
    } catch (const std::exception&amp;amp; e) {
        std::cerr &amp;lt;&amp;lt; "Exception: " &amp;lt;&amp;lt; e.what() &amp;lt;&amp;lt; std::endl;
        return 1;
    }

    VirtualFree(ARRAY, 0, MEM_RELEASE);
    return 0;
}&lt;/LI-CODE&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 03 Dec 2024 20:18:00 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/MKL-and-arithmetic-with-2-136279841-1/m-p/1647179#M36711</guid>
      <dc:creator>richter__dan</dc:creator>
      <dc:date>2024-12-03T20:18:00Z</dc:date>
    </item>
  </channel>
</rss>

