- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I need to calculate square i.e. x^2 of the very big uint64_t array number, then subtract 2 and calculate the MOD(result, x).
Can you advise me if the oneAPI MKL is suitable for that purpose ? What speed I can expect if the x is 2^136279841 - 1. It's one step of Lucas-Lehmer Primality Testing.
What MKL commands do I need? Does the MKL library automatically call the best algorithm for the calculation (FFT/iFFT, Karatsuba etc.).
#include <iostream> #include <immintrin.h> #include <cstdint> #include <windows.h> // For VirtualAlloc and VirtualFree // Constants and definitions constexpr size_t GB = 3; constexpr size_t giga = 1024 * 1024 * 1024; size_t num_bits = 136279841; size_t num_uint64 = ((num_bits + 255) / 256) * 4; // Number of uint64_t elements // Allocate 3GB memory static const size_t giga = 1024 * 1024 * 1024; static const size_t size = GB * giga; uint64_t* ARRAY = static_cast<uint64_t*>(VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE)); // Initialize the large number in POLE (example initialization) uint64_t* x = ARRAY; uint64_t* result = ARRAY + num_uint64; uint64_t* tmp = ARRAY + 3 * num_uint64; // Store the number 2^136279841 - 1 using _mm256_maskstore_epi64 in a loop __m256i ones = _mm256_set1_epi64x(-1); size_t i = 0; for (; i < (num_uint64)-4; i += 4) { _mm256_store_si256((__m256i*) & x[i], ones); } // Handle remaining bits _mm256_maskstore_epi64((long long int*) & x[i], _mm256_setr_epi64x(-1, -1, -1, -1), _mm256_setr_epi64x(0x01FFFFFFFF, 0, 0, 0));
Link Copied
1 Reply
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
What about this code generated by ChatGPT 4o ? Can someone test the speed of squaring and MOD before I install MKL library in my Visual Studio 2022?
#include <iostream>
#include <immintrin.h>
#include <cstdint>
#include <windows.h> // For VirtualAlloc and VirtualFree
#include <oneapi/mkl.hpp>
// Constants and definitions
constexpr size_t GB = 3;
constexpr size_t giga = 1024 * 1024 * 1024;
size_t num_bits = 136279841;
size_t num_uint64 = ((num_bits + 255) / 256) * 4; // Number of uint64_t elements
void print_bigint(const uint64_t* num, size_t len) {
for (size_t i = len; i > 0; --i) {
printf("%016llx", num[i-1]);
}
printf("\n");
}
int main() {
// Allocate 3GB memory
static const size_t size = GB * giga;
uint64_t* ARRAY = static_cast<uint64_t*>(VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE));
// Initialize the large number in POLE (example initialization)
uint64_t* x = ARRAY;
uint64_t* result = ARRAY + num_uint64;
uint64_t* tmp = ARRAY + 3 * num_uint64;
// Store the number 2^136279841 - 1 using _mm256_maskstore_epi64 in a loop
__m256i ones = _mm256_set1_epi64x(-1);
size_t i = 0;
for (; i < (num_uint64)-4; i += 4) {
_mm256_store_si256((__m256i*) & x[i], ones);
}
// Handle remaining bits
_mm256_maskstore_epi64((long long int*) & x[i], _mm256_setr_epi64x(-1, -1, -1, -1), _mm256_setr_epi64x(0x01FFFFFFFF, 0, 0, 0));
try {
// Create a oneAPI queue
sycl::queue queue{sycl::default_selector{}};
// Using MKL to square the number x
oneapi::mkl::vm::mul(queue, num_uint64, x, x, result).wait();
// Subtract 2 from result
result[0] -= 2;
// Compute result mod x
uint64_t* mod_result = tmp;
oneapi::mkl::vm::mod(queue, num_uint64, result, x, mod_result).wait();
// Print the result
std::cout << "Result mod x: ";
print_bigint(mod_result, num_uint64);
} catch (const sycl::exception& e) {
std::cerr << "SYCL exception: " << e.what() << std::endl;
return 1;
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return 1;
}
VirtualFree(ARRAY, 0, MEM_RELEASE);
return 0;
}

Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page