//============================================================== // Copyright © Intel Corporation // // SPDX-License-Identifier: MIT // ============================================================= #include #include #include #if FPGA || FPGA_EMULATOR #include #endif using namespace sycl; // Vector type and data size for this example. size_t vector_size = 10000; typedef std::vector IntVector; // Create an exception handler for asynchronous SYCL exceptions static auto exception_handler = [](sycl::exception_list e_list) { for (std::exception_ptr const& e : e_list) { try { std::rethrow_exception(e); } catch (std::exception const& e) { #if _DEBUG std::cout << "Failure" << std::endl; #endif std::terminate(); } } }; //************************************ // Vector add in DPC++ on device: returns sum in 4th parameter "sum_parallel". //************************************ void VectorAdd(queue& q, const IntVector& a_vector, const IntVector& b_vector, IntVector& sum_parallel) { // Create the range object for the vectors managed by the buffer. range<1> num_items{ a_vector.size() }; // Create buffers that hold the data shared between the host and the devices. // The buffer destructor is responsible to copy the data back to host when it // goes out of scope. buffer a_buf(a_vector); buffer b_buf(b_vector); buffer sum_buf(sum_parallel.data(), num_items); // Submit a command group to the queue by a lambda function that contains the // data access permission and device computation (kernel). q.submit([&](handler& h) { // Create an accessor for each buffer with access permission: read, write or // read/write. The accessor is a mean to access the memory in the buffer. accessor a(a_buf, h, read_only); accessor b(b_buf, h, read_only); // The sum_accessor is used to store (with write permission) the sum data. accessor sum(sum_buf, h, write_only, no_init); // Use parallel_for to run vector addition in parallel on device. This // executes the kernel. // 1st parameter is the number of work items. // 2nd parameter is the kernel, a lambda that specifies what to do per // work item. The parameter of the lambda is the work item id. // DPC++ supports unnamed lambda kernel by default. h.parallel_for(num_items, [=](auto i) { sum[i] = a[i] + b[i]; }); }); } //************************************ // Initialize the vector from 0 to vector_size - 1 //************************************ void InitializeVector(IntVector& a) { for (size_t i = 0; i < a.size(); i++) a.at(i) = i; } //************************************ // Demonstrate vector add both in sequential on CPU and in parallel on device. //************************************ int main(int argc, char* argv[]) { // Change vector_size if it was passed as argument if (argc > 1) vector_size = std::stoi(argv[1]); // Create device selector for the device of your interest. #if FPGA_EMULATOR // DPC++ extension: FPGA emulator selector on systems without FPGA card. ext::intel::fpga_emulator_selector d_selector; #elif FPGA // DPC++ extension: FPGA selector on systems with FPGA card. ext::intel::fpga_selector d_selector; #else // The default device selector will select the most performant device. default_selector d_selector; #endif // Create vector objects with "vector_size" to store the input and output data. IntVector a, b, sum_sequential, sum_parallel; a.resize(vector_size); b.resize(vector_size); sum_sequential.resize(vector_size); sum_parallel.resize(vector_size); // Initialize input vectors with values from 0 to vector_size - 1 InitializeVector(a); InitializeVector(b); try { queue q(d_selector, exception_handler); // Print out the device information used for the kernel code. std::cout << "Running on device: " << q.get_device().get_info() << "\n"; std::cout << "Vector size: " << a.size() << "\n"; // Vector addition in DPC++ VectorAdd(q, a, b, sum_parallel); } catch (exception const& e) { std::cout << "An exception is caught for vector add.\n"; std::terminate(); } // Compute the sum of two vectors in sequential for validation. for (size_t i = 0; i < sum_sequential.size(); i++) sum_sequential.at(i) = a.at(i) + b.at(i); // Verify that the two vectors are equal. for (size_t i = 0; i < sum_sequential.size(); i++) { if (sum_parallel.at(i) != sum_sequential.at(i)) { std::cout << "Vector add failed on device.\n"; return -1; } } int indices[]{ 0, 1, 2, (static_cast(a.size()) - 1) }; constexpr size_t indices_size = sizeof(indices) / sizeof(int); // Print out the result of vector add. for (int i = 0; i < indices_size; i++) { int j = indices[i]; if (i == indices_size - 1) std::cout << "...\n"; std::cout << "[" << j << "]: " << a[j] << " + " << b[j] << " = " << sum_parallel[j] << "\n"; } a.clear(); b.clear(); sum_sequential.clear(); sum_parallel.clear(); std::cout << "Vector add successfully completed on device.\n"; return 0; }