// DPC++ material used in the code sample: // • A one dimensional array of data shared between CPU and offload device. // • A device queue and kernel. #include #include #include #include using namespace sycl; cpu_selector d_selector; queue q(d_selector); unsigned long long DeltaFPS, CountFPS; constexpr size_t array_size = 100 * 100 * 100; float4* a = malloc_shared(array_size, q); float4* sum_parallel = malloc_shared(array_size, q); void VectorAdd(queue &q, const float4* a, float4* sum, size_t size) { range<1> num_items{size}; q.parallel_for(num_items, [=](auto i) { sum[i] = a[i] + 10; }).wait(); } void ShowFPS() { CountFPS++; if (GetTickCount64() > DeltaFPS + 1000) { printf("FPS = %lld\n", CountFPS); CountFPS = 0; DeltaFPS = GetTickCount64(); } } int main() { std::cout << "Running on device: " << q.get_device().get_info() << "\n"; while (true) { VectorAdd(q, a, sum_parallel, array_size); ShowFPS(); } return 0; }