#include #include #include #include #include #include #include #include #include const std::vector CELL_SIZES = { 1000, 10000, 100000, 1000000, 256 * 256 * 256}; constexpr unsigned DEFAULT_RUNS = 1000; using TimeMs = std::chrono::duration; struct ExampleSet { sycl::uint4 centroid; sycl::uint16 surfel; unsigned probability_occupied_; unsigned probability_occupied() { return probability_occupied_; } }; struct ArrayOfStructs { ArrayOfStructs(size_t size, sycl::queue& queue) : v{size, allocator{queue}} {} using allocator = sycl::usm_allocator; std::vector v; size_t size() const { return v.size(); } template ExampleSet* get_access(sycl::handler&) { return v.data(); } }; struct StructOfArrays { StructOfArrays(size_t size, sycl::queue& queue) : probabilities_{size, uintallocator{queue}}, surfels_{size, uint8allocator{queue}}, centroids_{size, uint4allocator{queue}} { } struct ReferenceAccess { // sycl::uint4& centroid; // sycl::uint8& surfel; unsigned& probability_occupied_; unsigned probability_occupied() { return probability_occupied_; } }; struct Acessor { ReferenceAccess operator[](size_t index) { // return ReferenceAccess{ // centroid[index], surfel[index], probability_occupied_[index]}; return ReferenceAccess{probability_occupied_[index]}; } ReferenceAccess operator[](size_t index) const { // return ReferenceAccess{ // centroid[index], surfel[index], probability_occupied_[index]}; return ReferenceAccess{probability_occupied_[index]}; } sycl::uint4* centroid; sycl::uint8* surfel; unsigned* probability_occupied_; }; template Acessor get_access(sycl::handler&) { return Acessor{ .centroid = centroids_.data(), .surfel = surfels_.data(), .probability_occupied_ = probabilities_.data()}; } size_t size() const { return probabilities_.size(); } using uintallocator = sycl::usm_allocator; std::vector probabilities_; using uint8allocator = sycl::usm_allocator; std::vector surfels_; using uint4allocator = sycl::usm_allocator; std::vector centroids_; }; struct Prob { unsigned probability; unsigned probability_occupied() const { return probability; } }; constexpr static unsigned OCCUPIED_LIMIT = 50; template TimeMs run_calculation(sycl::queue& q, TMemType& mem, size_t expected_count) { sycl::buffer total_count{1}; total_count.get_host_access()[0] = 0; auto event = q.submit( [&](sycl::handler& cgh) { auto mem_access = mem.template get_access(cgh); auto count_access = total_count.get_access(cgh); cgh.parallel_for( sycl::range<1>(mem.size()), [=](sycl::id<1> idx) { unsigned probability_occupied = 0; probability_occupied = mem_access[idx].probability_occupied(); if (probability_occupied > OCCUPIED_LIMIT) { auto v = sycl::atomic_ref< unsigned, sycl::memory_order::relaxed, sycl::memory_scope::device, sycl::access::address_space::global_space>(count_access[0]); v += 1; } } ); } ); event.wait(); auto end = event.template get_profiling_info(); auto start = event.template get_profiling_info(); if (total_count.get_host_access()[0] != expected_count) { std::cerr << "Our count was off! " << total_count.get_host_access()[0] << " vs " << expected_count << "\n "; std::exit(1); } return std::chrono::duration{end - start}; } struct Timing { TimeMs worst_time, mean_time; }; template Timing multi_run(sycl::queue& q, TMemType& mem, size_t expected_count) { std::vector times(DEFAULT_RUNS); ranges::generate(times, [&]() { return run_calculation(q, mem, expected_count); }); Timing out = { .worst_time = ranges::max(times), .mean_time = std::accumulate(std::begin(times), std::end(times), TimeMs{}) / times.size()}; return out; } int main() { auto device = sycl::device{sycl::gpu_selector{}}; auto context = sycl::context{device}; auto profiling_property = cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()}; auto queue = sycl::queue{device, profiling_property}; std::cout << "Running array of struct vs struct of arrays on <" << device.get_info() << "> ...\n"; std::cout << "\n\nNum cells | structure type | runtime average (ms) | runtime " "worstcase (ms)\n"; std::cout << "---------------------------------------------------------------------" "-------\n"; for (auto cell_size : CELL_SIZES) { std::vector probabilities(cell_size); std::mt19937 rng(0); std::uniform_int_distribution dist6(0, 100); ranges::generate(probabilities, [&]() { return dist6(rng); }); size_t num_occupied = ranges::count_if( probabilities, [](auto val) { return val > OCCUPIED_LIMIT; } ); { ArrayOfStructs arr{cell_size, queue}; for (auto i : ranges::views::iota(0ul, cell_size)) { arr.v[i].probability_occupied_ = probabilities[i]; } auto timing = multi_run(queue, arr, num_occupied); std::cout << cell_size << " | " << " Structs " << " | " << timing.mean_time.count() << " | " << timing.worst_time.count() << "\n"; } { StructOfArrays arr{cell_size, queue}; for (auto i : ranges::views::iota(0ul, cell_size)) { arr.probabilities_[i] = probabilities[i]; } auto timing = multi_run(queue, arr, num_occupied); std::cout << cell_size << " | " << " Arrays " << " | " << timing.mean_time.count() << " | " << timing.worst_time.count() << "\n"; } { sycl::buffer arr{sycl::range<1>{cell_size}}; { auto h_access = arr.get_host_access(); for (auto i : ranges::views::iota(0ul, cell_size)) { h_access[i].probability = probabilities[i]; } } auto timing = multi_run(queue, arr, num_occupied); std::cout << cell_size << " | " << " S Array " << " | " << timing.mean_time.count() << " | " << timing.worst_time.count() << "\n"; } } return 0; }