// oneDPL headers are kept ahead of the standard library headers.
#include <oneapi/dpl/execution>
#include <oneapi/dpl/algorithm>

#include <chrono>
#include <exception>
#include <iostream>
#include <iterator>
#include <numeric>
#include <vector>

namespace
{
        // Number of elements in the array that is sorted on each device.
        // Large-run alternative kept for reference: 1024ul*1024ul*256ul
        constexpr unsigned long test_size = 16;

        // Forward declaration; main() calls this before its definition
        // further down in this file.
        void one_task(cl::sycl::device const& a_device);
}

int main()
{
        auto const & platforms = sycl::platform::get_platforms();
        for (auto const &platform : platforms) {
          auto const platform_name = platform.get_info<sycl::info::platform::name>();
          std::cout << "Platform: " << platform_name
                    << std::endl;
          auto const & devices = platform.get_devices();
          for (auto &device : devices) {
            try
            {
              one_task(device);
            }
            catch (cl::sycl::exception const& e)
            {
              std::cout << "Caught synchronous SYCL exception:\n"
                        << e.what() << std::endl;
            }
          }
        }
        return 0;
}

namespace
{
        // Sorts test_size doubles on the given device and prints a timing report.
        //
        // Output, in order: the device name, "Sorted"/"Not sorted", and the
        // wall-clock duration in seconds of the whole run and of each phase
        // (host allocation, data generation, sorting, verification).
        //
        // a_device: device on which the sort and the sortedness check are run.
        //
        // Asynchronous SYCL errors are printed by the queue's handler; any other
        // exception leaves this function.
        void one_task(cl::sycl::device const& a_device)
        {
                using timer   = std::chrono::steady_clock;
                using seconds = std::chrono::duration<double>;

                std::cout << " Device: " << a_device.get_info<sycl::info::device::name>()
                          << std::endl;

                // Prints every asynchronous SYCL error; an exception of any other
                // type is not handled here and propagates out of the handler.
                auto const report_async_errors = [] (cl::sycl::exception_list error_list)
                {
                  for (std::exception_ptr const& pending : error_list)
                  {
                    try
                    {
                      std::rethrow_exception(pending);
                    }
                    catch(cl::sycl::exception const& sycl_error)
                    {
                      std::cout << "Caught asynchronous SYCL exception:\n"
                                << sycl_error.what() << std::endl;
                    }
                  }
                };

                // The queue stays non-const because wait_and_throw() is called on it.
                cl::sycl::queue device_queue(a_device, report_async_errors);
                auto const policy = dpl::execution::make_device_policy<class PolicyC>(device_queue);

                auto const t_start = timer::now();
                std::vector<double> values(test_size);
                auto const t_allocated = timer::now();

                // Fill back-to-front so the array holds test_size-1 ... 0, i.e. it
                // starts out in descending order. Original rationale: reversed input
                // is the worst case for quicksort.
                // NOTE(review): which algorithm the device sort uses is not visible here.
                std::iota(values.rbegin(), values.rend(), 0);
                auto const t_generated = timer::now();

                std::sort(policy, values.begin(), values.end());
                device_queue.wait_and_throw();
                auto const t_sorted = timer::now();

                auto const is_ordered = std::is_sorted(policy, values.begin(), values.end());
                device_queue.wait_and_throw();
                auto const t_checked = timer::now();

                std::cout << (is_ordered ? "Sorted" : "Not sorted") << std::endl;

                // Length of the interval [from, to] in (fractional) seconds.
                auto const elapsed = [] (timer::time_point const from, timer::time_point const to)
                {
                  return seconds(to - from).count();
                };

                std::cout <<   "Total      : " << elapsed(t_start,     t_checked)
                          << "\nAllocating : " << elapsed(t_start,     t_allocated)
                          << "\nGenerating : " << elapsed(t_allocated, t_generated)
                          << "\nSorting    : " << elapsed(t_generated, t_sorted)
                          << "\nChecking   : " << elapsed(t_sorted,    t_checked)
                          << std::endl;
        }
}
