Intel® oneAPI DPC++/C++ Compiler
Talk to fellow users of Intel® oneAPI DPC++/C++ Compiler and companion tools like Intel® oneAPI DPC++ Library, Intel® DPC++ Compatibility Tool, and Intel® Distribution for GDB*

Errors GPU / FPGA runtime

Alechiove
Beginner
950 Views

Hi, I have this code that I wrote in C++ for computing PageRank.

It compiled and worked on cpu.

I also compiled it for FPGA.

But when I try to run it on a GPU or on the FPGA emulator, it doesn't work and fails with runtime errors.

What could be causing them?

 

Here are the errors on each device:

GPU

 

//command line to run program:
./BUFFER_PageRank 2 "datasets/cit-Patents.csv" 3e-05 0.85 1 


Device : Intel(R) UHD Graphics P630 [0x3e96]
TIME FOR SETUP
Elapsed time in nanoseconds: 9078559211 ns
Elapsed time in microseconds: 9078559 µs
Elapsed time in milliseconds: 9078 ms
Elapsed time in seconds: 9 sec
terminate called after throwing an instance of 'sycl::_V1::runtime_error'
  what():  Native API failed. Native API returns: -1 (PI_ERROR_DEVICE_NOT_FOUND) -1 (PI_ERROR_DEVICE_NOT_FOUND)
Aborted

 

FPGA

 

// Command line to run Program
./Arria10_BUFFER_PageRank.fpga 1 "datasets/cit-Patents.csv" 3e-05 0.85 1 


Device : Intel(R) FPGA Emulation Device
TIME FOR SETUP
Elapsed time in nanoseconds: 8733429344 ns
Elapsed time in microseconds: 8733429 µs
Elapsed time in milliseconds: 8733 ms
Elapsed time in seconds: 8 sec
terminate called after throwing an instance of 'sycl::_V1::runtime_error'
  what():  Invalid device program image: size is zero -30 (PI_ERROR_INVALID_VALUE)
Aborted

 

Here is how I compiled the programs, followed by all of the code:

 

Compiling commands:

 

 

// for CPU / GPU
icpx -fsycl BUFFER_PageRank.cpp -o BUFFER_PageRank
// for FPGA
icpx -fsycl -fintelfpga -Xshardware -Xstarget=Arria10 BUFFER_PageRank.cpp -o Arria10_BUFFER_PageRank.fpga

 

 

main code (BUFFER_PageRank.cpp):

 

 

#include <sycl/sycl.hpp>
#include <sycl/ext/intel/fpga_extensions.hpp>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
#include "guideline.h"
#include "print_vector.h"
#include "print_time.h"
#include "read_graph.h"
#include "flatVector.h"


using namespace sycl;

int main(int argc, char* argv[]){
    // Check Command Line
    if(argc < 6){
        // NOT ENOUGH PARAMS BY COMMAND LINE -> PROGRAM HALTS
        guideline();
    }
    else{
        // Command Line parsing
        int device_selected = atoi(argv[1]);
        std::string csv_path = argv[2];
        float threshold = atof(argv[3]);
        float damping = atof(argv[4]);
        int verbose;
        try{verbose = atoi(argv[5]);}
        catch (exception const& e) {verbose = 0;}
        device d = device(default_selector());
        
        // Queue
        queue q(d);
        std::cout << "Device : " << q.get_device().get_info<info::device::name>() << "\n"; // print del device
        // Reading and setup Time Calculation
        auto start_setup = std::chrono::steady_clock::now();
        // Graph Retrieval by csv file
        std::vector<std::vector<int>> graph = Read_graph(csv_path);/*Sparse Matrix Representation with the description of each Edge of the Graph*/
        std::vector<int> flatGraph = flatten<int>(graph);
        // Calculation of the # Nodes
        int numNodes  = countNodes(graph);
        // Calculation of the Degree of each node
        std::vector<int> degreesNodes = getDegrees(graph, numNodes+1);
        auto end_setup = std::chrono::steady_clock::now();
        // Setup Execution Time print
        std::cout << "TIME FOR SETUP" << "\n";
        print_time(start_setup, end_setup);
        // Check Print
        //printVector<int>(degreesNodes);
        //Creation of Initial and Final Ranks' vectors of PageRank [R(t); R(t+1)]
        std::vector<float> ranks_t(numNodes, (float)(1.0/ (float)(numNodes)));
        std::vector<float> ranks_t_plus_one(numNodes, 0.0);
        std::vector<float> ranksDifferences(numNodes, 0.0);

        // PageRank Execution Time calculation
        auto start = std::chrono::steady_clock::now();
        buffer<int> bufferEdges(flatGraph.data(),flatGraph.size());
        buffer<float> bufferRanks(ranks_t.data(),ranks_t.size());
        buffer<int> bufferDegrees(degreesNodes.data(),degreesNodes.size());
        buffer<float> bufferRanksNext(ranks_t_plus_one.data(),ranks_t_plus_one.size());
        buffer<float> bufferRanksDifferences(ranksDifferences.data(),ranksDifferences.size());
        float distance = threshold + 1;
        int graph_size = flatGraph.size();
        int T = 1;
        while (distance > threshold) {
            q.submit([&](handler &h){
                accessor Edges(bufferEdges,h,read_only);
                accessor Ranks(bufferRanks,h,read_only);
                accessor Degrees(bufferDegrees,h,read_only);
                accessor RanksNext(bufferRanksNext,h,write_only);
                accessor RanksDifferences(bufferRanksDifferences,h,write_only);
                h.parallel_for(range<1>(numNodes),[=] (id<1> i){
                    RanksNext[i] = (1.0 - damping) / numNodes;
                    int index_node_i;
                    int index_node_j;
                    for (int j = 0; j<graph_size;j+=2) {
                        index_node_i = j;
                        index_node_j = j + 1;
                        if (Edges[index_node_j] == i) {
                            RanksNext[i] += damping * Ranks[Edges[index_node_i]] / Degrees[Edges[index_node_i]];
                        }
                    }
                    RanksDifferences[i] = (RanksNext[i] - Ranks[i]) * (RanksNext[i] - Ranks[i]);
                });
            }).wait();
            distance = 0;
            for (int i = 0; i < numNodes; i++) {
                distance += ranksDifferences[i];
                ranks_t[i] = ranks_t_plus_one[i];
                ranks_t_plus_one[i] = 0.0;
            }
            distance = sqrt(distance);
            std::cout<< "Time:\t" << T << "\tEuclidian Distance:\t" << distance << std::endl;
            T++;
        }
        auto end = std::chrono::steady_clock::now();
        // PageRank Results Printing
        if(verbose == 1){
            for(int i = 0;i<ranks_t.size();i++){
                std::cout<<"Final Vector" << i<< "-th component:\t"<<ranks_t[i]<<std::endl;
            }
        }
        std::cout<<std::endl<<std::endl<<std::endl;
        std::cout<<"Final Norm:\t"<<distance<<std::endl;
        // PageRank Execution Time Printing
        std::cout << "TIME FOR PAGERANK" << "\n";
        print_time(start, end);

    }
    return 0;
}

 

 

Other files:

flatVector.h

 

 

#include <iostream>
#include <vector>

/// Concatenates the rows of a nested vector into one flat vector.
///
/// Rows are appended in order, each row's elements in order; empty rows
/// contribute nothing.
///
/// @param nestedVector  Rows to concatenate.
/// @return A vector holding every element of every row.
template<typename T>
std::vector<T> flatten(const std::vector<std::vector<T>>& nestedVector) {
    // Size the result once up front: growing element by element reallocates
    // repeatedly on large inputs.
    typename std::vector<T>::size_type total = 0;
    for (const auto& subVector : nestedVector) {
        total += subVector.size();
    }

    std::vector<T> flatVector;
    flatVector.reserve(total);
    for (const auto& subVector : nestedVector) {
        flatVector.insert(flatVector.end(), subVector.begin(), subVector.end());
    }
    return flatVector;
}

 

 

guideline.h

 

 

#include <iostream>

/// Prints the command-line usage text to standard output.
///
/// Declared inline because it is defined in a header: without it, including
/// the header from more than one translation unit is a multiple-definition
/// error.
inline void guideline(){
    std::cout
        << "Not enough input parameters!\n\n"
        << "Usage guide:\n\n"
        << "First parameter:\tDevice code (as int number)\n"
        << "\t\t1: CPU\n"
        << "\t\t2: GPU\n"
        << "\t\t3: FPGA\n"
        << "Second parameter:\tCsv path of the dataset\n"
        << "Available Ones:\n\n"
        << "\t\t\"datasets/cit-Patents.csv\"\n"
        << "\t\t\"datasets/soc-LiveJournal1.csv\"\n"
        << "\t\t\"datasets/twitter-2010.csv\"\n"
        << "\t\t\"datasets/web-uk-2005-all.csv\"\n"
        << "Third parameter:\tThreshold (float value)\n"
        << "Fourth parameter:\tDamping (float value)\n"
        << "Fifth parameter:\tVerbose (int value)\n"
        << "Prints all the ranks of each node\n"
        << "Watch out! The print is huge\n"
        << std::flush;  // one flush at the end instead of one per dataset line
}

 

 

print_time.h

 

 

#include <iostream>
#include <chrono>

/// Prints the time elapsed between two steady-clock readings.
///
/// The interval is written to standard output four times, truncated to
/// nanoseconds, microseconds, milliseconds and seconds respectively.
/// Declared inline because it is defined in a header.
///
/// @param start  Reading taken before the measured work.
/// @param end    Reading taken after the measured work.
inline void print_time(std::chrono::time_point<std::chrono::steady_clock> start, std::chrono::time_point<std::chrono::steady_clock> end){
    // Compute the interval once and convert it per unit.
    const auto elapsed = end - start;
    std::cout << "Elapsed time in nanoseconds: " << std::chrono::duration_cast<std::chrono::nanoseconds>(elapsed).count() << " ns" << '\n'
              << "Elapsed time in microseconds: " << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count() << " µs" << '\n'
              << "Elapsed time in milliseconds: " << std::chrono::duration_cast<std::chrono::milliseconds>(elapsed).count() << " ms" << '\n'
              << "Elapsed time in seconds: " << std::chrono::duration_cast<std::chrono::seconds>(elapsed).count() << " sec" << std::endl;
}

 

 

print_vector.h

 

 

#include <iostream>
#include <vector>

/// Prints every element of a vector to standard output, one per line,
/// prefixed with its 1-based position.
///
/// @param vector_like_var  Vector to print; taken by const reference so that
///                         large vectors are not copied just to be printed.
template <typename T>
void printVector(const std::vector<T>& vector_like_var){
    // Unsigned index: matches the type returned by size().
    for(typename std::vector<T>::size_type i = 0; i < vector_like_var.size(); ++i){
        std::cout<< "element " << i + 1 << "of vector:\t" << vector_like_var[i] <<std::endl;
    }
}

read_graph.h

#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
// #include "print_vector.h"


/// Counts, for every node id, how many edge endpoints refer to it.
///
/// Both endpoints of an edge are counted, so a node's value is the number of
/// edges it appears in as first entry plus the number it appears in as second
/// entry (a self-loop counts twice).
/// NOTE(review): PageRank normally divides by the out-degree of the source
/// node only — confirm that counting both endpoints is intended.
///
/// Edges with fewer than two entries, or with an endpoint outside
/// [0, numNodes), are ignored instead of indexing out of bounds.
/// Declared inline because it is defined in a header.
///
/// @param graph     Edge list; entries 0 and 1 of each row are node ids.
/// @param numNodes  Number of slots in the result (largest id + 1).
/// @return Vector of `numNodes` counters; empty when `numNodes` <= 0.
inline std::vector<int> getDegrees(const std::vector<std::vector<int>> &graph, int numNodes) {
    if (numNodes <= 0) {
        return {};
    }
    std::vector<int> degrees(numNodes);
    for (const auto &edge : graph) {
        if (edge.size() < 2) {
            continue;  // not an edge: nothing to index
        }
        const int first = edge[0];
        const int second = edge[1];
        if (first < 0 || first >= numNodes || second < 0 || second >= numNodes) {
            continue;  // endpoint has no slot in the result
        }
        ++degrees[first];
        ++degrees[second];
    }
    return degrees;
}

/// Reads an edge list from a comma-separated text file.
///
/// Each line is split on ',' and every cell is parsed as an int; the parsed
/// values of one line form one row of the result. Lines that cannot describe
/// an edge — blank lines, lines with a non-numeric cell (e.g. a header or a
/// '#' comment), lines with fewer than two values — are skipped instead of
/// throwing or producing an empty row.
/// Declared inline because it is defined in a header.
///
/// @param file_name  Path of the file to read.
/// @return One row per accepted line; empty when the file cannot be opened
///         (an error message is written to standard error in that case).
inline std::vector<std::vector<int>> Read_graph(const std::string &file_name){
    // Open the file
    std::ifstream file(file_name);
    if (!file.is_open()) {
        std::cerr << "Impossibile aprire il file" << std::endl;
        return {};
    }

    // Read the file line by line
    std::string line;
    std::vector<std::vector<int>> graph;
    while (std::getline(file, line)) {
        std::stringstream ss(line);
        std::string cell;
        std::vector<int> edge;
        bool well_formed = true;
        while (well_formed && std::getline(ss, cell, ',')) {
            // Stream extraction reports failure through the stream state,
            // whereas stoi would throw on a non-numeric cell.
            std::stringstream field(cell);
            int value = 0;
            if (field >> value) {
                edge.push_back(value);
            } else {
                well_formed = false;
            }
        }
        if (well_formed && edge.size() >= 2) {
            graph.push_back(edge);
        }
    }
    // The ifstream closes the file when it goes out of scope.
    return graph;
}

/// Returns the largest value stored anywhere in the edge list.
///
/// Despite the name this is the highest node id, not a count of distinct
/// nodes; the result is 0 for an empty graph or when no value is positive.
/// The graph is taken by const reference so the whole edge list is not
/// copied on every call. Declared inline because it is defined in a header.
///
/// @param graph  Edge list whose entries are node ids.
/// @return The maximum entry, never less than 0.
inline int countNodes(const std::vector<std::vector<int>> &graph){
    int largest = 0;
    for (const auto &edge : graph) {
        for (const int node : edge) {
            if (node > largest) {
                largest = node;
            }
        }
    }
    return largest;
}

// int main(int argc, char* argv[]) {
//     if(argc < 1){return 0;}
//     else{
//         std::string csv_path = argv[1];
//         std::vector<std::vector<int>> graph = Read_graph(csv_path);
//         int numNodes  = countNodes(graph);
//         std::vector<int> degreesNodes = getDegrees(graph, numNodes+1);
//         printVector<int>(degreesNodes);

//         return 0;
//     }
// }

 

 

0 Kudos
1 Reply
SeshaP_Intel
Moderator
855 Views

Hi,


Since this is a duplicate thread of https://community.intel.com/t5/Intel-oneAPI-Data-Parallel-C/Compiled-program-Works-on-CPU-but-not-in-GPU-FPGA-and-cannot/m-p/1453024#M2769

we will no longer monitor this thread. We will continue addressing your issue in the other thread. 


Thanks and Regards,

Pendyala Sesha Srinivas


0 Kudos
Reply