Intel® oneAPI Threading Building Blocks
Ask questions and share information about adding parallelism to your applications when using this threading library.

help!! parallel_reduce output problem..

mblizz
Beginner
242 Views

I am just new to thread building block and also parallel computing. I am trying to implement a selection sort like algorithm i parallel, only that the searching of the mininum number is done in parallel, the swapping is done in serial. My problem is that the output is not the same in with the serial implementaion when the input data is in random in order but when my input is in reverse order, the output is correct and in ascending order. I can't figure out what is wrong with it. My code is here..

#include
#include
#include
#include
#include "tbb/task_scheduler_init.h"
#include "tbb/parallel_reduce.h"
#include "tbb/blocked_range.h"
#include "tbb/tick_count.h"

using namespace std;
using namespace tbb;


void swap( vector& array, long i, long j)
{
int temp;
temp = array;
array = array;
array = temp;
}

class PMinIndex{
vector my_a;
public:
long minIndex;
int minVal;
void operator()(const blocked_range& r){
vector a = my_a;
minIndex = r.begin();
minVal = a[minIndex];
for (long i = r.begin();i!=r.end();i++){
int val = a;
if (val < minVal){
minIndex = i;
minVal = val;
}
}
}

PMinIndex(PMinIndex &x, split):
my_a(x.my_a)
{}

void join(PMinIndex& y ) {
if (y.minVal <= minVal) {
minVal = y.minVal;
minIndex = y.minIndex;
}
}

PMinIndex(vector a):
my_a(a)
{}
};


void ParallelSelection( vector & list, long n ) {
vector backUp = list;
for (long i = 0;i PMinIndex pmi(backUp);
static affinity_partitioner ap;
parallel_reduce(blocked_range(0,backUp.size()), pmi, ap );
list = pmi.minVal;
//cout << pmi.minVal << endl;
backUp.erase(backUp.begin() + pmi.minIndex);
}
}


void SerialSelection(vector & list)
{
long i, j,minat;
int min;
for(i = 0; i<(list.size()-1); i++)
{
minat = i;
min = list;
//select the min of the rest of array
for(j = i+1;j < list.size(); j++)
{
//ascending order for descending reverse
if(min > list)
{
//the position of the min element
minat = j;
min = list;
}
}
//swap
//double temp = list;
//list = list[minat];
//list[minat]=temp;
//---or use the function swap()--
swap(list,i,minat);
}
}

int main(){
task_scheduler_init init;
vector sList, pList;
string fileInput, numInput;

cout << "Selection Sort\n" << "Enter filename: ";
cin >> fileInput;
ifstream infile(fileInput.c_str());
if (!infile){
cout << "error";
return 0;
}
cout << "Reading file... Please wait";
while (infile >> numInput){
//Filters the valid inputs
if (atoi(numInput.c_str()))
sList.push_back(atoi(numInput.c_str()));
}
infile.close();
pList = sList;

cout << endl << sList.size() << " numbers to sort\n";

cout << "Serial Implementation" << endl;
tick_count serial_t0 = tick_count::now();
SerialSelection(sList);
tick_count serial_t1 = tick_count::now();
cout << "Done with serial version\n";
cout << (serial_t1 - serial_t0).seconds()<< " seconds" << endl;


cout << "Parallel Implementation" << endl;
tick_count parallel_t0 = tick_count::now();
ParallelSelection(pList,pList.size());
tick_count parallel_t1 = tick_count::now();
cout << "Done with parallel version\n";
cout << (parallel_t1 - parallel_t0).seconds()<< " seconds" << endl;
ofstream out("s.txt");
ofstream outfil("p.txt");
for (long i = 0; i < sList.size(); ++i) {
if (sList != pList) {
cout << "ERROR: Serial and Parallel Results are Different!" << endl;
cout << sList << " " << pList << "position " << i << endl;
break;
}
}

for (long i = 0; i < pList.size(); ++i) {
outfil << pList << endl;

}
for (long i = 0; i < sList.size(); ++i) {
out << sList << endl;

}
cout << " Done validating results." << endl;
cout << "Resulting in a speedup of " << (serial_t1 - serial_t0).seconds() / (parallel_t1 - parallel_t0).seconds() << endl;

return 0;
}

Any help or explanation or suggestions would be very helpful and I would appreciate it.

0 Kudos
1 Reply
Alexey-Kukanov
Employee
242 Views
Might be the reason is that you use <= in join(), but < in operator() of the body class.
Note that you will not get any performance benefits of such "parallelization", since you copy whole vectors everywhere. Even erasing one element of a vector results in copying of all subsequent elements, possibly one-by-one. Pass vectors by reference, and use swappingas in the serial version. A tip: blocked range might begin with any index, not necessary 0.
0 Kudos
Reply