Intel® oneAPI Threading Building Blocks
Ask questions and share information about adding parallelism to your applications when using this threading library.

gaussian elimination

dineshparallel
Beginner
270 Views
#include
#include
#include
#include
#include
#include "tbb/tick_count.h"
#include "tbb/parallel_for.h"
#include "tbb/blocked_range2d.h"
#include "tbb/task_scheduler_init.h"
using namespace tbb;
using namespace std;
double** a;
double** z;
double** r;
double* f;
double* b;
double* y;
double x, sum;
int n,k,j,i;
double** matrixAlloc(int, int);
double** matAlloc(int,int);
double** matAlloc1(int,int);
double* vecAlloc1(int);
double* vecAlloc(int);
double* vectorAlloc(int);
void inita1(int);
void initb1(int);
void matrixFree(double**, int);
void matFree(double**, int);

void vectorFree(double*);
void vecFree(double*);
void matrixPrint(double**, int);
void print(double**,int);
void vectorPrint(double*, int);
void vectorPrint1(double*, int);

class abcs
{
public:
int inita2(double**)
{
cout << "Input matrix coefficients a(i,j)=" << endl;
for ( int i = 0; i < n; i++)
{
for ( int j = 0; j < n; j++)
{
r=z;
}
}
return 0;
}
int initb2(double*)
{
cout << "Input right-hand side vector b(i)" << endl;
for (int i = 0; i < n; i++)
{
f=y;
}
return 0;
}
void traingular()
{
for (int k=0;k {
if ( fabs(r)>=1.e-6)
{
for (int i=k+1;i {
x = r/r;
for (int j=k;j r = r -r*x;
f = f - f*x;
}
}
else
{
cout << "zero pivot found in line:" << k << endl;

for (int i=k; i {
// Check for Zero Rows
//*********************
bool check = true;
int NumZeroRows = 0;
for (int j=0; j {
if (r != 0)
{
check = false;
}
}

// Locate any values of Zero along the Diagonal
// and rearrange matrix to remove these
//**********************************************
double pivot = r;

if (pivot == 0)
{
cout << "Pivot is zero, therefore swap rows." << endl;

int newrow = 0;
double newpivot = 0;
for (int j=i; j {
cout << r << endl;
if ((fabs(r)) > newpivot)
{
newrow = j;
newpivot =r;
cout << newpivot << endl;
}
}
cout << newpivot << endl;
if (newpivot != 0)
{
cout << "Suitable row found." << endl;
for(int k=0; k {
//Swap row with 0 in position i for one without
double temp = r;
r=r[newrow];
r[newrow]=temp;//setElement(i, k, getElement(newrow, k));
//setElement(newrow,k,temp);
}
}
else if (newpivot == 0)
{
//New row not found
cout << "ERROR ZERO ELEMENT IN DIAGONAL ";
cout << "CANNOT FINISH GAUSSIAN ELIMINATION." << endl;
}


}

}
}
}
}
};


class abc

{
public:

// double setElement(int,int,double);
int inita(double**)
{
cout << "Input matrix coefficients a(i,j)=" << endl;
for ( int i = 0; i < n; i++)
{
for ( int j = 0; j < n; j++)
{
a=z;
}
}
return 0;
}
int initb(double*)
{
cout << "Input right-hand side vector b(i)" << endl;
for (int i = 0; i < n; i++)
{
b=y;
}
return 0;
}


void operator()( const blocked_range2d& r ) const
{
for (size_t k=r.rows().begin();k {
if ( fabs(a)>=1.e-6)
{
for ( int i=k+1; i!=r.rows().end(); ++i )
{
x = a/a;
for (size_t j=r.cols().begin(); j!=r.cols().end(); ++j )
a = a -a*x;
b = b - b*x;

}
}
else
{
cout << "zero pivot found in line:" << k << endl;
for (int i=k; i {
// Check for Zero Rows
//*********************
bool check = true;
int NumZeroRows = 0;
for (int j=0; j {
if (a != 0)
{
check = false;
}
}

// Locate any values of Zero along the Diagonal
// and rearrange matrix to remove these
//**********************************************
double pivot = a;

if (pivot == 0)
{
cout << "Pivot is zero, therefore swap rows." << endl;

int newrow = 0;
double newpivot = 0;
for (int j=i; j {
cout << a << endl;
if ((fabs(a)) > newpivot)
{
newrow = j;
newpivot =a;
cout << newpivot << endl;
}
}
cout << newpivot << endl;
if (newpivot != 0)
{
cout << "Suitable row found." << endl;
for(int k=0; k {
//Swap row with 0 in position i for one without
double temp = a;
a=a[newrow];
a[newrow]=temp;//setElement(i, k, getElement(newrow, k));
//setElement(newrow,k,temp);
}
}
else if (newpivot == 0)
{
//New row not found
cout << "ERROR ZERO ELEMENT IN DIAGONAL ";
cout << "CANNOT FINISH GAUSSIAN ELIMINATION." << endl;
}


}
/*cout << "zero pivot found in line:" << k << endl;
exit(1);*/
}
}
}
}
};


/*double abc::setElement(int x,int y,int z)
{

a=z;
return 0;
}*/
int main()
{
task_scheduler_init init;
abc g;
abcs g2;

cout << "******* gaussian elimination *******" << endl << endl;
cout << "Input number of equations n=" << endl;
cin >> n;
a = matrixAlloc(n, n);
b = vectorAlloc(n);
z=matAlloc(n,n);
y=vecAlloc(n);
r=matAlloc1(n,n);
f=vecAlloc(n);
inita1(n);
initb1(n);

g.inita(z);
g.initb(y);
g2.inita2(z);
g2.initb2(y);
cout << endl << endl;
cout << "Coefficient matrix A:" << endl;
//matrixPrint(a, n);
cout << "Right-hand-side vector b:" << endl;
//vectorPrint(b, n);
tick_count s0=tick_count::now();
g2.traingular();
f[n-1]=f[n-1]/r[n-1][n-1];
for ( int i = n-2; i >= 0; i--)
{
sum = f;
for (int j = i+1; j < n; j++)
sum = sum - r*f;
f = sum/r;
}

tick_count s1=tick_count::now();

cout<<"solution for serial";
//vectorPrint1(f, n);
cout<<"time for serial"<<(s1-s0).seconds()<<"seconds";


tick_count t0=tick_count::now();
// convert to upper triangular form

parallel_for( blocked_range2d(0, n,16, 0, n,32),g);
print(a,n);

// back substituti n
b[n-1]=b[n-1]/a[n-1][n-1];
for ( int i = n-2; i >= 0; i--)
{
sum = b;
for (int j = i+1; j < n; j++)
sum = sum - a*b;
b = sum/a;
}

tick_count t1=tick_count::now();
cout << "Solution:" << endl;
vectorPrint(b, n);

matrixFree(a, n);
vectorFree(b);
cout<<"time for action is"<<(t1-t0).seconds()<<"seconds";
/*for(int i=0;i{
if(f==b)
{
cout<<"good";
}
else
{
cout<<"bad";
}
}*/
return 0;
}
void inita1(int n)
{
cout << "Input matrix coefficients a(i,j)=" << endl;
for ( int i = 0; i < n; i++)
{
for ( int j = 0; j < n; j++)
{
z=rand()%100+1;
}
}

}
void initb1(int n)
{
cout << "Input right-hand side vector b(i)" << endl;
for (int i = 0; i < n; i++)
{
y=rand()%100+1;
}

}


double** matrixAlloc(int n, int m)
{
double** p;
try
{
p = new double* ;
for ( int i = 0; i < n; i++ )
p = new double;
}
catch (bad_alloc e)
{
cout << "Exception occurred: "
<< e.what() << endl;
}
return p;
}

double* vectorAlloc(int n)
{
double* p;

try
{
p = new double;
}
catch (bad_alloc e)
{
cout << "Exception occurred: "
<< e.what() << endl;
}

return p;
}
double** matAlloc(int n, int m)
{
double** q;
try
{
q = new double* ;
for ( int i = 0; i < n; i++ )
q = new double;
}
catch (bad_alloc e)
{
cout << "Exception occurred: "
<< e.what() << endl;
}
return q;
}

double* vecAlloc(int n)
{
double* q;

try
{
q = new double;
}
catch (bad_alloc e)
{
cout << "Exception occurred: "
<< e.what() << endl;
}

return q;
}
double** matAlloc1(int n, int m)
{
double** p;
try
{
p = new double* ;
for ( int i = 0; i < n; i++ )
p = new double;
}
catch (bad_alloc e)
{
cout << "Exception occurred: "
<< e.what() << endl;
}
return p;
}

double* vecAlloc1(int n)
{
double* p;

try
{
p = new double;
}
catch (bad_alloc e)
{
cout << "Exception occurred: "
<< e.what() << endl;
}

return p;
}




void matrixFree(double** p, int n)
{
for ( int i = 0; i < n; i++)
delete[] p;
delete[] p;
p = 0;
}

void vecFree(double* p)
{
delete[] p;
p = 0;
}
void matFree(double** p, int n)
{
for ( int i = 0; i < n; i++)
delete[] p;
delete[] p;
p = 0;
}

void vectorFree(double* p)
{
delete[] p;
p = 0;
}


void matrixPrint(double** p, int n)
{
for ( int i = 0; i < n; i++)
{
for ( int j = 0; j < n; j++)
cout << setiosflags(ios::showpoint | ios::fixed | ios::right)
<< setprecision(4)
<< setw(12) << p;
cout << endl;
}
}

void vectorPrint(double* p, int n)
{
for ( int i = 0; i < n; i++)
{
cout << setiosflags(ios::showpoint | ios::fixed | ios::right)
<< setprecision(4)
<< setw(12) << p;
cout << endl << endl;
}
}
void vectorPrint1(double* p, int n)
{
for ( int i = 0; i < n; i++)
{
cout << setiosflags(ios::showpoint | ios::fixed | ios::right)
<< setprecision(4)
<< setw(12) << p;
cout << endl << endl;
}
}
void print(double** p, int n)
{
for ( int i = 0; i < n; i++)
{
for ( int j = 0; j < n; j++)
cout << setiosflags(ios::showpoint | ios::fixed | ios::right)
<< setprecision(4)
<< setw(12) << p;
cout << endl;
}
}


The above code consists of both serial and parallel code. After executing and entering martix size the results of serial and parallel program will be correct only when matrix size(n) should be equal or must be greater.

while giving the matrix size 1024x1024 and grain size should be 1024 in blocked_range2d, by specifying the grain size like this parallel program consuming more time than seial.The reason behind is when specifying grain size 16x32 the blocked range split the matrix and send it to two cores, but in my code a[0][0] must perfrom operation on every row present in the matrix. Is it possible to generate correct result in parallel by using above code.
0 Kudos
0 Replies
Reply