Intel® C++ Compiler
Support and discussions for creating C++ code that runs on platforms based on Intel® processors.
Announcements
Welcome to the Intel Community. If you get an answer you like, please mark it as an Accepted Solution to help others. Thank you!
7568 Discussions

OpenMP parallelization and compiler options

Guan__Zhen
Beginner
109 Views

Fellow code developers, 

I've got several years of experience in code parallelization with MPI. Recently I began to use OpenMP, and I quickly ran into a lot of problems. Right now the most troubling one is the interaction between the Intel compiler's own optimization and my hand-written OpenMP parallelization. Allow me to demonstrate the problem with a simple case:

Suppose we have two functions. Each function loops over a vector, and there is no data dependency between the two functions. Now I use OpenMP to create two threads and let each thread handle one of the two functions. In theory, the wall time of the two-thread version should be half that of the one-thread version. In my experiment, this holds only when the compiler optimization flag is set to -O0. With -O1, it is no longer valid.

If anyone can offer some insight into the problem, it will be greatly appreciated.

This is the test code:

main.cpp

#include <iostream>

#include <omp.h>

#include <vector>

#include <stdio.h>

#include <chrono>

 

#include "tools.h"

 

#define N 60000000

 

using namespace std;

using namespace chrono;

 

/**
 * Replaces row `i` of `data` with its running sum, in place.
 *
 * For every j in [1, N) the element at j becomes the sum of itself and the
 * (already updated) element at j-1, so each iteration depends on the previous
 * one.
 *
 * NOTE(review): the statement as posted read `data = data[j-1] + data;`, which
 * does not compile for a vector of vectors. The `[i]`/`[j]` subscripts were
 * presumably stripped by the forum markup; they are restored here -- confirm
 * against the author's original file.
 *
 * @param i    index of the row to process; must be a valid index into `data`.
 * @param data table of rows. Only row `i` is read or written, and that row
 *             must hold at least N elements.
 */
void func(int i, std::vector<std::vector<int> > &data) {
    // Bind the row once instead of re-indexing the outer vector every iteration.
    std::vector<int> &row = data[i];
    for (int j = 1; j < N; ++j) {
        row[j] = row[j - 1] + row[j];
    }
}

 

int main(int argc, char *argv[]) {

vector<vector<int> > data(24, vector<int>(N, 1));

 

string hostName, Ip;

if (GetHostInfo(hostName, Ip)) {

}

cout << "hostname: " << hostName << ", ip: " << Ip << endl;

 

auto start = system_clock::now();

 

#pragma omp parallel shared(data)

{

#pragma omp sections

{

#pragma omp section

func(0, data);

#pragma omp section

func(1, data);

#pragma omp section

func(2, data);

#pragma omp section

func(3, data);

#pragma omp section

func(4, data);

#pragma omp section

func(5, data);

#pragma omp section

func(6, data);

#pragma omp section

func(7, data);

#pragma omp section

func(8, data);

#pragma omp section

func(9, data);

#pragma omp section

func(10, data);

#pragma omp section

func(11, data);

#pragma omp section

func(12, data);

#pragma omp section

func(13, data);

#pragma omp section

func(14, data);

#pragma omp section

func(15, data);

#pragma omp section

func(16, data);

#pragma omp section

func(17, data);

#pragma omp section

func(18, data);

#pragma omp section

func(19, data);

#pragma omp section

func(20, data);

#pragma omp section

func(21, data);

#pragma omp section

func(22, data);

#pragma omp section

func(23, data);

}

}

 

auto end = system_clock::now();

auto duration = duration_cast<microseconds>(end-start);

cout << "time: " << double(duration.count()) * microseconds::period::num / microseconds::period::den << "s\n";

return 0;

}

tools.h

#include <iostream> /* cout */

#include <unistd.h>/* gethostname */

#include <netdb.h> /* struct hostent */

#include <arpa/inet.h> /* inet_ntop */

#include <stdlib.h> /* system */

 

/**
 * Looks up the local host name and the first address it resolves to.
 *
 * @param hostName receives the name reported by gethostname(). It is assigned
 *                 as soon as that call succeeds, even if the address lookup
 *                 fails afterwards.
 * @param Ip       receives the textual form of the first resolved address;
 *                 left untouched on failure.
 * @return true when both values were obtained; false otherwise, after writing
 *         a short message to std::cout.
 */
inline bool GetHostInfo(std::string& hostName, std::string& Ip) {
    // Zero-fill and pass one byte less than the buffer size so the name is
    // always NUL-terminated, even if gethostname() truncates it.
    char name[256] = {0};
    if (gethostname(name, sizeof(name) - 1) != 0) {
        std::cout << "gethostname failed";
        return false;
    }
    hostName = name;

    // gethostbyname() returns a null pointer when the name cannot be resolved;
    // the original dereferenced the result unconditionally.
    const struct hostent* host = gethostbyname(name);
    if (host == nullptr || host->h_addr_list == nullptr || host->h_addr_list[0] == nullptr) {
        std::cout << "hostname transform to ip failed";
        return false;
    }

    // Sized for the longest textual address inet_ntop() can produce (IPv6);
    // 32 bytes is only enough for IPv4.
    char ipStr[INET6_ADDRSTRLEN];
    const char* ret = inet_ntop(host->h_addrtype, host->h_addr_list[0], ipStr, sizeof(ipStr));
    if (ret == nullptr) {
        std::cout << "hostname transform to ip failed";
        return false;
    }

    Ip = ipStr;
    return true;
}

/*

int main(int argc, char *argv[]) {

std::string hostName;

std::string Ip;

 

bool ret = GetHostInfo(hostName, Ip);

if (true == ret) {

std::cout << "hostname: " << hostName << std::endl;

std::cout << "Ip: " << Ip << std::endl;

}

system("cat /proc/cpuinfo | grep 'core id'");

return 0;

}

*/

 

 

0 Kudos
0 Replies
Reply