Community
cancel
Showing results for 
Search instead for 
Did you mean: 
michele86
Beginner
77 Views

OpenMP vs Windows APIs

Hi everybody! I'm comparing the performance of OpenMP and the Windows API on two versions of the same "producer - consumer" problem: there are n producers, m consumers and a shared buffer (an int array) of NUM positions. I'm running the tests on an Intel Core Duo with Windows Vista Home Premium. I've downloaded the Intel C++ Compiler (evaluation version), and I use it from the command prompt. When I run the programs with only one producer and one consumer, the OpenMP version is more efficient; on the other hand, when I specify a number of threads >2, OpenMP's performance drops like a hungry eagle, while the WinAPI version's is still good. I've also tried running both versions at the same time, and observed that the WinAPI version used more than 80% of the CPU and produced over 10 times as much data as the OpenMP one. To synchronize threads in the WinAPI version I use critical sections and condition variables, while in the OpenMP one I use #pragma omp critical (...) and #pragma omp flush (...).

Can anybody explain to me the causes of those differences?
How does OpenMP create threads at runtime?
Why do they have lower priority than their Windows counterparts?

Thank you very much!
Michele.
0 Kudos
4 Replies
jimdempseyatthecove
Black Belt
77 Views

Michele,

Could you post your test programs (both versions)?

Jim Dempsey

michele86
Beginner
77 Views

I think performance drops because of the high number of flush operations, but an OpenMP.org developer told me that this is the only way to avoid race conditions (is that correct?).
I tried to find a way to get exclusive access to the shared variables inside the while conditions, but I didn't succeed.
Thank you very much for your help!

OpenMP version:

#include
#include
#include
#include
#define NUM 10 //buffer size
#define TIME 3000 //argument passed to Sleep() by the timer section before it sets the finished flag

int main(int argc, char *argv[]){

int buffer[NUM];
int start; //index of the first element a consumer has to consume
int end; //index of the first free position in the buffer
unsigned long PTemp; //the new element the producer has to insert in the buffer
unsigned long CTemp;
int full,empty;
int dataNum; //number of data in the buffer
unsigned long dataCNum; //number of data consumed
int np,nc,finished;
double time_start,time_end;

CTemp=PTemp=dataCNum=0;
start=end=full=dataNum=np=nc=finished=0;
empty=1;

if(argc!=3){
printf("You have to specify the number of producers and the number of consumers! ");
return 1;
}

np=atoi(argv[1]); //number of producers
nc=atoi(argv[2]); //number of consumers

omp_set_nested(2);

time_start=clock();

#pragma omp parallel sections num_threads(3)
{
#pragma omp section //producer's code
{
#pragma omp parallel num_threads(np)
while(!finished){
#pragma omp flush(full,finished)
while(full&&!finished){
#pragma omp critical (control)
{
#pragma omp flush(dataNum)
if(dataNum full=0;
}
#pragma omp flush(full,finished)
}
}
#pragma omp critical (control)
{
#pragma omp flush(finished)
if(!finished){
#pragma omp flush(dataNum)
if(dataNum #pragma omp flush(PTemp)
PTemp++;
#pragma omp flush(PTemp,end)
buffer[end]=PTemp;
#pragma omp flush(dataNum)
dataNum++;
#pragma omp flush(dataNum)
//printf("PRODUCER %d: produced data = %d (dataNum = %d) ",omp_get_thread_num(),PTemp,dataNum);
#pragma omp flush(end)
end=(end+1)%NUM;
#pragma omp flush(end)
}
else{
full=1;
#pragma omp flush(full)
}
}//if !finished
#pragma omp flush(finished)
}//critical control
}//while
}//section
#pragma omp section //consumer's code
{
#pragma omp parallel num_threads(nc) private(CTemp)
while(!finished||dataNum>0){
#pragma omp flush(empty,finished)
while(empty&&!finished){
#pragma omp critical (control)
{
#pragma omp flush(dataNum)
if(dataNum>0){
empty=0;
#pragma omp flush(empty,finished)
}
}
}
#pragma omp critical (control)
{
#pragma omp flush(dataNum)
if(dataNum>0){
#pragma omp flush(start)
CTemp=buffer[start];
#pragma omp flush(dataNum)
&nb sp; dataNum--;
#pragma omp flush(dataNum)
//printf("CONSUMER %d: consumed data = %d (dataNum= %d) ",omp_get_thread_num(),CTemp,dataNum);
#pragma omp flush(start)
start=(start+1)%NUM;
#pragma omp flush(start,dataCNum)
dataCNum++;
#pragma omp flush(dataCNum)
}
else{
empty=1;
#pragma omp flush(empty)
}
#pragma omp flush(finished)
}//critical control
}//while
}//section
#pragma omp section
{
Sleep(TIME);
//printf("Tempo scaduto!! ");
#pragma omp critical (control)
{
finished=1;
#pragma omp flush(finished)
}
}//section
}//sections
time_end=clock();
printf("Time = %.0f millisec - data produced= %u , data consumed= %u ",time_end-time_start,PTemp,dataCNum);
return 0;
}


WinAPI version:


#include
#include
#include
#include
#define NUM 10    //number of slots in the shared buffer
#define TIME 3000 //argument passed to Sleep() in main before stop is raised
#define NP 10     //size of the producers[] handle array in main
#define NC 10     //size of the consumers[] handle array in main

//Shared state; the thread functions touch it while holding CritSect.
int buffer[NUM];        //ring buffer filled by producers, drained by consumers
int start;              //index of the next element to consume
int end;                //index of the next free slot
int dataNum;            //number of elements currently stored
int PTemp;              //last value produced
int stop;               //set to 1 by main to end the run
unsigned long dataCNum; //total number of elements consumed
unsigned long dataPNum; //total number of elements produced
CRITICAL_SECTION CritSect;
CONDITION_VARIABLE full;  //producers sleep on this while dataNum>=NUM
CONDITION_VARIABLE empty; //presumably the consumers' counterpart of full (its call sites are garbled in this listing)

/*
 * Producer thread entry point.
 *
 * Repeatedly appends the next PTemp value to the shared ring buffer. While
 * the buffer holds NUM elements the thread sleeps on the `full` condition
 * variable; after each insertion it wakes one thread waiting on `empty`.
 *
 * p: producer index packed into the pointer value (only used by the
 *    commented-out debug traces).
 * Returns 0 once `stop` has been observed.
 */
DWORD WINAPI start_producer(PVOID p){
    ULONG Pid=(ULONG)(ULONG_PTR)p;
    while(1){
        EnterCriticalSection(&CritSect);
        //loop around the wait: the predicate is re-tested after every wake-up
        while(dataNum>=NUM && !stop){
            SleepConditionVariableCS(&full,&CritSect,INFINITE);
        }
        if(stop){
            LeaveCriticalSection(&CritSect);
            break;
        }
        PTemp++;
        buffer[end]=PTemp;
        dataNum++;
        //printf("***Producer %d : produced datum = %d, dataNum = %d ",Pid,PTemp,dataNum);
        end=(end+1)%NUM;
        dataPNum++;
        LeaveCriticalSection(&CritSect);
        //a slot was filled: let one waiting consumer run
        WakeConditionVariable(&empty);
    }
    //printf("***Producer %d : finished!! ",Pid);
    return 0;
}

/*
 * Consumer thread entry point.
 *
 * Repeatedly removes the oldest element from the shared ring buffer. While
 * the buffer is empty the thread sleeps on the `empty` condition variable;
 * after each removal it wakes one thread waiting on `full`.
 *
 * p: consumer index packed into the pointer value (only used by the
 *    commented-out debug traces).
 * Returns 0 once `stop` is set and the buffer has been drained.
 */
DWORD WINAPI start_consumer(PVOID p){
    int CTemp;
    ULONG Cid=(ULONG)(ULONG_PTR)p;
    while(1){
        EnterCriticalSection(&CritSect);
        //loop around the wait: the predicate is re-tested after every wake-up
        while(dataNum<=0 && !stop){
            SleepConditionVariableCS(&empty,&CritSect,INFINITE);
        }
        //leftover elements are still consumed after stop has been raised
        if(stop && dataNum<=0){
            LeaveCriticalSection(&CritSect);
            break;
        }
        CTemp=buffer[start];
        dataNum--;
        //printf("Consumer %d : consumed datum = %d, dataNum = %d ",Cid,CTemp,dataNum);
        start=(start+1)%NUM;
        dataCNum++;
        LeaveCriticalSection(&CritSect);
        //a slot was freed: let one waiting producer run
        WakeConditionVariable(&full);
        //Sleep (rand() % CONSUMER_SLEEP_TIME_MS);
    }
    //printf("Consumer %d : finished!! ",Cid);
    return 0;
}


int main(int argc, char *argv[]){

HANDLE producers[NP], consumers[NC];
double time_start,time_end;
start=end=dataNum=PTemp=stop=dataCNum=dataPNum=0;

InitializeConditionVariable(&full);
InitializeConditionVariable(∅);
InitializeCriticalSection(&CritSect);

DWORD id;
EnterCriticalSection(&CritSect);
for(int i=0;i producers=CreateThread(NULL, 0, start_producer, (PVOID)i, 0, &id);
}
for(int i=0;i consumers=CreateThread(NULL, 0, start_consumer, (PVOID)i, 0, &id);
}
LeaveCriticalSection(&CritSect);

time_start=clock();
Sleep(TIME);
EnterCriticalSection(&CritSect);
stop=1;
LeaveCriticalSection(&CritSect);

WakeAllConditionVariable(&full);
WakeAllConditionVariable(∅);

for(int i=0;i WaitForSingleObject(producers,INFINITE);
}
for(int i=0;i WaitForSingleObject(consumers,INFINITE);
}
time_end=clock();
printf("Time = %.0f millisec - data produced = %u - data consumed = %u ",time_end-time_start,dataPNum,dataCNum);
return 0;
}
jimdempseyatthecove
Black Belt
77 Views


Quoting - michele86

I think performance drops because of the high number of flush operations, but an OpenMP.org developer told me that this is the only way to avoid race conditions (is that correct?).
I tried to find a way to get exclusive access to the shared variables inside the while conditions, but I didn't succeed.
Thank you very much for your help!

OpenMP version:

#include
#include
#include
#include
#define NUM 10 //buffer size
#define TIME 3000 //argument passed to Sleep() by the timer section before it sets the finished flag

int main(int argc, char *argv[]){

int buffer[NUM];
int start; //index of the first element a consumer has to consume
int end; //index of the first free position in the buffer
unsigned long PTemp; //the new element the producer has to insert in the buffer
unsigned long CTemp;
int full,empty;
int dataNum; //number of data in the buffer
unsigned long dataCNum; //number of data consumed
int np,nc,finished;
double time_start,time_end;

CTemp=PTemp=dataCNum=0;
start=end=full=dataNum=np=nc=finished=0;
empty=1;

if(argc!=3){
printf("You have to specify the number of producers and the number of consumers! ");
return 1;
}

np=atoi(argv[1]); //number of producers
nc=atoi(argv[2]); //number of consumers

omp_set_nested(2);

time_start=clock();

#pragma omp parallel sections num_threads(3)
{
#pragma omp section //producer's code
{
#pragma omp parallel num_threads(np)
while(!finished){
#pragma omp flush(full,finished)
while(full&&!finished){
#pragma omp critical (control)
{
#pragma omp flush(dataNum)
if(dataNum full=0;
}
#pragma omp flush(full,finished)
}
}
#pragma omp critical (control)
{
#pragma omp flush(finished)
if(!finished){
#pragma omp flush(dataNum)
if(dataNum #pragma omp flush(PTemp)
PTemp++;
#pragma omp flush(PTemp,end)
buffer[end]=PTemp;
#pragma omp flush(dataNum)
dataNum++;
#pragma omp flush(dataNum)
//printf("PRODUCER %d: produced data = %d (dataNum = %d) ",omp_get_thread_num(),PTemp,dataNum);
#pragma omp flush(end)
end=(end+1)%NUM;
#pragma omp flush(end)
}
else{
full=1;
#pragma omp flush(full)
}
}//if !finished
#pragma omp flush(finished)
}//critical control
}//while
}//section
#pragma omp section //consumer's code
{
#pragma omp parallel num_threads(nc) private(CTemp)
while(!finished||dataNum>0){
#pragma omp flush(empty,finished)
while(empty&&!finished){
#pragma omp critical (control)
{
#pragma omp flush(dataNum)
if(dataNum>0){
empty=0;
#pragma omp flush(empty,finished)
}
}
}
#pragma omp critical (control)
{
#pragma omp flush(dataNum)
if(dataNum>0){
#pragma omp flush(start)
CTemp=buffer[start];
#pragma omp flush(dataNum)
&nb sp; dataNum--;
#pragma omp flush(dataNum)
//printf("CONSUMER %d: consumed data = %d (dataNum= %d) ",omp_get_thread_num(),CTemp,dataNum);
#pragma omp flush(start)
start=(start+1)%NUM;
#pragma omp flush(start,dataCNum)
dataCNum++;
#pragma omp flush(dataCNum)
}
else{
empty=1;
#pragma omp flush(empty)
}
#pragma omp flush(finished)
}//critical control
}//while
}//section
#pragma omp section
{
Sleep(TIME);
//printf("Tempo scaduto!! ");
#pragma omp critical (control)
{
finished=1;
#pragma omp flush(finished)
}
}//section
}//sections
time_end=clock();
printf("Time = %.0f millisec - data produced= %u , data consumed= %u ",time_end-time_start,PTemp,dataCNum);
return 0;
}


WinAPI version:


#include
#include
#include
#include
#define NUM 10    //number of slots in the shared buffer
#define TIME 3000 //argument passed to Sleep() in main before stop is raised
#define NP 10     //size of the producers[] handle array in main
#define NC 10     //size of the consumers[] handle array in main

//Shared state; the thread functions touch it while holding CritSect.
int buffer[NUM];        //ring buffer filled by producers, drained by consumers
int start;              //index of the next element to consume
int end;                //index of the next free slot
int dataNum;            //number of elements currently stored
int PTemp;              //last value produced
int stop;               //set to 1 by main to end the run
unsigned long dataCNum; //total number of elements consumed
unsigned long dataPNum; //total number of elements produced
CRITICAL_SECTION CritSect;
CONDITION_VARIABLE full;  //producers sleep on this while dataNum>=NUM
CONDITION_VARIABLE empty; //presumably the consumers' counterpart of full (its call sites are garbled in this listing)

/*
 * Producer thread entry point.
 *
 * Repeatedly appends the next PTemp value to the shared ring buffer. While
 * the buffer holds NUM elements the thread sleeps on the `full` condition
 * variable; after each insertion it wakes one thread waiting on `empty`.
 *
 * p: producer index packed into the pointer value (only used by the
 *    commented-out debug traces).
 * Returns 0 once `stop` has been observed.
 */
DWORD WINAPI start_producer(PVOID p){
    ULONG Pid=(ULONG)(ULONG_PTR)p;
    while(1){
        EnterCriticalSection(&CritSect);
        //loop around the wait: the predicate is re-tested after every wake-up
        while(dataNum>=NUM && !stop){
            SleepConditionVariableCS(&full,&CritSect,INFINITE);
        }
        if(stop){
            LeaveCriticalSection(&CritSect);
            break;
        }
        PTemp++;
        buffer[end]=PTemp;
        dataNum++;
        //printf("***Producer %d : produced datum = %d, dataNum = %d ",Pid,PTemp,dataNum);
        end=(end+1)%NUM;
        dataPNum++;
        LeaveCriticalSection(&CritSect);
        //a slot was filled: let one waiting consumer run
        WakeConditionVariable(&empty);
    }
    //printf("***Producer %d : finished!! ",Pid);
    return 0;
}

/*
 * Consumer thread entry point.
 *
 * Repeatedly removes the oldest element from the shared ring buffer. While
 * the buffer is empty the thread sleeps on the `empty` condition variable;
 * after each removal it wakes one thread waiting on `full`.
 *
 * p: consumer index packed into the pointer value (only used by the
 *    commented-out debug traces).
 * Returns 0 once `stop` is set and the buffer has been drained.
 */
DWORD WINAPI start_consumer(PVOID p){
    int CTemp;
    ULONG Cid=(ULONG)(ULONG_PTR)p;
    while(1){
        EnterCriticalSection(&CritSect);
        //loop around the wait: the predicate is re-tested after every wake-up
        while(dataNum<=0 && !stop){
            SleepConditionVariableCS(&empty,&CritSect,INFINITE);
        }
        //leftover elements are still consumed after stop has been raised
        if(stop && dataNum<=0){
            LeaveCriticalSection(&CritSect);
            break;
        }
        CTemp=buffer[start];
        dataNum--;
        //printf("Consumer %d : consumed datum = %d, dataNum = %d ",Cid,CTemp,dataNum);
        start=(start+1)%NUM;
        dataCNum++;
        LeaveCriticalSection(&CritSect);
        //a slot was freed: let one waiting producer run
        WakeConditionVariable(&full);
        //Sleep (rand() % CONSUMER_SLEEP_TIME_MS);
    }
    //printf("Consumer %d : finished!! ",Cid);
    return 0;
}

int main(int argc, char *argv[]){

HANDLE producers[NP], consumers[NC];
double time_start,time_end;
start=end=dataNum=PTemp=stop=dataCNum=dataPNum=0;

InitializeConditionVariable(&full);
InitializeConditionVariable(∅);
InitializeCriticalSection(&CritSect);

DWORD id;
EnterCriticalSection(&CritSect);
for(int i=0;i producers=CreateThread(NULL, 0, start_producer, (PVOID)i, 0, &id);
}
for(int i=0;i consumers=CreateThread(NULL, 0, start_consumer, (PVOID)i, 0, &id);
}
LeaveCriticalSection(&CritSect);

time_start=clock();
Sleep(TIME);
EnterCriticalSection(&CritSect);
stop=1;
LeaveCriticalSection(&CritSect);

WakeAllConditionVariable(&full);
WakeAllConditionVariable(∅);

for(int i=0;i WaitForSingleObject(producers,INFINITE);
}
for(int i=0;i WaitForSingleObject(consumers,INFINITE);
}
time_end=clock();
printf("Time = %.0f millisec - data produced = %u - data consumed = %u ",time_end-time_start,dataPNum,dataCNum);
return 0;
}


Michele,

I wish to apologize for taking so long to reply to your post. I had other pressing issues to resolve.

Your shared variables should be declared "volatile" (Example: volatile int dataNum;)
The purpose of this is to tell the compiler that the value of the variable may change unexpectedly, and therefore the compiler's code optimization should not keep the value in a register but should instead obtain a fresh copy of the variable on every reference.

Entry into and exit from critical sections have an implicit memory barrier (i.e. flushes occur when crossing the barrier).
Therefore, the flushes to push/pull the values to/from other threads are not required (assuming the same named critical section).

Consider using a macro to define "full"

#define full (dataNum==NUM)

remember to remove the int declaration for full.

Rewrite the section that clears full (full is a macro now)

//Producer loop rewritten for `full` being the macro (dataNum==NUM)
while(!finished){
//wait outside the critical section, yielding the processor, while the buffer is full
while(full&&!finished)
SwitchToThread();


#pragma omp critical (control)
{
if(!finished){
//the test above ran outside the critical section, so it is repeated here
if(!full){
PTemp++;
buffer[end]=PTemp;
dataNum++;
//printf("PRODUCER %d: produced data = %d (dataNum = %d) ",omp_get_thread_num(),PTemp,dataNum);
end=(end+1)%NUM;
}
}//if !finished
}//critical control
}//while

For short duration waits use _mm_pause();, SwitchToThread();, or Sleep(0);

When the number of running threads is <= the number of cores, consider using _mm_pause(). When the number of running threads is > the number of cores, consider using SwitchToThread(), as it has less overhead than Sleep(0). Sleep(0) will permit threads to migrate from core to core (if that is desirable), whereas SwitchToThread() tends to place the running thread at the tail end of the threads waiting to run on the core at the time of the call.

Jim Dempsey
michele86
Beginner
77 Views

Thank you very much for your help! It has been sooo useful!

Michele

Reply