- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thank you for any help you can give.
Time for calculation (800 frames): 1.689512 (s)
Time for one frame: 0.002112 (s)
Frequency: 473.509565
============================
ippit7-5.2.dll 5.2 5.2.108.410
============================
Time for IPP calculation (800 frames): 1.929452 (s)
Time for one frame: 0.002412 (s)
IPP Frequency: 414.625492
maxError: 1
Eric
#ifdef WIN32
#include
#else
#include
#include
#endif
#include
#include
#include
#include
#define YDIM 1280
#define XDIM 150
#define LINES 80
#define MAX_USHORT 65535
#define TIMING 10
#define MAX(a,b) (((a)<(b))?(b):(a))
#define MIN(a,b) (((a)>(b))?(b):(a))
double randomu();
double GetTimeOfDay(void);
/*
 * Print a banner identifying the IPP image-processing library in use:
 * its name, version string and the four numeric version/build fields
 * reported by ippiGetLibVersion(), framed by two separator lines.
 */
void libinfo(void) {
    const IppLibraryVersion *lib = ippiGetLibVersion();

    /* Each field group goes on its own line, as in the program's sample output. */
    printf("============================\n");
    printf("%s %s %d.%d.%d.%d\n", lib->Name, lib->Version,
           lib->major, lib->minor, lib->majorBuild, lib->build);
    printf("============================\n");
}
int main(int argc, char *argv[]) {
int ii,jj;
unsigned short *outData1,*outData2,*data,*subtractM;
static int tmpVal;
float *multiplyM;
double start,stop;
Ipp16u *tmpInt,*tmpSub;
Ipp32f *tmpFlt,*tmpMult;
int intStep,fltStep,subStep,multStep;
IppiSize roi={XDIM,YDIM};
int maxError=0;
data=malloc(YDIM*LINES*XDIM*sizeof(unsigned short));
outData1=malloc(YDIM*LINES*XDIM*sizeof(unsigned short));
outData2=malloc(YDIM*LINES*XDIM*sizeof(unsigned short));
subtractM=malloc(YDIM*XDIM*sizeof(unsigned short));
multiplyM=malloc(YDIM*XDIM*sizeof(float));
//Generate random data
for(ii=0;ii
data[ii]=(unsigned short)(10000.0 + (2.0*randomu()-1.0)*2500.0);
for(ii=0;ii
subtractM[ii]=(unsigned short)(1000.0 + (2.0*randomu()-1.0)*200.0);
for(ii=0;ii
multiplyM[ii]=(float)(1.0 + (2.0*randomu()-1.0)*0.2);
/////////////////////////////////////////////////////
// Do the calculation
/////////////////////////////////////////////////////
start=GetTimeOfDay();
for(ii=0;ii
for(jj=0;jj
// Subtract and multiply
tmpVal=(int)((data[(ii%LINES)*YDIM*XDIM+jj]-subtractM[jj])*multiplyM[jj]);//+0.5;
// Data checks to make sure the data is still in range.
if ((tmpVal < 0) || (tmpVal > MAX_USHORT)) {
const int a=tmpVal;
tmpVal=MAX(0,a);
tmpVal=MIN(MAX_USHORT,a);
}
// Implicit conversion to short
outData1[(ii%LINES)*YDIM*XDIM+jj]=tmpVal;
}
}
stop=GetTimeOfDay();
/////////////////////////////////////////////////////
// End calculation
/////////////////////////////////////////////////////
printf("Time for calculation (%i frames): %f (s) ",TIMING*LINES, stop-start);
printf("Time for one frame: %f (s) ",(stop-start)/(TIMING*LINES));
printf("Frequency: %f ",(LINES*TIMING)/(stop-start));
/////////////////////////////////////////////////////
// Do th e calculation with IPP
/////////////////////////////////////////////////////
libinfo();
tmpInt=ippiMalloc_16s_C1(XDIM,YDIM,&intStep);
tmpFlt=ippiMalloc_32f_C1(XDIM,YDIM,&fltStep);
tmpSub=ippiMalloc_16u_C1(XDIM,YDIM,&subStep);
tmpMult=ippiMalloc_32f_C1(XDIM,YDIM,&multStep);
ippiCopy_16u_C1R(subtractM,XDIM*sizeof(short),tmpSub,subStep,roi);
ippiCopy_32f_C1R(multiplyM,XDIM*sizeof(float),tmpMult,multStep,roi);
start=GetTimeOfDay();
for(ii=0;ii
//Subtract
ippiSub_16u_C1RSfs(tmpSub,subStep,&data[(ii%LINES)*YDIM*XDIM],XDIM*sizeof(short),tmpInt,intStep,roi,0);
//Convert data to floats
ippiConvert_16s32f_C1R(tmpInt,intStep,tmpFlt,fltStep,roi);
//Multiply
ippiMul_32f_C1IR(tmpMult,multStep,tmpFlt,fltStep,roi);
//Threshold for converting back to shorts
ippiThreshold_GTVal_32f_C1IR(tmpFlt,fltStep,roi,MAX_USHORT-1,MAX_USHORT-1);
ippiThreshold_LTVal_32f_C1IR(tmpFlt,fltStep,roi,0,0);
//Convert back to shorts
ippiConvert_32f16u_C1R(tmpFlt,fltStep,&outData2[(ii%LINES)*YDIM*XDIM],
XDIM*sizeof(short),roi,ippRndZero);
}
stop=GetTimeOfDay();
ippiFree(tmpInt);
ippiFree(tmpMult);
ippiFree(tmpSub);
ippiFree(tmpFlt);
/////////////////////////////////////////////////////
// End calculation
/////////////////////////////////////////////////////
printf("Time for IPP calculation (%i frames): %f (s) ",TIMING*LINES, stop-start);
printf("Time for one frame: %f (s) ",(stop-start)/(TIMING*LINES));
printf("IPP Frequency: t %f ",(LINES*TIMING)/(stop-start));
for(jj=0;jj
if (abs(outData1[jj]-outData2[jj]) > maxError)
maxError=abs(outData1[jj]-outData2[jj]);
}
printf("maxError: %i ",maxError);
free(subtractM);
free(multiplyM);
free(data);
free(outData1);
free(outData2);
return 0;
}
/*
 * Return the next rand() value scaled by RAND_MAX, i.e. a pseudo-random
 * double in the closed interval [0, 1].
 */
double randomu() {
    const double sample = (double)rand();
    const double scale = (double)RAND_MAX;

    return sample / scale;
}
/*
 * Return a timestamp in seconds as a double.
 *
 * With WIN32 defined the value is the performance counter divided by the
 * performance-counter frequency (queried once and cached in a static).
 * Otherwise it is the gettimeofday() result: whole seconds plus the
 * microsecond field scaled to seconds.
 */
double GetTimeOfDay(void) {
#ifdef WIN32
    static LARGE_INTEGER ticksPerSecond;
    LARGE_INTEGER now;

    /* Statics start zeroed, so a zero frequency means "not queried yet". */
    if (ticksPerSecond.QuadPart == 0) {
        QueryPerformanceFrequency(&ticksPerSecond);
    }
    QueryPerformanceCounter(&now);
    return (double)now.QuadPart / ticksPerSecond.QuadPart;
#else
    struct timeval now;
    double wholeSeconds;
    double fraction;

    gettimeofday(&now, NULL);
    wholeSeconds = (double)now.tv_sec;
    fraction = (double)now.tv_usec / 1000000.0;
    return wholeSeconds + fraction;
#endif
}
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Eric
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
With Intel's compiler:
Time for calculation (800 frames): 0.776799 (s)
Time for one frame: 0.000971 (s)
Frequency: 1029.867492
============================
libippim7.so.5.2 5.2 5.2.108.410
============================
Time for IPP calculation (800 frames): 2.083939 (s)
Time for one frame: 0.002605 (s)
IPP Frequency: 383.888382
maxError: 0
Time for calculation (800 frames): 1.013748 (s)
Time for one frame: 0.001267 (s)
Frequency: 789.150809
============================
libippim7.so.5.2 5.2 5.2.108.410
============================
Time for IPP calculation (800 frames): 2.038634 (s)
Time for one frame: 0.002548 (s)
IPP Frequency: 392.419664
maxError: 0
The only notable compiler option I am using is -O2.
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hello,
An IPP expert recommends changing the code in the following way (to optimize memory access):
#ifdef WIN32
#include
#else
#include
#include
#endif
#include
#include
#include
#include
#define YDIM 1280
#define XDIM 150
#define LINES 80
#define MAX_USHORT 65535
#define TIMING 10
#define MAX(a,b) (((a)<(b))?(b):(a))
#define MIN(a,b) (((a)>(b))?(b):(a))
double randomu();
double GetTimeOfDay(void);
/*
 * Print a banner identifying the IPP image-processing library in use:
 * its name, version string and the four numeric version/build fields
 * reported by ippiGetLibVersion(), framed by two separator lines.
 */
void libinfo(void) {
    const IppLibraryVersion *lib = ippiGetLibVersion();

    /* Each field group goes on its own line, as in the program's sample output. */
    printf("============================\n");
    printf("%s %s %d.%d.%d.%d\n", lib->Name, lib->Version,
           lib->major, lib->minor, lib->majorBuild, lib->build);
    printf("============================\n");
}
int
main(int argc, char *argv[]) { int ii,jj, i;
unsigned short *outData1,*outData2,*data,*subtractM;
static int tmpVal;
float *multiplyM;
double start,stop;
Ipp16u *tmpInt,*tmpSub;
Ipp32f *tmpFlt,*tmpMult;
int intStep,fltStep,subStep,multStep;
IppiSize roi={XDIM,YDIM};
int maxError=0;
data=malloc(YDIM*LINES*XDIM*sizeof(unsigned short));
outData1=malloc(YDIM*LINES*XDIM*sizeof(unsigned short));
outData2=malloc(YDIM*LINES*XDIM*sizeof(unsigned short));
subtractM=malloc(YDIM*XDIM*sizeof(unsigned short));
multiplyM=malloc(YDIM*XDIM*sizeof(float));
//Generate random data
for(ii=0;ii
data[ii]=(unsigned short)(10000.0 + (2.0*randomu()-1.0)*2500.0);
for(ii=0;ii
subtractM[ii]=(unsigned short)(1000.0 + (2.0*randomu()-1.0)*200.0);
for(ii=0;ii
multiplyM[ii]=(float)(1.0 + (2.0*randomu()-1.0)*0.2);
/////////////////////////////////////////////////////
// Do the calculation
/////////////////////////////////////////////////////
start=GetTimeOfDay();
for(ii=0;ii
for(jj=0;jj
// Subtract and multiply
tmpVal=(int)((data[(ii%LINES)*YDIM*XDIM+jj]-subtractM[jj])*multiplyM[jj]);//+0.5;
// Data checks to make sure the data is still in range.
if ((tmpVal < 0) || (tmpVal > MAX_USHORT)) {
const int a=tmpVal;
tmpVal=MAX(0,a);
tmpVal=MIN(MAX_USHORT,a);
}
// Implicit conversion to short
outData1[(ii%LINES)*YDIM*XDIM+jj]=tmpVal;
} }
stop=GetTimeOfDay();
/////////////////////////////////////////////////////
// End calculation
/////////////////////////////////////////////////////
printf("Time for calculation (%i frames): %f (s) ",TIMING*LINES, stop-start);
printf("Time for one frame: %f (s) ",(stop-start)/(TIMING*LINES));
printf("Frequency: %f ",(LINES*TIMING)/(stop-start));
/////////////////////////////////////////////////////
// Do the calculation with IPP
/////////////////////////////////////////////////////
libinfo();
tmpInt=ippiMalloc_16u_C1(XDIM,YDIM,&intStep);
tmpFlt=ippiMalloc_32f_C1(XDIM,YDIM,&fltStep);
tmpSub=ippiMalloc_16u_C1(XDIM,YDIM,&subStep);
tmpMult=ippiMalloc_32f_C1(XDIM,YDIM,&multStep);
ippiCopy_16u_C1R(subtractM,XDIM*
sizeof (short),tmpSub,subStep,roi);ippiCopy_32f_C1R(multiplyM,XDIM*sizeof(float),tmpMult,multStep,roi);
start=GetTimeOfDay();
for(ii=0;ii
for( i = 0; i < YDIM; i++ ){
Ipp16u *tmpin = &data[(ii%LINES)*YDIM*XDIM] + i * XDIM;
Ipp16u *tmpint = (Ipp16u*)((Ipp8u*)tmpInt + intStep * i );
Ipp16u *tmpins = (Ipp16u*)((Ipp8u*)tmpSub + subStep * i );
Ipp32f *tmpflt = tmpFlt;//(Ipp32f*)((Ipp8u*)tmpFlt + fltStep * i );
Ipp32f *tmpmul = (Ipp32f*)((Ipp8u*)tmpMult + multStep * i );
Ipp16u *tmpout = &outData2[(ii%LINES)*YDIM*XDIM] + i * XDIM;
ippsSub_16u_Sfs( tmpins, tmpin, tmpint, XDIM, 0 );
ippsConvert_16u32f( tmpint, tmpflt, XDIM );
ippsMul_32f_I( tmpmul, tmpflt, XDIM );
ippsConvert_32f16u_Sfs( tmpflt, tmpout, XDIM, ippRndZero, 0 );
}
}
stop=GetTimeOfDay();
ippiFree(tmpInt);
ippiFree(tmpMult);
ippiFree(tmpSub);
ippiFree(tmpFlt);
/////////////////////////////////////////////////////
// End calculation
/////////////////////////////////////////////////////
printf("Time for IPP calculation (%i frames): %f (s) ",TIMING*LINES, stop-start);
printf("Time for one frame: %f (s) ",(stop-start)/(TIMING*LINES));
printf("IPP Frequency: %f ",(LINES*TIMING)/(stop-start));
for(jj=0;jj
if (abs(outData1[jj]-outData2[jj]) > maxError)< p="">>
maxError=abs(outData1[jj]-outData2[jj]);
}
printf("maxError: %i ",maxError);
free(subtractM);
free(multiplyM);
free(data);
free(outData1);
free(outData2);
return 0;
}
/*
 * Return the next rand() value scaled by RAND_MAX, i.e. a pseudo-random
 * double in the closed interval [0, 1].
 */
double randomu() {
    const double sample = (double)rand();
    const double scale = (double)RAND_MAX;

    return sample / scale;
}
/*
 * Return a timestamp in seconds as a double.
 *
 * With WIN32 defined the value is the performance counter divided by the
 * performance-counter frequency (queried once and cached in a static).
 * Otherwise it is the gettimeofday() result: whole seconds plus the
 * microsecond field scaled to seconds.
 */
double GetTimeOfDay(void) {
#ifdef WIN32
    static LARGE_INTEGER ticksPerSecond;
    LARGE_INTEGER now;

    /* Statics start zeroed, so a zero frequency means "not queried yet". */
    if (ticksPerSecond.QuadPart == 0) {
        QueryPerformanceFrequency(&ticksPerSecond);
    }
    QueryPerformanceCounter(&now);
    return (double)now.QuadPart / ticksPerSecond.QuadPart;
#else
    struct timeval now;
    double wholeSeconds;
    double fraction;

    gettimeofday(&now, NULL);
    wholeSeconds = (double)now.tv_sec;
    fraction = (double)now.tv_usec / 1000000.0;
    return wholeSeconds + fraction;
#endif
}
Regards,
Vladimir
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thanks Vladimir, and let your engineer know I am grateful!
Time for calculation (800 frames): 1.738825 (s)
Time for one frame: 0.002174 (s)
Frequency: 460.080812
============================
ippit7-5.2.dll 5.2 5.2.108.410
============================
Time for IPP calculation (800 frames): 0.699127 (s)
Time for one frame: 0.000874 (s)
IPP Frequency: 1144.284661
maxError: 1
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
This does pretty well too, but not as well as Vladimir's:
tmpInt=malloc(XDIM*YDIM*sizeof(short));
tmpFlt=malloc(XDIM*YDIM*sizeof(float));
start=GetTimeOfDay();
for(ii=0;ii<TIMING*LINES;ii++){
ippsSub_16u_Sfs(subtractM,&data[(ii%LINES)*YDIM*XDIM],tmpInt,
XDIM*YDIM, 0 );
ippsConvert_16u32f( tmpInt, tmpFlt, XDIM*YDIM );
ippsMul_32f_I( multiplyM, tmpFlt, XDIM*YDIM );
ippsConvert_32f16u_Sfs( tmpFlt, &outData2[(ii%LINES)*YDIM*XDIM],
XDIM*YDIM, ippRndZero, 0 );
}
So, I think the lesson is: use the signal processing calls if you can.
Time for calculation (800 frames): 1.700834 (s)
Time for one frame: 0.002126 (s)
Frequency: 470.357378
============================
ippit7-5.2.dll 5.2 5.2.108.410
============================
Time for IPP calculation (800 frames): 0.815084 (s)
Time for one frame: 0.001019 (s)
IPP Frequency: 981.493513
maxError: 1
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
This is actually slower:
for(ii=0;ii<TIMING*LINES;ii++)
for( i = 0; i < YDIM; i++ ){
ippsSub_16u_Sfs(&subtractM[i*XDIM],&data[(ii%LINES)*YDIM*XDIM+i*XDIM],&tmpInt[i*XDIM],
XDIM, 0 );
ippsConvert_16u32f( &tmpInt[i*XDIM], &tmpFlt[i*XDIM], XDIM );
ippsMul_32f_I( &multiplyM[i*XDIM], &tmpFlt[i*XDIM], XDIM );
ippsConvert_32f16u_Sfs( &tmpFlt[i*XDIM], &outData2[(ii%LINES)*YDIM*XDIM+i*XDIM],
XDIM, ippRndZero, 0 );
}
Time for calculation (800 frames): 1.694970 (s)
Time for one frame: 0.002119 (s)
Frequency: 471.984726
============================
ippit7-5.2.dll 5.2 5.2.108.410
============================
Time for IPP calculation (800 frames): 1.077069 (s)
Time for one frame: 0.001346 (s)
IPP Frequency: 742.756803
maxError: 1
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
This code is just about as fast as Vladimir's code:
for(ii=0;ii<TIMING*LINES;ii++)
for( i = 0; i < YDIM; i++ ){
unsigned short *tmpin = &data[(ii%LINES)*YDIM*XDIM + i * XDIM];
unsigned short *tmpint = &tmpInt[XDIM * i];
unsigned short *tmpins = &subtractM[XDIM * i];
float *tmpflt = tmpFlt;
float *tmpmul = &multiplyM[XDIM * i];
unsigned short *tmpout = &outData2[(ii%LINES)*YDIM*XDIM + i * XDIM];
ippsSub_16u_Sfs( tmpins, tmpin, tmpint, XDIM, 0 );
ippsConvert_16u32f( tmpint, tmpflt, XDIM );
ippsMul_32f_I( tmpmul, tmpflt, XDIM );
ippsConvert_32f16u_Sfs( tmpflt, tmpout, XDIM, ippRndZero, 0 );
}
So try not to do pointer arithmetic inside the IPP function calls.
Time for calculation (800 frames): 1.692036 (s)
Time for one frame: 0.002115 (s)
Frequency: 472.803299
============================
ippit7-5.2.dll 5.2 5.2.108.410
============================
Time for IPP calculation (800 frames): 0.696450 (s)
Time for one frame: 0.000871 (s)
IPP Frequency: 1148.682410
maxError: 1
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
This is pretty fast now:
tmpInt=malloc(XDIM*sizeof(short));
tmpFlt=malloc(XDIM*sizeof(float));
start=GetTimeOfDay();
for(ii=0;ii<TIMING*LINES;ii++)
for( i = 0; i < YDIM; i++ ){
unsigned short *tmpin = &data[(ii%LINES)*YDIM*XDIM + i * XDIM];
unsigned short *tmpins = &subtractM[XDIM * i ];
float *tmpmul = &multiplyM[XDIM* i ];
unsigned short *tmpout = &outData2[(ii%LINES)*YDIM*XDIM + i * XDIM];
ippsSub_16u_Sfs( tmpins, tmpin, tmpInt, XDIM, 0 );
ippsConvert_16u32f( tmpInt, tmpFlt, XDIM );
ippsMul_32f_I( tmpmul, tmpFlt, XDIM );
ippsConvert_32f16u_Sfs( tmpFlt, tmpout, XDIM, ippRndZero, 0 );
}
I think this is where I am going to quit. Here's my summary:
Time for calculation (800 frames): 1.702078 (s)
Time for one frame: 0.002128 (s)
Frequency: 470.013800
============================
ippit7-5.2.dll 5.2 5.2.108.410
============================
Time for IPP calculation (800 frames): 0.499302 (s)
Time for one frame: 0.000624 (s)
IPP Frequency: 1602.236225
maxError: 1
- Use the IPPS library instead of IPPI wherever you can.
- Do calculation with small chunks of data (rows in this case), rather than all at once. (I guess? Maybe? I'm not sure about this one.)
- Do pointer math, but not in IPP functions (and don't do extra pointer math).

- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page