- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I'm benchmarking different implementations of the 1D FFT, and I've written some simple code to test IPP:
#include <sys/time.h>
#include <cstdlib>
#include <iostream>
#include <ipp.h>
// Benchmarks the IPP single-precision complex-to-complex forward FFT for
// power-of-two lengths 2^10 .. 2^20. For each length it times a fixed number
// of out-of-place transforms and prints an estimated Mflops figure.
int main() {
    // Number of back-to-back transforms timed for every length.
    const unsigned int myRepetitions = 1000;

    for (unsigned int myOrder = 10; myOrder < 21; myOrder++) {
        const unsigned int myLength = (1u << myOrder);
        std::cout << "Length = " << myLength << std::endl;

        // FFT specification and scratch buffer for this order.
        IppsFFTSpec_C_32fc* mySpec = 0;
        ippsFFTInitAlloc_C_32fc(&mySpec, myOrder, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast);
        int myBufferSize = 0;
        ippsFFTGetBufSize_C_32fc(mySpec, &myBufferSize);
        Ipp8u *myBuffer = ippsMalloc_8u(myBufferSize);

        // Input (myA) and output (myB) arrays of myLength complex samples.
        Ipp32fc *myA = ippsMalloc_32fc(myLength);
        Ipp32fc *myB = ippsMalloc_32fc(myLength);

        // Fill the input with pseudo-random values in [0, 1]. The division is
        // done in floating point: rand() / RAND_MAX on integers would be 0
        // for nearly every sample.
        for (unsigned int n = 0; n < myLength; ++n) {
            myA[n].re = static_cast<float>(rand()) / RAND_MAX;
            myA[n].im = static_cast<float>(rand()) / RAND_MAX;
        }

        struct timeval myStartingTime;
        struct timeval myEndingTime;
        gettimeofday(&myStartingTime, 0);
        for (unsigned int myRepetition = 0; myRepetition < myRepetitions; ++myRepetition) {
            ippsFFTFwd_CToC_32fc(myA, myB, mySpec, myBuffer);
        }
        gettimeofday(&myEndingTime, 0);

        const double myElapsedSeconds =
            (myEndingTime.tv_sec - myStartingTime.tv_sec) +
            (myEndingTime.tv_usec - myStartingTime.tv_usec) / 1000000.0;

        // Conventional operation count for a complex FFT of length N is
        // 5 * N * log2(N) (2.5 * N * log2(N) applies to real transforms),
        // accumulated over all timed repetitions and scaled to 10^6 flops/s.
        const double myMflops =
            5.0 * myOrder * myLength * myRepetitions / myElapsedSeconds / 1.0e6;
        std::cout << "\t\testimated Mflops: "
                  << myMflops
                  << std::endl;

        ippsFree(myA);
        ippsFree(myB);
        ippsFree(myBuffer);
        ippsFFTFree_C_32fc(mySpec);
    }
}
And I compile it with icc.
It seems to be dramatically slower than the FFTW3 implementation, BUT I'm not happy with this result: I've found benchmarks (http://www.fftw.org/speed/CoreDuo-3.0GHz-icc64/) showing the performance to be similar.
I'm afraid (pretty sure) I'm not fully exploiting IPP. How should I speed up this code?
Thanks
francesco
#include <sys/time.h>
#include <cstdlib>
#include <iostream>
#include <ipp.h>
// Benchmarks the IPP single-precision complex-to-complex forward FFT for
// power-of-two lengths 2^10 .. 2^20. For each length it times a fixed number
// of out-of-place transforms and prints an estimated Mflops figure.
int main() {
    // Number of back-to-back transforms timed for every length.
    const unsigned int myRepetitions = 1000;

    for (unsigned int myOrder = 10; myOrder < 21; myOrder++) {
        const unsigned int myLength = (1u << myOrder);
        std::cout << "Length = " << myLength << std::endl;

        // FFT specification and scratch buffer for this order.
        IppsFFTSpec_C_32fc* mySpec = 0;
        ippsFFTInitAlloc_C_32fc(&mySpec, myOrder, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast);
        int myBufferSize = 0;
        ippsFFTGetBufSize_C_32fc(mySpec, &myBufferSize);
        Ipp8u *myBuffer = ippsMalloc_8u(myBufferSize);

        // Input (myA) and output (myB) arrays of myLength complex samples.
        Ipp32fc *myA = ippsMalloc_32fc(myLength);
        Ipp32fc *myB = ippsMalloc_32fc(myLength);

        // Fill the input with pseudo-random values in [0, 1]. The division is
        // done in floating point: rand() / RAND_MAX on integers would be 0
        // for nearly every sample.
        for (unsigned int n = 0; n < myLength; ++n) {
            myA[n].re = static_cast<float>(rand()) / RAND_MAX;
            myA[n].im = static_cast<float>(rand()) / RAND_MAX;
        }

        struct timeval myStartingTime;
        struct timeval myEndingTime;
        gettimeofday(&myStartingTime, 0);
        for (unsigned int myRepetition = 0; myRepetition < myRepetitions; ++myRepetition) {
            ippsFFTFwd_CToC_32fc(myA, myB, mySpec, myBuffer);
        }
        gettimeofday(&myEndingTime, 0);

        const double myElapsedSeconds =
            (myEndingTime.tv_sec - myStartingTime.tv_sec) +
            (myEndingTime.tv_usec - myStartingTime.tv_usec) / 1000000.0;

        // Conventional operation count for a complex FFT of length N is
        // 5 * N * log2(N) (2.5 * N * log2(N) applies to real transforms),
        // accumulated over all timed repetitions and scaled to 10^6 flops/s.
        const double myMflops =
            5.0 * myOrder * myLength * myRepetitions / myElapsedSeconds / 1.0e6;
        std::cout << "\t\testimated Mflops: "
                  << myMflops
                  << std::endl;

        ippsFree(myA);
        ippsFree(myB);
        ippsFree(myBuffer);
        ippsFFTFree_C_32fc(mySpec);
    }
}
And I compile it with icc.
It seems to be dramatically slower than the FFTW3 implementation, BUT I'm not happy with this result: I've found benchmarks (http://www.fftw.org/speed/CoreDuo-3.0GHz-icc64/) showing the performance to be similar.
I'm afraid (pretty sure) I'm not fully exploiting IPP. How should I speed up this code?
Thanks
francesco
Link Copied
1 Reply
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
...my mistake, I was using different normalizations, the performances are comparable now.
Apart from this mistake, any advice or tricks for speeding up the IPP FFT are very welcome!
Francesco
Apart from this mistake, any advice or tricks for speeding up the IPP FFT are very welcome!
Francesco

Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page