Intel® Integrated Performance Primitives
Deliberate problems developing high-performance vision, signal, security, and storage applications.

complex FTT vs real FFT, which one is faster?

josh83
Beginner
1,728 Views

Hello everyone!


Those days, I have been playing around with the IPP FTT functions.

I especially try to compare the performance between a complex FTT and a real FFT (1D single-precison).


I have already red that complex FTT are faster than real FFT, for instance here :

1. Hello, according our expert, amount of processed data in functions ippsFFTFwd_CToC_32fcand ippsFFTFwd_RToPerm is the same, but in the last function more calculations are needed. It is the reason why this function work slower for the same buffer size. Regards, Vladimir

2. Also according to this speed benchmark, http://www.fftw.org/speed/CoreDuo-3.0GHz-icc/ , complexFTT seems to be 30% faster as realFFT


However, when i'm testing both on my core2duo 1.8ghz, real FFT are faster...

-- IPP FTT 1D speed test (fft size: 256) --

fft_1D_real: 1620.1 clocks per FFT > FASTER !
fft_1D_complex: 2261.6 clocks per FFT


Did I misunderstand something ?

Thanx for your help! ++Josh


Here is my test code :
[cpp]#include 
#include
#include
#include

#include "ipp.h"

#define FFT_SIZE 256
#define FFT_ORDER 8
#define LOOP_MAX 200000

using namespace std;


// REAL FTT

IppStatus fft_ipp_real()
{
int i, loop;
IppStatus status;
IppsFFTSpec_R_32f *mySpecReal;

Ipp32f *input = ippsMalloc_32f(FFT_SIZE);
Ipp32f *output = ippsMalloc_32f(FFT_SIZE + 2);

for(i = 0; i < FFT_SIZE; i++)
{
input = (float)cos(0.8 * i);
}

status = ippsFFTInitAlloc_R_32f(&mySpecReal, FFT_ORDER, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast);

int bufferSize = 0;
status = ippsFFTGetBufSize_R_32f(mySpecReal, &bufferSize);
Ipp8u *myBuffer = (bufferSize > 0 ? ippsMalloc_8u(bufferSize) : NULL);

for(loop = 0; loop < LOOP_MAX; loop++)
{
status = ippsFFTFwd_RToCCS_32f(input, output, mySpecReal, myBuffer);
}

ippsFFTFree_R_32f(mySpecReal);
ippsFree(myBuffer);
ippsFree(input);
ippsFree(output);

return status;
}


// COMPLEX FTT

IppStatus fft_ipp_complex()
{
int i, loop;
IppStatus status;
IppsFFTSpec_C_32fc *mySpecComplex;

Ipp32fc *input = ippsMalloc_32fc(FFT_SIZE);
Ipp32fc *output = ippsMalloc_32fc(FFT_SIZE + 2);

for(i = 0; i < FFT_SIZE; i++)
{
input.re = (float)cos(0.8 * i);
input.im = 0;
}

status = ippsFFTInitAlloc_C_32fc(&mySpecComplex, FFT_ORDER, IPP_FFT_NODIV_BY_ANY, ippAlgHintFast);

int bufferSize = 0;
status = ippsFFTGetBufSize_C_32fc(mySpecComplex, &bufferSize);
Ipp8u *myBuffer = (bufferSize > 0 ? ippsMalloc_8u(bufferSize) : NULL);

for(loop = 0; loop < LOOP_MAX; loop++)
{
status = ippsFFTFwd_CToC_32fc(input, output, mySpecComplex, myBuffer);
}

ippsFFTFree_C_32fc(mySpecComplex);
ippsFree(myBuffer);
ippsFree(input);
ippsFree(output);

return status;
}


// THE MAIN

int main(int argc, char **argv)
{
Ipp64u startClocks;

ippStaticInit();
printf("\n-- IPP FTT 1D speed test (fft size: %d) --\n\n", FFT_SIZE);

startClocks = ippGetCpuClocks();
fft_ipp_real();
printf("fft_1D_real: %.1f clocks per FFT\n", (float)(ippGetCpuClocks() - startClocks) / (float)LOOP_MAX);

startClocks = ippGetCpuClocks();
fft_ipp_complex();
printf("fft_1D_complex: %.1f clocks per FFT", (float)(ippGetCpuClocks() - startClocks) / (float)LOOP_MAX);

printf("\n\nPress any key to exit...\n");
getch();

return 0;
}
[/cpp]
0 Kudos
2 Replies
piet_de_weer
Beginner
1,728 Views
Real FFT's should be faster. A real FFT can assume that all the input (or output) imaginary values are 0. Which enables some extra optimizations.
0 Kudos
igorastakhov
New Contributor II
1,728 Views

Real FFT functions are faster than complex FFT functionsfor the identical size of transform (their execution time is less) because theyperform 2x less calculations.

Performance of FFT functions (www.fftw.org) is calculatedaccording to thenext formulas

5*n*log (n)/time - for complex FFT

5*n*log (n)/time/2 - for real FFT

Therefore if the execution time of real FFT function is more than half (0.5x)of the execution time of complex FFT function its performanceis less.

0 Kudos
Reply