FFT-Based 3D Convolution With Zero Padding

A2009 · ‎12-05-2014

I have been trying to figure out how I can use Intel MKL to perform a FFT-based 3D convolution with zero-padding. I have been searching and posting in online forums (including Intel MKL forum), unfortunately, I have not been very successful so far.

I have a 3D array and is stored as a 1D array of type double in a columnwise fashion. Similarly the kernel is of type double and is saved columnwise. For example,

for( int k = 0; k < nk; k++ ) // Loop through the height.
    for( int j = 0; j < nj; j++ ) // Loop through the rows.
        for( int i = 0; i < ni; i++ ) // Loop through the columns.
        {
            ijk = i + ni * j + ni * nj * k;
            my3Darray[ ijk ] = 1.0;
        }

I am writing a 3D convolution function:

which takes in real values (not complex values) and
outputs the results of the convolution,
for the computation of convolution, I am performing a "not-in-place" FFT on the input array as well as the kernel in order to prevent them from getting modified (I need to use them later in my code) and
then do the backward computation "in-place".

During the process I am also considering the zero padding to avoid any artifacts. The size of FFTs are (dim_input+dim_kernel-1) on each dimension and the next highest power of two is chosen for speed.

My questions are:

How can I perform the zero-padding?
How should I deal with the size of the arrays used by FFT functions?
How can I take out the zero padded results and get the actual result?

I would be absolutely grateful to have any comments or suggestion.

#include "mkl.h"

int max(int a, int b, int c);
void Conv3D_R2C( 
    double *in, int nRowsIn , int nColsIn , int nHeightsIn ,
    double *ker, int nRowsKer, int nColsKer, int nHeightsKer,
    double *out );
    
int main()
{

    int n = 5;
    int nkernel = 3;

    double *a          = new double [n*n*n]; // This array is real.
    double *aconvolved = new double [n*n*n]; // The convolved array is also real.
    double *kernel     = new double [nkernel*nkernel*nkernel]; // kernel is real.

    // Fill the array with some 'real' numbers.
    for( int i = 0; i < n*n*n; i++ )
        a[ i ] = 1.0;

    // Fill the kernel with some 'real' numbers.
    for( int i = 0; i < nkernel*nkernel*nkernel; i++ )
        kernel[ i ] = 1.0;

    // Calculate the convolution.
    Conv3D_R2C( a, n, n, n, kernel, nkernel, nkernel, nkernel, aconvolved );

    delete[] a;
    delete[] kernel;
    delete[] aconvolved;
}

void Conv3D_R2C( // Real to Complex 3D FFT.
    double *in, int nRowsIn , int nColsIn , int nHeightsIn ,
    double *ker, int nRowsKer, int nColsKer, int nHeightsKer,
    double *out )
{
    
    int nIn  = max( nRowsIn , nColsIn , nHeightsIn  );
    int nKer = max( nRowsKer, nColsKer, nHeightsKer );
    int n = nIn + nKer - 1;

    /* Strides describe data layout in real and conjugate-even domain. */
    MKL_LONG rs[4], cs[4];
    
    // DFTI descriptor.
    DFTI_DESCRIPTOR_HANDLE fft_desc = 0;
    
    // Round up to the next highest power of 2.
    unsigned int N = (unsigned int) n; // compute the next highest power of 2 of 32-bit n.
    N--;
    N |= N >> 1;
    N |= N >> 2;
    N |= N >> 4;
    N |= N >> 8;
    N |= N >> 16;
    N++;
    
    // Variables needed for out-of-place computations.
    MKL_Complex16 *in_fft  = new MKL_Complex16 [ N*N*N ];
    MKL_Complex16 *ker_fft = new MKL_Complex16 [ N*N*N ];
    MKL_Complex16 *out_fft = new MKL_Complex16 [ N*N*N ];
    double *out2 = new double [ N*N*N ];
   
    /* Compute strides */
    rs[3] = 1;           cs[3] = 1;
    rs[2] = (N/2+1)*2;   cs[2] = (N/2+1);
    rs[1] = N*(N/2+1)*2; cs[1] = N*(N/2+1);
    rs[0] = 0;           cs[0] = 0;
    
    // Create DFTI descriptor.
    MKL_LONG sizes[] = { N, N, N };
    DftiCreateDescriptor( &fft_desc, DFTI_DOUBLE, DFTI_REAL, 3, sizes );
    
    // Configure DFTI descriptor.
    DftiSetValue        ( fft_desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX );
    DftiSetValue        ( fft_desc, DFTI_PLACEMENT             , DFTI_NOT_INPLACE     ); // Out-of-place transformation.
    DftiSetValue        ( fft_desc, DFTI_INPUT_STRIDES  , rs  );
    DftiSetValue        ( fft_desc, DFTI_OUTPUT_STRIDES , cs  );
    DftiCommitDescriptor( fft_desc );
    DftiComputeForward  ( fft_desc, in , in_fft  );
    DftiComputeForward  ( fft_desc, ker, ker_fft );

    for(long long i = 0; i < (long long)N*N*N; i++ )
    {
        out_fft.real = in_fft.real * ker_fft.real;
        out_fft.imag = in_fft.imag * ker_fft.imag;
    }

    // Change strides to compute backward transform.
    DftiSetValue        ( fft_desc, DFTI_INPUT_STRIDES , cs);
    DftiSetValue        ( fft_desc, DFTI_OUTPUT_STRIDES, rs);
    DftiCommitDescriptor( fft_desc );
    DftiComputeBackward ( fft_desc, out_fft, out2 );

    // Printing the zero padded 3D convolved result.
    for( long long i = 0; i < (long long)N*N*N; i++ )
        printf( out2, N*N*N );
    
    /* I don't know how to take out the zero padded results and 
       save the actual result in the variable named "out" */

    DftiFreeDescriptor  ( &fft_desc );

    delete[] in_fft;
    delete[] ker_fft;
    delete[] out2;
}

int max(int a, int b, int c)
{
     int m = a;
     (m < b) && (m = b); //these are not conditional statements.
     (m < c) && (m = c); //these are just boolean expressions.
     return m;
}