<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic FFT-Based 3D Convolution With Zero Padding in Intel® oneAPI Math Kernel Library</title>
    <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/FFT-Based-3D-Convolution-With-Zero-Padding/m-p/1013254#M19322</link>
    <description>&lt;P&gt;I have been trying to figure out how I can use Intel MKL to perform a FFT-based 3D convolution with zero-padding. &amp;nbsp;I have been searching and posting in online forums (including Intel MKL forum), unfortunately, I have not been very successful so far.&amp;nbsp;&lt;/P&gt;

&lt;P&gt;I have a 3D array and is stored as a 1D array of type double in a columnwise fashion. Similarly the kernel is of type double and is saved columnwise. For example,&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;for( int k = 0; k &amp;lt; nk; k++ ) // Loop through the height.
    for( int j = 0; j &amp;lt; nj; j++ ) // Loop through the rows.
        for( int i = 0; i &amp;lt; ni; i++ ) // Loop through the columns.
        {
            ijk = i + ni * j + ni * nj * k;
            my3Darray[ ijk ] = 1.0;
        }&lt;/PRE&gt;

&lt;P&gt;I am writing a 3D convolution function:&lt;/P&gt;

&lt;UL&gt;
	&lt;LI&gt;which takes in real values (not complex values) and&lt;/LI&gt;
	&lt;LI&gt;outputs the results of the convolution,&lt;/LI&gt;
	&lt;LI&gt;for the computation of convolution, I am performing a "not-in-place" FFT on the input array as well as the kernel in order to prevent them from getting modified (I need to use them later in my code) and&lt;/LI&gt;
	&lt;LI&gt;then do the backward computation "in-place".&lt;/LI&gt;
&lt;/UL&gt;

&lt;P&gt;During the process I am also considering the zero padding to avoid any artifacts. &amp;nbsp;The size of FFTs are (dim_input+dim_kernel-1) on each dimension and the next highest power of two is chosen for speed.&lt;/P&gt;

&lt;P&gt;My questions are:&lt;/P&gt;

&lt;OL&gt;
	&lt;LI&gt;How can I perform the zero-padding?&lt;/LI&gt;
	&lt;LI&gt;How should I deal with the size of the arrays used by FFT functions?&lt;/LI&gt;
	&lt;LI&gt;How can I take out the zero padded results and get the actual result?&lt;/LI&gt;
&lt;/OL&gt;

&lt;P&gt;I would be &amp;nbsp;&lt;STRONG&gt;absolutely grateful&lt;/STRONG&gt; to have any comments or suggestion.&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;#include "mkl.h"

int max(int a, int b, int c);
void Conv3D_R2C( 
    double *in, int nRowsIn , int nColsIn , int nHeightsIn ,
    double *ker, int nRowsKer, int nColsKer, int nHeightsKer,
    double *out );
    
int main()
{

    int n = 5;
    int nkernel = 3;

    double *a          = new double [n*n*n]; // This array is real.
    double *aconvolved = new double [n*n*n]; // The convolved array is also real.
    double *kernel     = new double [nkernel*nkernel*nkernel]; // kernel is real.

    // Fill the array with some 'real' numbers.
    for( int i = 0; i &amp;lt; n*n*n; i++ )
        a[ i ] = 1.0;

    // Fill the kernel with some 'real' numbers.
    for( int i = 0; i &amp;lt; nkernel*nkernel*nkernel; i++ )
        kernel[ i ] = 1.0;

    // Calculate the convolution.
    Conv3D_R2C( a, n, n, n, kernel, nkernel, nkernel, nkernel, aconvolved );

    delete[] a;
    delete[] kernel;
    delete[] aconvolved;
}

void Conv3D_R2C( // Real to Complex 3D FFT.
    double *in, int nRowsIn , int nColsIn , int nHeightsIn ,
    double *ker, int nRowsKer, int nColsKer, int nHeightsKer,
    double *out )
{
    
    int nIn  = max( nRowsIn , nColsIn , nHeightsIn  );
    int nKer = max( nRowsKer, nColsKer, nHeightsKer );
    int n = nIn + nKer - 1;

    /* Strides describe data layout in real and conjugate-even domain. */
    MKL_LONG rs[4], cs[4];
    
    // DFTI descriptor.
    DFTI_DESCRIPTOR_HANDLE fft_desc = 0;
    
    // Round up to the next highest power of 2.
    unsigned int N = (unsigned int) n; // compute the next highest power of 2 of 32-bit n.
    N--;
    N |= N &amp;gt;&amp;gt; 1;
    N |= N &amp;gt;&amp;gt; 2;
    N |= N &amp;gt;&amp;gt; 4;
    N |= N &amp;gt;&amp;gt; 8;
    N |= N &amp;gt;&amp;gt; 16;
    N++;
    
    // Variables needed for out-of-place computations.
    MKL_Complex16 *in_fft  = new MKL_Complex16 [ N*N*N ];
    MKL_Complex16 *ker_fft = new MKL_Complex16 [ N*N*N ];
    MKL_Complex16 *out_fft = new MKL_Complex16 [ N*N*N ];
    double *out2 = new double [ N*N*N ];
   
    /* Compute strides */
    rs[3] = 1;           cs[3] = 1;
    rs[2] = (N/2+1)*2;   cs[2] = (N/2+1);
    rs[1] = N*(N/2+1)*2; cs[1] = N*(N/2+1);
    rs[0] = 0;           cs[0] = 0;
    
    // Create DFTI descriptor.
    MKL_LONG sizes[] = { N, N, N };
    DftiCreateDescriptor( &amp;amp;fft_desc, DFTI_DOUBLE, DFTI_REAL, 3, sizes );
    
    // Configure DFTI descriptor.
    DftiSetValue        ( fft_desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX );
    DftiSetValue        ( fft_desc, DFTI_PLACEMENT             , DFTI_NOT_INPLACE     ); // Out-of-place transformation.
    DftiSetValue        ( fft_desc, DFTI_INPUT_STRIDES  , rs  );
    DftiSetValue        ( fft_desc, DFTI_OUTPUT_STRIDES , cs  );
    DftiCommitDescriptor( fft_desc );
    DftiComputeForward  ( fft_desc, in , in_fft  );
    DftiComputeForward  ( fft_desc, ker, ker_fft );

    for(long long i = 0; i &amp;lt; (long long)N*N*N; i++ )
    {
        out_fft&lt;I&gt;.real = in_fft&lt;I&gt;.real * ker_fft&lt;I&gt;.real;
        out_fft&lt;I&gt;.imag = in_fft&lt;I&gt;.imag * ker_fft&lt;I&gt;.imag;
    }

    // Change strides to compute backward transform.
    DftiSetValue        ( fft_desc, DFTI_INPUT_STRIDES , cs);
    DftiSetValue        ( fft_desc, DFTI_OUTPUT_STRIDES, rs);
    DftiCommitDescriptor( fft_desc );
    DftiComputeBackward ( fft_desc, out_fft, out2 );

    // Printing the zero padded 3D convolved result.
    for( long long i = 0; i &amp;lt; (long long)N*N*N; i++ )
        printf( out2, N*N*N );
    
    /* I don't know how to take out the zero padded results and 
       save the actual result in the variable named "out" */

    DftiFreeDescriptor  ( &amp;amp;fft_desc );

    delete[] in_fft;
    delete[] ker_fft;
    delete[] out2;
}

int max(int a, int b, int c)
{
     int m = a;
     (m &amp;lt; b) &amp;amp;&amp;amp; (m = b); //these are not conditional statements.
     (m &amp;lt; c) &amp;amp;&amp;amp; (m = c); //these are just boolean expressions.
     return m;
}&lt;/I&gt;&lt;/I&gt;&lt;/I&gt;&lt;/I&gt;&lt;/I&gt;&lt;/I&gt;&lt;/PRE&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Sat, 06 Dec 2014 00:01:31 GMT</pubDate>
    <dc:creator>A2009</dc:creator>
    <dc:date>2014-12-06T00:01:31Z</dc:date>
    <item>
      <title>FFT-Based 3D Convolution With Zero Padding</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/FFT-Based-3D-Convolution-With-Zero-Padding/m-p/1013254#M19322</link>
      <description>&lt;P&gt;I have been trying to figure out how I can use Intel MKL to perform a FFT-based 3D convolution with zero-padding. &amp;nbsp;I have been searching and posting in online forums (including Intel MKL forum), unfortunately, I have not been very successful so far.&amp;nbsp;&lt;/P&gt;

&lt;P&gt;I have a 3D array and is stored as a 1D array of type double in a columnwise fashion. Similarly the kernel is of type double and is saved columnwise. For example,&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;for( int k = 0; k &amp;lt; nk; k++ ) // Loop through the height.
    for( int j = 0; j &amp;lt; nj; j++ ) // Loop through the rows.
        for( int i = 0; i &amp;lt; ni; i++ ) // Loop through the columns.
        {
            ijk = i + ni * j + ni * nj * k;
            my3Darray[ ijk ] = 1.0;
        }&lt;/PRE&gt;

&lt;P&gt;I am writing a 3D convolution function:&lt;/P&gt;

&lt;UL&gt;
	&lt;LI&gt;which takes in real values (not complex values) and&lt;/LI&gt;
	&lt;LI&gt;outputs the results of the convolution,&lt;/LI&gt;
	&lt;LI&gt;for the computation of convolution, I am performing a "not-in-place" FFT on the input array as well as the kernel in order to prevent them from getting modified (I need to use them later in my code) and&lt;/LI&gt;
	&lt;LI&gt;then do the backward computation "in-place".&lt;/LI&gt;
&lt;/UL&gt;

&lt;P&gt;During the process I am also considering the zero padding to avoid any artifacts. &amp;nbsp;The size of FFTs are (dim_input+dim_kernel-1) on each dimension and the next highest power of two is chosen for speed.&lt;/P&gt;

&lt;P&gt;My questions are:&lt;/P&gt;

&lt;OL&gt;
	&lt;LI&gt;How can I perform the zero-padding?&lt;/LI&gt;
	&lt;LI&gt;How should I deal with the size of the arrays used by FFT functions?&lt;/LI&gt;
	&lt;LI&gt;How can I take out the zero padded results and get the actual result?&lt;/LI&gt;
&lt;/OL&gt;

&lt;P&gt;I would be &amp;nbsp;&lt;STRONG&gt;absolutely grateful&lt;/STRONG&gt; to have any comments or suggestion.&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;#include "mkl.h"

int max(int a, int b, int c);
void Conv3D_R2C( 
    double *in, int nRowsIn , int nColsIn , int nHeightsIn ,
    double *ker, int nRowsKer, int nColsKer, int nHeightsKer,
    double *out );
    
int main()
{

    int n = 5;
    int nkernel = 3;

    double *a          = new double [n*n*n]; // This array is real.
    double *aconvolved = new double [n*n*n]; // The convolved array is also real.
    double *kernel     = new double [nkernel*nkernel*nkernel]; // kernel is real.

    // Fill the array with some 'real' numbers.
    for( int i = 0; i &amp;lt; n*n*n; i++ )
        a[ i ] = 1.0;

    // Fill the kernel with some 'real' numbers.
    for( int i = 0; i &amp;lt; nkernel*nkernel*nkernel; i++ )
        kernel[ i ] = 1.0;

    // Calculate the convolution.
    Conv3D_R2C( a, n, n, n, kernel, nkernel, nkernel, nkernel, aconvolved );

    delete[] a;
    delete[] kernel;
    delete[] aconvolved;
}

void Conv3D_R2C( // Real to Complex 3D FFT.
    double *in, int nRowsIn , int nColsIn , int nHeightsIn ,
    double *ker, int nRowsKer, int nColsKer, int nHeightsKer,
    double *out )
{
    
    int nIn  = max( nRowsIn , nColsIn , nHeightsIn  );
    int nKer = max( nRowsKer, nColsKer, nHeightsKer );
    int n = nIn + nKer - 1;

    /* Strides describe data layout in real and conjugate-even domain. */
    MKL_LONG rs[4], cs[4];
    
    // DFTI descriptor.
    DFTI_DESCRIPTOR_HANDLE fft_desc = 0;
    
    // Round up to the next highest power of 2.
    unsigned int N = (unsigned int) n; // compute the next highest power of 2 of 32-bit n.
    N--;
    N |= N &amp;gt;&amp;gt; 1;
    N |= N &amp;gt;&amp;gt; 2;
    N |= N &amp;gt;&amp;gt; 4;
    N |= N &amp;gt;&amp;gt; 8;
    N |= N &amp;gt;&amp;gt; 16;
    N++;
    
    // Variables needed for out-of-place computations.
    MKL_Complex16 *in_fft  = new MKL_Complex16 [ N*N*N ];
    MKL_Complex16 *ker_fft = new MKL_Complex16 [ N*N*N ];
    MKL_Complex16 *out_fft = new MKL_Complex16 [ N*N*N ];
    double *out2 = new double [ N*N*N ];
   
    /* Compute strides */
    rs[3] = 1;           cs[3] = 1;
    rs[2] = (N/2+1)*2;   cs[2] = (N/2+1);
    rs[1] = N*(N/2+1)*2; cs[1] = N*(N/2+1);
    rs[0] = 0;           cs[0] = 0;
    
    // Create DFTI descriptor.
    MKL_LONG sizes[] = { N, N, N };
    DftiCreateDescriptor( &amp;amp;fft_desc, DFTI_DOUBLE, DFTI_REAL, 3, sizes );
    
    // Configure DFTI descriptor.
    DftiSetValue        ( fft_desc, DFTI_CONJUGATE_EVEN_STORAGE, DFTI_COMPLEX_COMPLEX );
    DftiSetValue        ( fft_desc, DFTI_PLACEMENT             , DFTI_NOT_INPLACE     ); // Out-of-place transformation.
    DftiSetValue        ( fft_desc, DFTI_INPUT_STRIDES  , rs  );
    DftiSetValue        ( fft_desc, DFTI_OUTPUT_STRIDES , cs  );
    DftiCommitDescriptor( fft_desc );
    DftiComputeForward  ( fft_desc, in , in_fft  );
    DftiComputeForward  ( fft_desc, ker, ker_fft );

    for(long long i = 0; i &amp;lt; (long long)N*N*N; i++ )
    {
        out_fft&lt;I&gt;.real = in_fft&lt;I&gt;.real * ker_fft&lt;I&gt;.real;
        out_fft&lt;I&gt;.imag = in_fft&lt;I&gt;.imag * ker_fft&lt;I&gt;.imag;
    }

    // Change strides to compute backward transform.
    DftiSetValue        ( fft_desc, DFTI_INPUT_STRIDES , cs);
    DftiSetValue        ( fft_desc, DFTI_OUTPUT_STRIDES, rs);
    DftiCommitDescriptor( fft_desc );
    DftiComputeBackward ( fft_desc, out_fft, out2 );

    // Printing the zero padded 3D convolved result.
    for( long long i = 0; i &amp;lt; (long long)N*N*N; i++ )
        printf( out2, N*N*N );
    
    /* I don't know how to take out the zero padded results and 
       save the actual result in the variable named "out" */

    DftiFreeDescriptor  ( &amp;amp;fft_desc );

    delete[] in_fft;
    delete[] ker_fft;
    delete[] out2;
}

int max(int a, int b, int c)
{
     int m = a;
     (m &amp;lt; b) &amp;amp;&amp;amp; (m = b); //these are not conditional statements.
     (m &amp;lt; c) &amp;amp;&amp;amp; (m = c); //these are just boolean expressions.
     return m;
}&lt;/I&gt;&lt;/I&gt;&lt;/I&gt;&lt;/I&gt;&lt;/I&gt;&lt;/I&gt;&lt;/PRE&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 06 Dec 2014 00:01:31 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/FFT-Based-3D-Convolution-With-Zero-Padding/m-p/1013254#M19322</guid>
      <dc:creator>A2009</dc:creator>
      <dc:date>2014-12-06T00:01:31Z</dc:date>
    </item>
  </channel>
</rss>

