<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Hi Piotr,  in Intel® Integrated Performance Primitives</title>
    <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987747#M21969</link>
    <description>&lt;P&gt;Hi Piotr,&amp;nbsp;&lt;/P&gt;

&lt;P&gt;The code looks ok, just one small problem about&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&lt;CODE style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px 0.3em 0px 0px !important; border: 0px !important; outline: 0px !important; text-align: right !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: 2.7em !important; line-height: 1.1em !important; min-height: auto !important; display: block !important; background: none !important;"&gt;21&lt;/CODE&gt;&lt;CODE class="spaces" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; background: none !important;"&gt;&amp;nbsp;&lt;/CODE&gt;&lt;CODE class="plain" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*&lt;/CODE&gt;&lt;CODE class="keyword bold" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; 
margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; font-weight: bold !important; min-height: auto !important; color: rgb(0, 102, 153) !important; background: none !important;"&gt;sizeof&lt;/CODE&gt;&lt;CODE class="plain" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;(Ipp32f));&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;and&amp;nbsp;&lt;/CODE&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;ppDst[ii] = pDst + ii * (dstStep /&amp;nbsp;&lt;/CODE&gt;&lt;CODE class="keyword bold" style="font-size: 13.008px; line-height: 14.3088px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto 
!important; height: auto !important; width: auto !important; font-weight: bold !important; min-height: auto !important; color: rgb(0, 102, 153) !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;sizeof&lt;/CODE&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;(Ipp32f));&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;Here ppDst is double pointers. &amp;nbsp;&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;if in 32bit application, it is ok as sizeof(Ipp32f)=4 and sizeof(Ipp32f *) =4.&amp;nbsp;&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;but in a 64-bit application, sizeof(Ipp32f*)=8, not 4. So for safety, it is better to change&amp;nbsp;&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="spaces" style="font-size: 13.008px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; background: none !important;"&gt;&amp;nbsp;&lt;/CODE&gt;&lt;CODE class="plain" style="font-size: 13.008px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*&lt;/CODE&gt;&lt;CODE class="keyword bold" style="font-size: 13.008px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; font-weight: bold !important; min-height: auto !important; color: rgb(0, 102, 153) !important; background: none !important;"&gt;sizeof&lt;/CODE&gt;&lt;CODE class="plain" style="font-size: 13.008px; 
font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;(&lt;STRONG&gt;Ipp32f*&lt;/STRONG&gt;));&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-size: 13.008px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;Best Regards,&lt;BR /&gt;
	Ying&lt;/CODE&gt;&lt;BR /&gt;
	&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 28 Jan 2016 03:40:15 GMT</pubDate>
    <dc:creator>Ying_H_Intel</dc:creator>
    <dc:date>2016-01-28T03:40:15Z</dc:date>
    <item>
      <title>Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987728#M21950</link>
      <description>&lt;DIV&gt;&lt;FONT size="2"&gt;Does anybody have a simple example of how to perform a 2D separable convolution with an nxn filter? &lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;I do not understand well how IPP works.&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;Is it better to add the border to the image and then to use FilterRow and FilterColumn one after the other with the entire image as ROI, or calculate the border on the fly using ippiFilterRowBorderPipeline? In the latter case, how can I deal with the top and bottom border, since ippiFilterColumnPipeline does not include the on-the-fly calculation of the borders?&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;I think I am missing something. I have studied the example at the end of the section in the documentation, but that one is limited to a 3x3 kernel.&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;Thanks&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;Emilio&lt;/FONT&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 06 Jul 2005 21:30:22 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987728#M21950</guid>
      <dc:creator>emilio_maggio</dc:creator>
      <dc:date>2005-07-06T21:30:22Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987729#M21951</link>
      <description>&lt;DIV&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;Hi,&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;You can use ippiFilterRowBorderPipeline and ippiFilterColumnPipeline. For ippiFilterColumnPipeline you have to prepare borders manually&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;We prepared a simple example for you; please take a look at the attached source file. (border - ippBorderRepl, kernelSize - 5, anchor - 2)&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;Regards,&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt; Michael&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Fri, 08 Jul 2005 20:19:49 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987729#M21951</guid>
      <dc:creator>Intel_C_Intel</dc:creator>
      <dc:date>2005-07-08T20:19:49Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987730#M21952</link>
      <description>&lt;DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;Hi,&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;You can use ippiFilterRowBorderPipeline and ippiFilterColumnPipeline. For ippiFilterColumnPipeline you have to prepare borders manually&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;We prepared a simple example for you; please take a look at the attached source file. (border - ippBorderRepl, kernelSize - 5, anchor - 2)&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt;Regards,&lt;/FONT&gt;&lt;/DIV&gt;
&lt;DIV&gt;&lt;FONT size="2"&gt; Michael&lt;/FONT&gt;&lt;/DIV&gt;&lt;/DIV&gt;</description>
      <pubDate>Wed, 24 Aug 2005 12:12:26 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987730#M21952</guid>
      <dc:creator>Intel_C_Intel</dc:creator>
      <dc:date>2005-08-24T12:12:26Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987731#M21953</link>
      <description>&lt;P&gt;Hallo &lt;FONT size="2"&gt;Michael,&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;Thanks for posting a general example regarding the convolution with separable filters. I am trying to modify your code so that it can be called from within Matlab (in a MEX file). There are a few things that still are not clear to me. I hope you can help me.&lt;/P&gt;
&lt;P&gt;I do not clearly understand what this section of the code does:&lt;/P&gt;
&lt;P&gt;/* organize dst buffer */&lt;BR /&gt; pTmp = (Ipp16s*)(ppDst+size.height);&lt;BR /&gt; for(i=0;i&amp;lt;size.height;i++,pTmp+=size.width){&lt;BR /&gt; ppDst[i] = pTmp;&lt;BR /&gt; ppSrc[i+2]=pTmp;&lt;BR /&gt; }&lt;BR /&gt; /* organize replicate border for ippiFilterColumnPipeline_16s_C1R */&lt;BR /&gt; /* top */&lt;BR /&gt; ppSrc[0]=ppSrc[2];&lt;BR /&gt; ppSrc[1]=ppSrc[2];&lt;BR /&gt; /* bottom */&lt;BR /&gt; ppSrc[i-1+4]=ppDst[size.height-1];&lt;BR /&gt; ppSrc[i-2+4]=ppDst[size.height-1];&lt;BR /&gt;&lt;/P&gt;
&lt;P&gt;I understand that you are arranging the pointers to the image borders for the replication, but the whole process is not very clear. I would appreciate some more specific comments in the code so that I can generalize this to any kernel size (I suspect that some coefficients like the 2 and the 4 in ppSrc[2] and ppSrc[i-2+4] are somehow related to the semisize of the filter that you are using in this example, but it is not clear why and how). &lt;/P&gt;
&lt;P&gt;I also include my version of the code so that you may spot major mistakes and bugs:&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" size="2"&gt;#ifdef&lt;/FONT&gt;&lt;FONT size="2"&gt; MEX_CONV2_SEP_SINGLE_USE_IPP&lt;/FONT&gt;&lt;/P&gt;&lt;FONT color="#0000ff" size="2"&gt;
&lt;P&gt;inline&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt; &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;void&lt;/FONT&gt;&lt;FONT size="2"&gt; CheckIPPStatus(IppStatus status)&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT size="2"&gt;{&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#0000ff" size="2"&gt;if&lt;/FONT&gt;&lt;FONT size="2"&gt;(status != ippStsNoErr)&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;{&lt;/P&gt;
&lt;P&gt;mexPrintf(&lt;FONT color="#800000" size="2"&gt;"\nstatus = %s "&lt;/FONT&gt;&lt;FONT size="2"&gt;, ippGetStatusString(status));&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;mexErrMsgTxt(&lt;FONT color="#800000" size="2"&gt;"IPP error"&lt;/FONT&gt;&lt;FONT size="2"&gt;);&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;}&lt;/P&gt;
&lt;P&gt;}&lt;/P&gt;&lt;FONT color="#008000" size="2"&gt;
&lt;P&gt;// Adapted from:&lt;/P&gt;
&lt;P&gt;// &lt;A href="http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx" target="_blank"&gt;http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx&lt;/A&gt;&lt;/P&gt;
&lt;P&gt;//&lt;/P&gt;
&lt;P&gt;// INTEL CORPORATION PROPRIETARY INFORMATION&lt;/P&gt;
&lt;P&gt;// This software is supplied under the terms of a license agreement or&lt;/P&gt;
&lt;P&gt;// nondisclosure agreement with Intel Corporation and may not be copied&lt;/P&gt;
&lt;P&gt;// or disclosed except in accordance with the terms of that agreement.&lt;/P&gt;
&lt;P&gt;// Copyright (c) 2005 Intel Corporation. All Rights Reserved.&lt;/P&gt;
&lt;P&gt;//&lt;/P&gt;
&lt;P&gt;// Separable 2D convolution example&lt;/P&gt;
&lt;P&gt;//&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;
&lt;P&gt;void&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt; SepConv(&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; Nc, Ipp32f *hr, &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; Nr)&lt;P&gt;&lt;/P&gt;
&lt;P&gt;{&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; sizerow, sizecol, i;&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; maxKernelSize = (Nc &amp;gt; Nr) ? Nc : Nr;&lt;P&gt;&lt;/P&gt;
&lt;P&gt;Ipp32f **ppDst, **ppSrc, *pTmp;&lt;/P&gt;
&lt;P&gt;Ipp8u *pBufferCol, *pBufferRow;&lt;/P&gt;
&lt;P&gt;IppStatus status;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;//int xAnchor = (Nr &amp;gt;&amp;gt; 1) + 1;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; yAnchor = (Nc &amp;gt;&amp;gt; 1) + 1;&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// allocate temporary dst buffer&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;ppDst = (Ipp32f**)ippsMalloc_8u(size-&amp;gt;width*size-&amp;gt;height*&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f) + (size-&amp;gt;height)*&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f*));&lt;P&gt;&lt;/P&gt;
&lt;P&gt;ppSrc = (Ipp32f**)ippsMalloc_8u((size-&amp;gt;height+maxKernelSize)*&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f*)); &lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// size of temporary buffers&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;status = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &amp;amp;sizerow); &lt;/P&gt;
&lt;P&gt;CheckIPPStatus(status);&lt;/P&gt;
&lt;P&gt;status = ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &amp;amp;sizecol);&lt;/P&gt;
&lt;P&gt;CheckIPPStatus(status);&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;//mexPrintf("\nsize = {%d,%d}", size-&amp;gt;height, size-&amp;gt;width);&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// allocate temporary buffers&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;pBufferCol = ippsMalloc_8u(sizecol);&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;if&lt;/FONT&gt;&lt;FONT size="2"&gt; (pBufferCol == NULL) mexErrMsgTxt(&lt;/FONT&gt;&lt;FONT color="#800000" size="2"&gt;"IPP error: failed to allocate column buffer"&lt;/FONT&gt;&lt;FONT size="2"&gt;);&lt;P&gt;&lt;/P&gt;
&lt;P&gt;pBufferRow = ippsMalloc_8u(sizerow);&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;if&lt;/FONT&gt;&lt;FONT size="2"&gt; (pBufferRow == NULL) mexErrMsgTxt(&lt;/FONT&gt;&lt;FONT color="#800000" size="2"&gt;"IPP error: failed to allocate row buffer"&lt;/FONT&gt;&lt;FONT size="2"&gt;);&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// organize dst buffer&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;pTmp = (Ipp32f*)(ppDst+size-&amp;gt;height);&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(i = 0; i &amp;lt; size-&amp;gt;height; i++, pTmp += size-&amp;gt;width)&lt;P&gt;&lt;/P&gt;
&lt;P&gt;{&lt;/P&gt;
&lt;P&gt;ppDst[i] = pTmp;&lt;/P&gt;
&lt;P&gt;ppSrc[i+2] = pTmp;&lt;/P&gt;
&lt;P&gt;}&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// organize replicate border for ippiFilterColumnPipeline_32f_C1R&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// top &lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;ppSrc[0] = ppSrc[2];&lt;/P&gt;
&lt;P&gt;ppSrc[1] = ppSrc[2];&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// bottom &lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;ppSrc[i-1+4] = ppDst[size-&amp;gt;height-1];&lt;/P&gt;
&lt;P&gt;ppSrc[i-2+4] = ppDst[size-&amp;gt;height-1];&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// perform the convolution&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;/*&lt;P&gt;&lt;/P&gt;
&lt;P&gt;mexPrintf("\npSrcBuffer = {%f, %f, %f, %f}", pSrcBuffer[0], pSrcBuffer[1], pSrcBuffer[2], pSrcBuffer[3]);&lt;/P&gt;
&lt;P&gt;mexPrintf("\nhc = {%f, %f, %f, %f}", hc[0], hc[1], hc[2], hc[3]);&lt;/P&gt;
&lt;P&gt;mexPrintf("\nsize = {%d, %d}", size-&amp;gt;height, size-&amp;gt;width);&lt;/P&gt;
&lt;P&gt;mexPrintf("\nsizerow = %d, sizecol = %d", sizerow, sizecol);&lt;/P&gt;
&lt;P&gt;*/&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;status = ippiFilterRowBorderPipeline_32f_C1R((&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f*) pSrcBuffer, size-&amp;gt;width*&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f), ppDst, &lt;P&gt;&lt;/P&gt;
&lt;P&gt;*size, hc, Nc, yAnchor, ippBorderRepl, 0, pBufferRow);&lt;/P&gt;
&lt;P&gt;CheckIPPStatus(status);&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;
&lt;P&gt;status = ippiFilterColumnPipeline_32f_C1R((&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f **)ppSrc, pDstBuffer, size-&amp;gt;width*&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f), *size, &lt;P&gt;&lt;/P&gt;
&lt;P&gt;hr, Nr, pBufferCol);&lt;/P&gt;
&lt;P&gt;CheckIPPStatus(status);&lt;/P&gt;
&lt;P&gt;ippsFree(ppSrc);&lt;/P&gt;
&lt;P&gt;ippsFree(ppDst);&lt;/P&gt;
&lt;P&gt;ippsFree(pBufferCol);&lt;/P&gt;
&lt;P&gt;ippsFree(pBufferRow);&lt;/P&gt;
&lt;P&gt;}&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;
&lt;P&gt;#endif&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000000"&gt;Thanks in advance for your help,&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000000"&gt;Marco&lt;/FONT&gt;&lt;/P&gt;&lt;/FONT&gt;</description>
      <pubDate>Mon, 18 Jun 2007 17:17:32 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987731#M21953</guid>
      <dc:creator>zuliani</dc:creator>
      <dc:date>2007-06-18T17:17:32Z</dc:date>
    </item>
    <item>
      <title>Separable 2d convolution with nxn filter (within Matlab MEX file)</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987732#M21954</link>
      <description>&lt;P&gt;This is the version of the code that I would expect to generalize the one originally posted by Michael. In particular I modified the section for the organization of the buffers as follows:&lt;/P&gt;&lt;PRE&gt;&lt;FONT size="2"&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// organize dst buffer &lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// (the offset takes care of the double pointer structure)&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;pTmp = (Ipp32f*)(ppDst+size-&amp;gt;height);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(i = 0; i&lt;SIZE-&gt;height; i++, pTmp += size-&amp;gt;width)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;ppDst&lt;I&gt; = pTmp;&lt;/I&gt;&lt;/P&gt;&lt;P&gt;ppSrc[i+yAnchor] = pTmp;&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/SIZE-&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(i = 0; i &amp;lt; yAnchor; i++) &lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// top&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;ppSrc&lt;I&gt; = ppSrc[yAnchor];&lt;/I&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// bottom&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;ppSrc[i + size-&amp;gt;height] = ppSrc[size-&amp;gt;height-1];&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;/FONT&gt;&lt;/PRE&gt;
&lt;P&gt;Unfortunately performing the second convolution (within the MEX file, ippiFilterColumnPipeline_32f_C1R) the status returned by IPP says &lt;FONT color="#ff0000"&gt;Null pointer error&lt;/FONT&gt;&lt;/P&gt;
&lt;P&gt;&lt;FONT color="#000000"&gt;I am reposting the original code for your convenience. Note that hxc and hr are the convolution kernels whose lenght is respectively Nc and Nr.&lt;/FONT&gt;&lt;/P&gt;&lt;PRE&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;P&gt;#ifdef&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt; MEX_CONV2_SEP_SINGLE_USE_IPP&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;P&gt;inline&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt; &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;void&lt;/FONT&gt;&lt;FONT size="2"&gt; CheckIPPStatus(IppStatus status)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;if&lt;/FONT&gt;&lt;FONT size="2"&gt;(status != ippStsNoErr)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;mexPrintf(&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#800000" size="2"&gt;"
status = %s "&lt;/FONT&gt;&lt;FONT size="2"&gt;, ippGetStatusString(status));&lt;P&gt;&lt;/P&gt;&lt;P&gt;mexErrMsgTxt(&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#800000" size="2"&gt;"IPP error"&lt;/FONT&gt;&lt;FONT size="2"&gt;);&lt;P&gt;&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;P&gt;// Adapted from:&lt;/P&gt;&lt;P&gt;// &lt;A href="http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx" target="_blank"&gt;http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx&lt;/A&gt;&lt;/P&gt;&lt;P&gt;//&lt;/P&gt;&lt;P&gt;// INTEL CORPORATION PROPRIETARY INFORMATION&lt;/P&gt;&lt;P&gt;// This software is supplied under the terms of a license agreement or&lt;/P&gt;&lt;P&gt;// nondisclosure agreement with Intel Corporation and may not be copied&lt;/P&gt;&lt;P&gt;// or disclosed except in accordance with the terms of that agreement.&lt;/P&gt;&lt;P&gt;// Copyright (c) 2005 Intel Corporation. All Rights Reserved.&lt;/P&gt;&lt;P&gt;//&lt;/P&gt;&lt;P&gt;// Separable 2D convolution example&lt;/P&gt;&lt;P&gt;//&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;P&gt;void&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt; SepConv(&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; Nc, Ipp32f *hr, &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; Nr)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; sizerow, sizecol, i;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; maxKernelSize = (Nc &amp;gt; Nr) ? 
Nc : Nr;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ipp32f **ppDst, **ppSrc, *pTmp;&lt;/P&gt;&lt;P&gt;Ipp8u *pBufferCol, *pBufferRow;&lt;/P&gt;&lt;P&gt;IppStatus status;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;//int xAnchor = (Nr &amp;gt;&amp;gt; 1) + 1;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT colo="" r="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; yAnchor = (Nc &amp;gt;&amp;gt; 1) + 1;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// allocate temporary dst buffer&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;ppDst = (Ipp32f**)ippsMalloc_8u(size-&amp;gt;width*size-&amp;gt;height*&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f) + (size-&amp;gt;height)*&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f*));&lt;P&gt;&lt;/P&gt;&lt;P&gt;ppSrc = (Ipp32f**)ippsMalloc_8u((size-&amp;gt;height+maxKernelSize)*&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f*)); &lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// size of temporary buffers&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;status = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &amp;amp;sizerow); &lt;/P&gt;&lt;P&gt;CheckIPPStatus(status);&lt;/P&gt;&lt;P&gt;status = ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, maxKernelSize, &amp;amp;sizecol);&lt;/P&gt;&lt;P&gt;CheckIPPStatus(status);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// allocate temporary 
buffers&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;pBufferCol = ippsMalloc_8u(sizecol);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;if&lt;/FONT&gt;&lt;FONT size="2"&gt; (pBufferCol == NULL) mexErrMsgTxt(&lt;/FONT&gt;&lt;FONT color="#800000" size="2"&gt;"IPP error: failed to allocate column buffer"&lt;/FONT&gt;&lt;FONT size="2"&gt;);&lt;P&gt;&lt;/P&gt;&lt;P&gt;pBufferRow = ippsMalloc_8u(sizerow);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;if&lt;/FONT&gt;&lt;FONT size="2"&gt; (pBufferRow == NULL) mexErrMsgTxt(&lt;/FONT&gt;&lt;FONT color="#800000" size="2"&gt;"IPP error: failed to allocate row buffer"&lt;/FONT&gt;&lt;FONT size="2"&gt;);&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// organize dst buffer (the offset takes care of the double pointer structure)&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;pTmp = (Ipp32f*)(ppDst+size-&amp;gt;height);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(i = 0; i&lt;SIZE-&gt;height; i++, pTmp += size-&amp;gt;width)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;ppDst&lt;I&gt; = pTmp;&lt;/I&gt;&lt;/P&gt;&lt;P&gt;ppSrc[i+yAnchor] = pTmp;&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/SIZE-&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(i = 0; i &amp;lt; yAnchor; i++) &lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// top&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;ppSrc&lt;I&gt; = ppSrc[yAnchor];&lt;/I&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// bottom&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;ppSrc[i + size-&amp;gt;height] = 
ppSrc[size-&amp;gt;height-1];&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// perform the convolutions&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;status = ippiFilterRowBorderPipeline_32f_C1R((&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f*) pSrcBuffer, size-&amp;gt;width*&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f), ppDst, *size, hc, Nc, yAnchor, ippBorderRepl, 0, pBufferRow);&lt;P&gt;&lt;/P&gt;&lt;P&gt;CheckIPPStatus(status);&lt;/P&gt;&lt;P&gt;status = ippiFilterColumnPipeline_32f_C1R((&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f **)ppSrc, pDstBuffer, size-&amp;gt;width*&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f), *size, hr, Nr, pBufferCol);&lt;P&gt;&lt;/P&gt;&lt;P&gt;CheckIPPStatus(status);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;ippsFree(ppSrc);&lt;/P&gt;&lt;P&gt;ippsFree(ppDst);&lt;/P&gt;&lt;P&gt;ippsFree(pBufferCol);&lt;/P&gt;&lt;P&gt;ippsFree(pBufferRow);&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;P&gt;#endif&lt;/P&gt;&lt;/FONT&gt;&lt;/PRE&gt;
&lt;P&gt;The above function is called as: &lt;/P&gt;&lt;PRE&gt;&lt;FONT size="2"&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// in
version for the different byte ordering between Matlab and IPP&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;IppiSize size;&lt;/P&gt;&lt;P&gt;size.height = width;&lt;/P&gt;&lt;P&gt;size.width = height;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;SepConv((Ipp32f *)f, (Ipp32f *)g, &amp;amp;size, (Ipp32f *)hc, Nc, (Ipp32f *)hr, Nr);&lt;/P&gt;&lt;/FONT&gt;&lt;/PRE&gt;
&lt;P&gt;Again thanks in advance for any useful insight,&lt;/P&gt;
&lt;P&gt;Marco&lt;/P&gt;</description>
      <pubDate>Mon, 18 Jun 2007 20:57:11 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987732#M21954</guid>
      <dc:creator>zuliani</dc:creator>
      <dc:date>2007-06-18T20:57:11Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987733#M21955</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;Example 9-4 (p 9-55 of IPP manual vol2, Jan 2007) described how to use ippiFilterRow/ColumnBorderPipeline functions to calculate the separable convolution without the intermediate buffer for the whole image.&lt;/P&gt;
&lt;P&gt;The ring buffer (the double pointer) for convolved rows is used there. You should define the border type for row convolution because there is no data outside the image. But border rows for column convolution are formed manually, so you do not need an extra argument for them.&lt;/P&gt;
&lt;P&gt;E.g., for a 3x3 convolution with a replicated border, you need to replicate the pointer to the first convolved row.&lt;/P&gt;
&lt;P&gt;Thanks,&lt;/P&gt;
&lt;P&gt;Alexander&lt;/P&gt;
&lt;P&gt;&lt;/P&gt;</description>
      <pubDate>Tue, 19 Jun 2007 08:23:00 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987733#M21955</guid>
      <dc:creator>Intel_C_Intel</dc:creator>
      <dc:date>2007-06-19T08:23:00Z</dc:date>
    </item>
    <item>
      <title>Separable 2d convolution with nf x mf filter for ni x mi images</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987734#M21956</link>
      <description>&lt;P&gt;Dear Alexander,&lt;/P&gt;
&lt;P&gt;thanks for your prompt response. I must say that I found the Example 9-4 extremely cryptic (30 lines of dense code without a single comment...). However I came up with a routine that seems to achieve the task. Note that there are a bunch of tricks to handle kernel sizes that can be even or odd. The flipping of the kernel is meant to adapt my routine to the conv2 routine of Matlab (and for the same reason one might notice an "inversion" between rows and columns, since Matlab uses column-major ordering of the data, as in Fortran). &lt;/P&gt;
&lt;P&gt;I hope that this can be of some help. I would appreciate any help from the community as far asbugs/improvments are concerned.&lt;/P&gt;&lt;PRE&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;P&gt;#ifdef&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt; MEX_CONV2_SEP_SINGLE_USE_IPP&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;P&gt;inline&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt; &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;void&lt;/FONT&gt;&lt;FONT size="2"&gt; CheckIPPStatus(IppStatus status)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;if&lt;/FONT&gt;&lt;FONT size="2"&gt;(status != ippStsNoErr)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;mexPrintf(&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#800000" size="2"&gt;"
status = %s "&lt;/FONT&gt;&lt;FONT size="2"&gt;, ippGetStatusString(status));&lt;P&gt;&lt;/P&gt;&lt;P&gt;mexErrMsgTxt(&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#800000" size="2"&gt;"IPP error"&lt;/FONT&gt;&lt;FONT size="2"&gt;);&lt;P&gt;&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;P&gt;// Adapted by Marco Zuliani (zuliani@mayachitra.com) from:&lt;/P&gt;&lt;P&gt;//&lt;/P&gt;&lt;P&gt;// &lt;A href="http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx" target="_blank"&gt;http://softwarecommunity.intel.com/isn/Community/en-US/forums/5482632/PostAttachment.aspx&lt;/A&gt;&lt;/P&gt;&lt;P&gt;//&lt;/P&gt;&lt;P&gt;// INTEL CORPORATION PROPRIETARY INFORMATION&lt;/P&gt;&lt;P&gt;// This software is supplied under the terms of a license agreement or&lt;/P&gt;&lt;P&gt;// nondisclosure agreement with Intel Corporation and may not be copied&lt;/P&gt;&lt;P&gt;// or disclosed except in aNcssordance with the terms of that agreement.&lt;/P&gt;&lt;P&gt;// Copyright (c) 2005 Intel Corporation. 
All Rights Reserved.&lt;/P&gt;&lt;P&gt;//&lt;/P&gt;&lt;P&gt;// Separable 2D convolution example&lt;/P&gt;&lt;P&gt;//&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;P&gt;void&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt; SepConv(&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f *pSrcBuffer, Ipp32f *pDstBuffer, IppiSize *size, Ipp32f *hc, &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; Nc, Ipp32f *hr, &lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; Nr)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; sizerow, sizecol, i, j;&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ipp32f **ppDst, **ppSrc, *pTmp;&lt;/P&gt;&lt;P&gt;Ipp8u *pBufferCol, *pBufferRow;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// flip the kernels and align the memory to please IPP&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f(Nc * &lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;float&lt;/FONT&gt;&lt;FONT size="2"&gt;));&lt;P&gt;&lt;/P&gt;&lt;P&gt;Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f(Nr * &lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;float&lt;/FONT&gt;&lt;FONT size="2"&gt;));&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(i = 0; i &amp;lt; Nc; i++) hc_flipped&lt;I&gt; = hc[Nc-i-1];&lt;/I&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#0000ff" 
size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(j = 0; j &amp;lt; Nr; j++) hr_flipped&lt;J&gt; = hr[Nr-
j-1];&lt;/J&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// compute the kernel semisizes&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; Ncss = Nc &amp;gt;&amp;gt; 1;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; Nrss = Nr &amp;gt;&amp;gt; 1;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// compute the kernel offsets (0 -&amp;gt; odd, 1 -&amp;gt; even)&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; co = 1-(Nc%2);&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;int&lt;/FONT&gt;&lt;FONT size="2"&gt; ro = 1-(Nr%2);&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// allocate temporary dst buffer&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;ppDst = (Ipp32f**)ippsMalloc_8u(size-&amp;gt;width*size-&amp;gt;height*&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f)+(size-&amp;gt;height)*&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f*));&lt;P&gt;&lt;/P&gt;&lt;P&gt;ppSrc = (Ipp32f**)ippsMalloc_8u((size-&amp;gt;height+Nc)*&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f*));&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// size of temporary buffers &lt;/FONT&gt;&lt;/P&gt;&lt;FONT 
size="2"&gt;&lt;P&gt;ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(*size, Nc, &amp;amp;sizerow); &lt;/P&gt;&lt;P&gt;ippiFilterColumnPipelineGetBufferSize_32f_C1R(*size, Nr, &amp;amp;sizecol);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// allocate temporary buffers&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;pBufferCol = ippsMalloc_8u(sizecol);&lt;/P&gt;&lt;P&gt;pBufferRow = ippsMalloc_8u(sizerow);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// organize dst buffer &lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;pTmp = (Ipp32f*)(ppDst + size-&amp;gt;height);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(i = 0; i &amp;lt; size-&amp;gt;height; i++, pTmp += size-&amp;gt;width)&lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;ppDst&lt;I&gt; = pTmp;&lt;/I&gt;&lt;/P&gt;&lt;P&gt;ppSrc[i+Nrss-ro] = pTmp;&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// organize replicate border for ippiFilterColumnPipeline_32f_C1R &lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;for&lt;/FONT&gt;&lt;FONT size="2"&gt;(j = 0; j &amp;lt; Nrss; j++) &lt;P&gt;&lt;/P&gt;&lt;P&gt;{&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// top&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;ppSrc&lt;J&gt; = ppSrc[Nrss-ro];&lt;/J&gt;&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;// bottom&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT size="2"&gt;&lt;P&gt;ppSrc[i-j+Nr-2] = 
ppDst[size-&amp;gt;height-1];&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// perform the actual convolutions&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;ippiFilterRowBorderPipeline_32f_C1R((&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f*) pSrcBuffer, size-&amp;gt;width*&lt;/FONT&gt;&lt;FONT colo="" r="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f), ppDst, &lt;P&gt;&lt;/P&gt;&lt;P&gt;*size, hc_flipped, Nc, Ncss-co, ippBorderConst, 0, pBufferRow);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;ippiFilterColumnPipeline_32f_C1R((&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;const&lt;/FONT&gt;&lt;FONT size="2"&gt; Ipp32f**)ppSrc, pDstBuffer, size-&amp;gt;width*&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;sizeof&lt;/FONT&gt;&lt;FONT size="2"&gt;(Ipp32f), *size, &lt;P&gt;&lt;/P&gt;&lt;P&gt;hr_flipped, Nr, pBufferCol);&lt;/P&gt;&lt;P&gt;&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#008000" size="2"&gt;&lt;/FONT&gt;&lt;P&gt;&lt;/P&gt;&lt;P&gt;&lt;FONT color="#008000" size="2"&gt;// release some memory&lt;/FONT&gt;&lt;/P&gt;&lt;FONT size="2"&gt;&lt;P&gt;ippsFree(hc_flipped);&lt;/P&gt;&lt;P&gt;ippsFree(hr_flipped);&lt;/P&gt;&lt;P&gt;ippsFree(ppSrc);&lt;/P&gt;&lt;P&gt;ippsFree(ppDst);&lt;/P&gt;&lt;P&gt;ippsFree(pBufferCol);&lt;/P&gt;&lt;P&gt;ippsFree(pBufferRow);&lt;/P&gt;&lt;P&gt;}&lt;/P&gt;&lt;/FONT&gt;&lt;FONT color="#0000ff" size="2"&gt;&lt;P&gt;#endif&lt;/P&gt;&lt;/FONT&gt;&lt;/PRE&gt;</description>
      <pubDate>Tue, 19 Jun 2007 19:23:53 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987734#M21956</guid>
      <dc:creator>zuliani</dc:creator>
      <dc:date>2007-06-19T19:23:53Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987735#M21957</link>
      <description>&lt;DIV style="margin:0px;"&gt;&lt;/DIV&gt;
/*********************************************************************************&lt;BR /&gt;&lt;BR /&gt;Copyright(C) 2004-2009, Riverain Medical Group LLC. All Rights Reserved.&lt;BR /&gt;&lt;BR /&gt;This is UNPUBLISHED PROPRIETARY SOURCE CODE of Riverain Medical Group, LLC.&lt;BR /&gt;The contents of this file may not be disclosed to third parties, copied or&lt;BR /&gt;duplicated in any form, in whole or in part, for use or transmittal, without&lt;BR /&gt;the prior written permission of Riverain Medical Group LLC&lt;BR /&gt;&lt;BR /&gt;**********************************************************************************/&lt;BR /&gt;&lt;BR /&gt;/*!&lt;BR /&gt;* file IppSepFilter.cpp&lt;BR /&gt;* brief Implementation of Separable Filter with IPP&lt;BR /&gt;* author J. Schamus, jschamus@riverainmedical.com&lt;BR /&gt;*/&lt;BR /&gt;&lt;BR /&gt;# pragma once&lt;BR /&gt;# include "Image.h"&lt;BR /&gt;&lt;BR /&gt;static inline IppStatus ippSepFilter( &lt;BR /&gt; const Ipp32f* pSrc,     // Sourse Image &lt;BR /&gt; const int  srcStep,     // Source step&lt;BR /&gt; Ipp32f*   pDst,     // Destination Image&lt;BR /&gt; const int  dstStep,     // Destination step&lt;BR /&gt; const IppiSize roiSize,     // Source/Destination size&lt;BR /&gt; const Ipp32f* hc,      // Column filter&lt;BR /&gt; const int  Nc,       // Column filter size&lt;BR /&gt; const Ipp32f* hr,      // Row filter&lt;BR /&gt; const int  Nr,       // Row filter size&lt;BR /&gt; const PadType&amp;amp; padType = CONSTANT,  // Padding type&lt;BR /&gt; const Ipp32f val = 0.0f )   // Value to use with CONSTANT padding&lt;BR /&gt;{&lt;BR /&gt; IppStatus sts;&lt;BR /&gt;&lt;BR /&gt;
&lt;DIV style="text-align: left;"&gt;int sizerow, sizecol;&lt;BR /&gt;&lt;/DIV&gt;
Ipp32f **ppDst, **ppSrc, *pTmp = NULL;&lt;BR /&gt; Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;&lt;BR /&gt;&lt;BR /&gt; // compute the kernel semisizes&lt;BR /&gt; int Ncss = Nc &amp;gt;&amp;gt; 1;&lt;BR /&gt; int Nrss = Nr &amp;gt;&amp;gt; 1;&lt;BR /&gt;&lt;BR /&gt; // compute the kernel offsets (0 -&amp;gt; odd, 1 -&amp;gt; even)&lt;BR /&gt; int co = 1 - ( Nc % 2 );&lt;BR /&gt; int ro = 1 - ( Nr % 2 );&lt;BR /&gt;&lt;BR /&gt; // allocate temporary dst buffer&lt;BR /&gt; int tmpStep;&lt;BR /&gt; pTmp = ippiMalloc_32f_C1( roiSize.width, roiSize.height + (Ncss * 2), &amp;amp;tmpStep );&lt;BR /&gt; if( !pTmp ) return ippStsMemAllocErr;&lt;BR /&gt; &lt;BR /&gt; int tmpw = tmpStep / sizeof(Ipp32f);&lt;BR /&gt; IppiSize tmpSize;&lt;BR /&gt; tmpSize.height = roiSize.height + (Ncss * 2) - co; tmpSize.width = roiSize.width;&lt;BR /&gt; ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );&lt;BR /&gt;&lt;BR /&gt; ppDst = new Ipp32f*[roiSize.height];&lt;BR /&gt; ppSrc = new Ipp32f*[roiSize.height + (Nrss * 2) - co];&lt;BR /&gt;&lt;BR /&gt; // size of temporary buffers&lt;BR /&gt; if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizerow) )&lt;BR /&gt; return sts;&lt;BR /&gt; if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizecol) )&lt;BR /&gt; return sts;&lt;BR /&gt;&lt;BR /&gt; // allocate temporary buffers&lt;BR /&gt; pBufferCol = ippsMalloc_8u( sizecol );&lt;BR /&gt; if( !pBufferCol ) return ippStsMemAllocErr;&lt;BR /&gt; pBufferRow = ippsMalloc_8u( sizerow );&lt;BR /&gt; if( !pBufferRow ) return ippStsMemAllocErr;&lt;BR /&gt;&lt;BR /&gt; Nrss -= ro;&lt;BR /&gt; Ncss -= co;&lt;BR /&gt; // organize dst buffer&lt;BR /&gt; for( int ii=0,jj=Ncss;ii&lt;ROISIZE.HEIGHT&gt;&lt;/ROISIZE.HEIGHT&gt; {&lt;BR /&gt; ppDst[ii] = pTmp + jj * tmpw;&lt;BR /&gt; ppSrc[jj] = pTmp + jj * tmpw;&lt;BR /&gt; }&lt;BR /&gt; &lt;BR /&gt; IppiBorderType borderType;&lt;BR /&gt; &lt;BR /&gt; switch( padType )&lt;BR /&gt; {&lt;BR /&gt; case 
CONSTANT:&lt;BR /&gt; for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt; {&lt;BR /&gt; ppSrc[ii] = pTmp + ii * tmpw;&lt;BR /&gt; ppSrc[jj] = pTmp + jj * tmpw;&lt;BR /&gt; if( val )&lt;BR /&gt; {&lt;BR /&gt; ippsSet_32f( val, ppSrc[ii], roiSize.width );&lt;BR /&gt; ippsSet_32f( val, ppSrc[jj], roiSize.width );&lt;BR /&gt; }&lt;BR /&gt; else&lt;BR /&gt; {&lt;BR /&gt; ippsZero_32f( ppSrc[ii], roiSize.width );&lt;BR /&gt; ippsZero_32f( ppSrc[jj], roiSize.width );&lt;BR /&gt; }&lt;BR /&gt; }&lt;BR /&gt; if( co )&lt;BR /&gt; {&lt;BR /&gt; ppSrc[roiSize.height+(Ncss*2)] = pTmp + (roiSize.height+(Ncss*2)) * tmpw;&lt;BR /&gt; ippsSet_32f( val, ppSrc[roiSize.height+(Ncss*2)], roiSize.width );&lt;BR /&gt; }&lt;BR /&gt; borderType = ippBorderConst;&lt;BR /&gt; break;&lt;BR /&gt;&lt;BR /&gt; case REPLICATE:&lt;BR /&gt; for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt; {&lt;BR /&gt; ppSrc[ii] = ppSrc[Ncss];&lt;BR /&gt; ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];&lt;BR /&gt; }&lt;BR /&gt; if( co )&lt;BR /&gt; {&lt;BR /&gt; ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height+Ncss-1];&lt;BR /&gt; }&lt;BR /&gt; borderType = ippBorderRepl;&lt;BR /&gt; break;&lt;BR /&gt; &lt;BR /&gt; case SYMMETRIC:&lt;BR /&gt; for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt; {&lt;BR /&gt; ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];&lt;BR /&gt; ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];&lt;BR /&gt; }&lt;BR /&gt; if( co )&lt;BR /&gt; {&lt;BR /&gt; ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height-1];&lt;BR /&gt; }&lt;BR /&gt; borderType = ippBorderMirrorR;&lt;BR /&gt; break;&lt;BR /&gt; &lt;BR /&gt; case CIRCULAR:&lt;BR /&gt; for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt; {&lt;BR /&gt; ppSrc[ii] = ppSrc[roiSize.height+ii];&lt;BR /&gt; ppSrc[jj] = ppSrc[ii+Ncss];&lt;BR /&gt; }&lt;BR /&gt; if( co )&lt;BR /&gt; {&lt;BR /&gt; ppSrc[roiSize.height+(Ncss*2)] = ppSrc[Ncss*2];&lt;BR /&gt; }&lt;BR /&gt; borderType = ippBorderWrap;&lt;BR /&gt; 
break;&lt;BR /&gt; &lt;BR /&gt; default:&lt;BR /&gt; return ippStsPaddingSchemeErr; // ippStsBorderErr missing from ippdefs.h&lt;BR /&gt; }&lt;BR /&gt;&lt;BR /&gt; // perform the actual convolutions&lt;BR /&gt; if( sts = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*) pSrc, srcStep, &lt;BR /&gt; ppDst, roiSize, hr, Nr, Nrss, borderType, val, pBufferRow) )&lt;BR /&gt; return sts;&lt;BR /&gt; &lt;BR /&gt; if( sts = ippiFilterColumnPipeline_32f_C1R((const Ipp32f**)ppSrc, pDst, dstStep, &lt;BR /&gt; roiSize, hc, Nc, pBufferCol) )&lt;BR /&gt; return sts;&lt;BR /&gt;&lt;BR /&gt; ippsFree(pTmp);&lt;BR /&gt; ippsFree(pBufferCol);&lt;BR /&gt; ippsFree(pBufferRow);&lt;BR /&gt; delete []ppSrc;&lt;BR /&gt; delete []ppDst;&lt;BR /&gt;&lt;BR /&gt; return sts;&lt;BR /&gt;}&lt;BR /&gt; &lt;BR /&gt;&lt;BR /&gt;No Guarntees with this, but it has been tested with both even and odd sized kernals and for all padding types.&lt;BR /&gt;&lt;BR /&gt;Enjoy,&lt;BR /&gt; Jay Schamus&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;</description>
      <pubDate>Thu, 19 Mar 2009 17:30:55 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987735#M21957</guid>
      <dc:creator>jschamus</dc:creator>
      <dc:date>2009-03-19T17:30:55Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987736#M21958</link>
      <description>&lt;DIV style="margin:0px;"&gt;&lt;/DIV&gt;
Hi Jay,&lt;BR /&gt;&lt;BR /&gt;thanks for sharing your expertise with the IPP developer community. However, the copyright notice in your code may look misleading. Is it possible to disclose this code?&lt;BR /&gt;&lt;BR /&gt;Regards,&lt;BR /&gt; Vladimir&lt;BR /&gt;</description>
      <pubDate>Fri, 20 Mar 2009 09:35:55 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987736#M21958</guid>
      <dc:creator>Vladimir_Dudnik</dc:creator>
      <dc:date>2009-03-20T09:35:55Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987737#M21959</link>
      <description>&lt;DIV style="margin:0px;"&gt;
&lt;DIV id="quote_reply" style="width: 100%; margin-top: 5px;"&gt;
&lt;DIV style="margin-left:2px;margin-right:2px;"&gt;Quoting - &lt;A href="https://community.intel.com/en-us/profile/336498"&gt;Vladimir Dudnik (Intel)&lt;/A&gt;&lt;/DIV&gt;
&lt;DIV style="background-color:#E5E5E5; padding:5px;border: 1px; border-style: inset;margin-left:2px;margin-right:2px;"&gt;&lt;EM&gt; Hi Jay,&lt;BR /&gt;&lt;BR /&gt;thanks for sharing of your expertise with IPP developers community. Although the copyright notice of your code may look misleading. Is it possible to disclosure this code?&lt;BR /&gt;&lt;BR /&gt;Regards,&lt;BR /&gt; Vladimir&lt;BR /&gt;&lt;/EM&gt;&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;BR /&gt;Sure fine. Just acknowledge the source. &lt;BR /&gt;</description>
      <pubDate>Fri, 20 Mar 2009 15:10:36 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987737#M21959</guid>
      <dc:creator>jschamus</dc:creator>
      <dc:date>2009-03-20T15:10:36Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987738#M21960</link>
      <description>&lt;P&gt;Hi Jay&lt;BR /&gt;&lt;BR /&gt;Thanks alot for sharing your improved version of the convolution code. I was wondering if you can check a couple of things:&lt;BR /&gt;&lt;BR /&gt;1] Should the line &lt;BR /&gt;&lt;BR /&gt;ppSrc = new Ipp32f*[roiSize.height + (Nrss * 2) - co];&lt;BR /&gt;&lt;BR /&gt;read instead as:&lt;BR /&gt;&lt;BR /&gt;ppSrc = new Ipp32f*[roiSize.height + (Ncss * 2) - co];&lt;BR /&gt;&lt;BR /&gt;so that the loop:&lt;BR /&gt;&lt;BR /&gt;for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt;{&lt;BR /&gt; ppSrc[ii] = pTmp + ii * tmpw;&lt;BR /&gt; ppSrc[jj] = pTmp + jj * tmpw;&lt;BR /&gt;&lt;BR /&gt; (...)&lt;BR /&gt;&lt;BR /&gt;will not exceed the boundaries?&lt;BR /&gt;&lt;BR /&gt;2] Should the lines:&lt;BR /&gt;&lt;BR /&gt;// size of temporary buffers&lt;BR /&gt;if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizerow) )&lt;BR /&gt;return sts;&lt;BR /&gt;if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizecol) )&lt;BR /&gt;return sts;&lt;/P&gt;
&lt;BR /&gt;be instead:&lt;BR /&gt;&lt;BR /&gt;// size of temporary buffers&lt;BR /&gt;if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizerow) )&lt;BR /&gt;return sts;&lt;BR /&gt;if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizecol) )&lt;BR /&gt;return sts;&lt;BR /&gt;&lt;BR /&gt;i.e. with Nr and Nc swapped?&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;Thanks,&lt;BR /&gt;Marco</description>
      <pubDate>Tue, 12 May 2009 19:45:00 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987738#M21960</guid>
      <dc:creator>zuliani</dc:creator>
      <dc:date>2009-05-12T19:45:00Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987739#M21961</link>
      <description>&lt;DIV style="margin:0px;"&gt;
&lt;DIV id="quote_reply" style="width: 100%; margin-top: 5px;"&gt;
&lt;DIV style="margin-left:2px;margin-right:2px;"&gt;Quoting - &lt;A href="https://community.intel.com/en-us/profile/253608"&gt;zuliani@mayachitra.com&lt;/A&gt;&lt;/DIV&gt;
&lt;DIV style="background-color:#E5E5E5; padding:5px;border: 1px; border-style: inset;margin-left:2px;margin-right:2px;"&gt;&lt;EM&gt;
&lt;P&gt;Hi Jay&lt;BR /&gt;&lt;BR /&gt; Thanks alot for sharing your improved version of the convolution code. I was wondering if you can check a couple of things:&lt;BR /&gt;&lt;BR /&gt;1] Should the line &lt;BR /&gt;&lt;BR /&gt;ppSrc = new Ipp32f*[roiSize.height + (Nrss * 2) - co];&lt;BR /&gt;&lt;BR /&gt;read instead as:&lt;BR /&gt;&lt;BR /&gt;ppSrc = new Ipp32f*[roiSize.height + (Ncss * 2) - co];&lt;BR /&gt;&lt;BR /&gt;so that the loop:&lt;BR /&gt;&lt;BR /&gt;for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt;{&lt;BR /&gt; ppSrc[ii] = pTmp + ii * tmpw;&lt;BR /&gt; ppSrc[jj] = pTmp + jj * tmpw;&lt;BR /&gt;&lt;BR /&gt; (...)&lt;BR /&gt;&lt;BR /&gt;will not exceed the boundaries?&lt;BR /&gt;&lt;BR /&gt;2] Should the lines:&lt;BR /&gt;&lt;BR /&gt;// size of temporary buffers&lt;BR /&gt;if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizerow) )&lt;BR /&gt;return sts;&lt;BR /&gt;if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizecol) )&lt;BR /&gt;return sts;&lt;/P&gt;
&lt;BR /&gt;be instead:&lt;BR /&gt;&lt;BR /&gt;// size of temporary buffers&lt;BR /&gt;if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizerow) )&lt;BR /&gt;return sts;&lt;BR /&gt;if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizecol) )&lt;BR /&gt;return sts;&lt;BR /&gt;&lt;BR /&gt;i.e. with Nr and Nc swapped?&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;Thanks,&lt;BR /&gt;Marco&lt;/EM&gt;&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;BR /&gt;Marco,&lt;BR /&gt; sorry to take so long to get back, but I've been busy. Here is the corrected code for this (can't use new and delete or it will crash once every ~1000 times you run it). Also, I included a version that runs the filter in reverse order (CR vs. RC), for those that need to match MatLab. Note the Copyright is there but it means that if you use this code please acknowledge the source. And also, one of our guys who is experimenting with 64-bit under Windows 2008 gets a crash every time at the call to ippiFilterRowBorderPipelineGetBufferSize_32f_C1R when he builds this as a 64-bit DLL.&lt;BR /&gt;&lt;BR /&gt;
&lt;PRE&gt;[cpp]/*********************************************************************************&lt;BR /&gt;&lt;BR /&gt;Copyright(C) 2004-2009, Riverain Medical Group LLC.  All Rights Reserved.&lt;BR /&gt;&lt;BR /&gt;This is UNPUBLISHED PROPRIETARY SOURCE CODE of Riverain Medical Group, LLC.&lt;BR /&gt;The contents of this file may not be disclosed to third parties,  copied or&lt;BR /&gt;duplicated in any form, in whole or in part, for use or transmittal, without&lt;BR /&gt;the prior written permission of Riverain Medical Group LLC&lt;BR /&gt;&lt;BR /&gt;**********************************************************************************/&lt;BR /&gt;&lt;BR /&gt;/*!&lt;BR /&gt;* file   IppSepFilter.cpp&lt;BR /&gt;* brief  Implementation of Separable Filter with IPP&lt;BR /&gt;* author J. Schamus, jschamus@riverainmedical.com&lt;BR /&gt;*/&lt;BR /&gt;&lt;BR /&gt;# pragma once&lt;BR /&gt;# include &lt;IOSTREAM&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;// Row major version&lt;BR /&gt;static inline IppStatus IppSepFilterRC(	&lt;BR /&gt;									   Ipp32f*			pDst,					// Destination Image&lt;BR /&gt;									   const int&amp;amp;		dstStep,				// Destination step&lt;BR /&gt;									   const Ipp32f*	        pSrc,					// Source Image &lt;BR /&gt;									   const int&amp;amp;		srcStep,				// Source step&lt;BR /&gt;									   const IppiSize&amp;amp;	roiSize,				// Source/Destination size&lt;BR /&gt;									   const Ipp32f*	        hr,					// Row filter&lt;BR /&gt;									   const int&amp;amp;		Nr,					// Row filter size&lt;BR /&gt;									   const Ipp32f*	        hc,					// Column filter&lt;BR /&gt;									   const int&amp;amp;		Nc,					// Column filter size&lt;BR /&gt;									   const PadType&amp;amp;	padType = CONSTANT,		// Padding type&lt;BR /&gt;									   const Ipp32f&amp;amp;	        val	= 0.0f )			// Value to use with CONSTANT padding&lt;BR /&gt;{&lt;BR /&gt;	TRY_AUTO&lt;BR /&gt;	{&lt;BR /&gt;		IppStatus sts;&lt;BR /&gt;&lt;BR /&gt;		int sizerow, 
sizecol;&lt;BR /&gt;		Ipp32f *pTmp = NULL;&lt;BR /&gt;		Ipp32f *pTmpLocal = NULL;&lt;BR /&gt;		Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;&lt;BR /&gt;&lt;BR /&gt;		//	flip the kernels and align the memory to please IPP &lt;BR /&gt;		Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );&lt;BR /&gt;		Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );&lt;BR /&gt;&lt;BR /&gt;		ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );&lt;BR /&gt;		ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );&lt;BR /&gt;&lt;BR /&gt;		// 	compute the kernel semisizes&lt;BR /&gt;		int Ncss = Nc &amp;gt;&amp;gt; 1;&lt;BR /&gt;		int Nrss = Nr &amp;gt;&amp;gt; 1;&lt;BR /&gt;&lt;BR /&gt;		// 	compute the kernel offsets (0 -&amp;gt; odd, 1 -&amp;gt; even)&lt;BR /&gt;		int co = 1 - ( Nc % 2 );&lt;BR /&gt;		int ro = 1 - ( Nr % 2 );&lt;BR /&gt;&lt;BR /&gt;		//	allocate temporary dst buffer&lt;BR /&gt;		int tmpStep;&lt;BR /&gt;		int tmpw;&lt;BR /&gt;&lt;BR /&gt;		// 	The IPP filter functions seem to need 1 more row allocated&lt;BR /&gt;		// 	than is obvious or they sometimes crash.&lt;BR /&gt;		int tmpHeight = roiSize.height+Nc+1;&lt;BR /&gt;		int tmpWidth  = roiSize.width;&lt;BR /&gt;&lt;BR /&gt;		if( !( pTmpLocal = ippiMalloc_32f_C1( roiSize.width, roiSize.height + Nc + 1, &amp;amp;tmpStep ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterRC, mem-alloc error. 
" );&lt;BR /&gt;		pTmp = pTmpLocal;&lt;BR /&gt;		tmpw = tmpStep / sizeof(Ipp32f);&lt;BR /&gt;&lt;BR /&gt;		Ipp32f **ppSrc, **ppDst;&lt;BR /&gt;		ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );&lt;BR /&gt;		ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );&lt;BR /&gt;&lt;BR /&gt;		if( padType == CONSTANT )&lt;BR /&gt;		{&lt;BR /&gt;			IppiSize tmpSize;&lt;BR /&gt;			tmpSize.height = roiSize.height + Nc + 1; &lt;BR /&gt;			tmpSize.width  = roiSize.width;&lt;BR /&gt;			ippiSet_32f_C1R( val, pTmp, tmpStep, tmpSize );&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		// 	size of temporary buffers&lt;BR /&gt;		if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizerow) )&lt;BR /&gt;			throw exception( "nIppSepFilterRC, ipp-row-mem-size error. ");&lt;BR /&gt;&lt;BR /&gt;		if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizecol) )&lt;BR /&gt;			throw exception( "nIppSepFilterRC, ipp-col-mem-size error. ");&lt;BR /&gt;&lt;BR /&gt;		//	allocate temporary buffers&lt;BR /&gt;		if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterRC, ipp-col-temp mem-alloc error. ");&lt;BR /&gt;&lt;BR /&gt;		if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterRC, ipp-row-temp mem-alloc error. ");&lt;BR /&gt;&lt;BR /&gt;		Nrss -= ro;&lt;BR /&gt;		Ncss -= co;&lt;BR /&gt;&lt;BR /&gt;		// organize dst buffer&lt;BR /&gt;		for( int ii=0,jj=Ncss;ii&amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;&lt;BR /&gt;			throw exception( "nIppSepFilterRC, ipp-row-filter error. 
");&lt;BR /&gt;			return sts;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		if( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pDst, dstStep, &lt;BR /&gt;			roiSize, hc_flipped, Nc, pBufferCol) )&lt;BR /&gt;		{&lt;BR /&gt;			cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;&lt;BR /&gt;			throw exception( "nIppSepFilterRC, ipp-column-filter error. ");&lt;BR /&gt;			return sts;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		if( ppSrc )		 { ippsFree(ppSrc);		 ppSrc 		= NULL; }&lt;BR /&gt;		if( ppDst )		 { ippsFree(ppDst);		 ppDst 		= NULL; }&lt;BR /&gt;		if( pTmpLocal )	 { ippiFree(pTmpLocal);	 pTmpLocal	= NULL; }&lt;BR /&gt;		if( pBufferCol )    { ippsFree(pBufferCol); pBufferCol = NULL; }&lt;BR /&gt;		if( pBufferRow )   { ippsFree(pBufferRow); pBufferRow = NULL; }&lt;BR /&gt;		if( hr_flipped )     { ippsFree(hr_flipped); hr_flipped = NULL; }&lt;BR /&gt;		if( hc_flipped )     { ippsFree(hc_flipped); hc_flipped = NULL; }&lt;BR /&gt;&lt;BR /&gt;		return sts;&lt;BR /&gt;	}&lt;BR /&gt;	CATCH_AUTO&lt;BR /&gt;}&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;//&lt;BR /&gt;//&lt;BR /&gt;// Column major version&lt;BR /&gt;static inline IppStatus IppSepFilterCR(	&lt;BR /&gt;									   Ipp32f*			pDst,					// Destination Image&lt;BR /&gt;									   const int&amp;amp;		dstStep,				// Destination step&lt;BR /&gt;									   const Ipp32f*	        pSrc,					// Source Image &lt;BR /&gt;									   const int&amp;amp;		srcStep,				// Source step&lt;BR /&gt;									   const IppiSize&amp;amp;	roiSize,				// Source/Destination size&lt;BR /&gt;									   const Ipp32f*	        hc,					// Column filter&lt;BR /&gt;									   const int&amp;amp;		Nc,					// Column filter size&lt;BR /&gt;									   const Ipp32f*	        hr,					// Row filter&lt;BR /&gt;									   const int&amp;amp;		Nr,					// Row filter size&lt;BR /&gt;									   const PadType&amp;amp;	padType = CONSTANT,		// Padding type&lt;BR /&gt;									   const Ipp32f&amp;amp;	
        val	= 0.0f )			// Value to use with CONSTANT padding&lt;BR /&gt;{&lt;BR /&gt;	TRY_AUTO&lt;BR /&gt;	{&lt;BR /&gt;		IppStatus sts;&lt;BR /&gt;&lt;BR /&gt;		int sizerow, sizecol;&lt;BR /&gt;		Ipp32f *pTmp = NULL, *pPad = NULL;&lt;BR /&gt;		Ipp8u  *pBufferCol = NULL, *pBufferRow = NULL;&lt;BR /&gt;&lt;BR /&gt;		// flip the kernels and align the memory to please IPP &lt;BR /&gt;		Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );&lt;BR /&gt;		Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );&lt;BR /&gt;&lt;BR /&gt;		ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );&lt;BR /&gt;		ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );&lt;BR /&gt;&lt;BR /&gt;		// compute the kernel semisizes&lt;BR /&gt;		int Ncss = Nc &amp;gt;&amp;gt; 1;&lt;BR /&gt;		int Nrss = Nr &amp;gt;&amp;gt; 1;&lt;BR /&gt;&lt;BR /&gt;		// compute the kernel offsets (0 -&amp;gt; odd, 1 -&amp;gt; even)&lt;BR /&gt;		int co = 1 - ( Nc % 2 );&lt;BR /&gt;		int ro = 1 - ( Nr % 2 );&lt;BR /&gt;&lt;BR /&gt;		// allocate temporary dst buffer&lt;BR /&gt;		int tmpStep, padStep;&lt;BR /&gt;		// The IPP filter functions seem to need 1 more row allocated&lt;BR /&gt;		// than is obvious or they sometimes crash.&lt;BR /&gt;		IppiSize tmpSize; &lt;BR /&gt;		tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;&lt;BR /&gt;		if( !( pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &amp;amp;tmpStep ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR mem-alloc error." 
);&lt;BR /&gt;&lt;BR /&gt;		int srcw = srcStep / sizeof(Ipp32f);&lt;BR /&gt;		int dstw = dstStep / sizeof(Ipp32f);&lt;BR /&gt;		int tmpw = tmpStep / sizeof(Ipp32f);&lt;BR /&gt;		ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );&lt;BR /&gt;&lt;BR /&gt;		int padw;&lt;BR /&gt;		IppiSize padSize;&lt;BR /&gt;&lt;BR /&gt;		//	Only need pad space for CONSTANT&lt;BR /&gt;		if( padType == CONSTANT )&lt;BR /&gt;		{&lt;BR /&gt;			if( !( pPad = ippiMalloc_32f_C1( roiSize.width, (Ncss*2) - co, &amp;amp;padStep ) ) )&lt;BR /&gt;				throw exception( "nIppSepFilterCR mem-alloc error." );&lt;BR /&gt;&lt;BR /&gt;			padw = padStep / sizeof(Ipp32f);&lt;BR /&gt;			padSize.height = (Ncss*2) - co; padSize.width = roiSize.width;&lt;BR /&gt;			ippiSet_32f_C1R( val, pPad, padStep, padSize );&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		Ipp32f **ppSrc, **ppDst;&lt;BR /&gt;		ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );&lt;BR /&gt;		ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );&lt;BR /&gt;&lt;BR /&gt;		// size of temporary buffers&lt;BR /&gt;		if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizerow) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");&lt;BR /&gt;&lt;BR /&gt;		if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizecol) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");&lt;BR /&gt;&lt;BR /&gt;		// allocate temporary buffers&lt;BR /&gt;		if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");&lt;BR /&gt;&lt;BR /&gt;		if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. 
");&lt;BR /&gt;&lt;BR /&gt;		Nrss -= ro;&lt;BR /&gt;		Ncss -= co;&lt;BR /&gt;&lt;BR /&gt;		//	organize dst buffer&lt;BR /&gt;		for( int ii = 0, jj = Ncss; ii &amp;lt; roiSize.height; ++ii, ++jj )&lt;BR /&gt;		{&lt;BR /&gt;			ppDst[ii] = pDst + ii * dstw;&lt;BR /&gt;			ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		IppiBorderType borderType;&lt;BR /&gt;&lt;BR /&gt;		switch( padType )&lt;BR /&gt;		{&lt;BR /&gt;		case CONSTANT:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;&lt;BR /&gt;			throw exception( "nIppSepFilterCR col-filter error." );&lt;BR /&gt;			return sts;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		if( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pTmp, tmpStep, &lt;BR /&gt;			ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) )&lt;BR /&gt;		{&lt;BR /&gt;			cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;&lt;BR /&gt;			throw exception( "nIppSepFilterCR row-filter error." );&lt;BR /&gt;			return sts;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		if( ppSrc )		 { ippsFree(ppSrc);		 ppSrc 		= NULL; }&lt;BR /&gt;		if( ppDst )		 { ippsFree(ppDst);		 ppDst 		= NULL; }&lt;BR /&gt;		if( pTmp )	 	 { ippiFree(pTmp);	 	 pTmp 		= NULL; }&lt;BR /&gt;		if( pPad )		 { ippsFree(pPad);		 pPad       = NULL; };&lt;BR /&gt;		if( pBufferCol )    { ippsFree(pBufferCol); pBufferCol = NULL; };&lt;BR /&gt;		if( pBufferRow )   { ippsFree(pBufferRow); pBufferRow = NULL; };&lt;BR /&gt;		if( hr_flipped )     { ippsFree(hr_flipped); hr_flipped = NULL; };&lt;BR /&gt;		if( hc_flipped )     { ippsFree(hc_flipped); hc_flipped = NULL; };&lt;BR /&gt;&lt;BR /&gt;		return sts;&lt;BR /&gt;&lt;BR /&gt;	}&lt;BR /&gt;	CATCH_AUTO&lt;BR /&gt;}&lt;BR /&gt;[/cpp]&lt;/IOSTREAM&gt;&lt;/PRE&gt;
&lt;BR /&gt;</description>
      <pubDate>Tue, 21 Jul 2009 14:28:05 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987739#M21961</guid>
      <dc:creator>jschamus</dc:creator>
      <dc:date>2009-07-21T14:28:05Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987740#M21962</link>
      <description>&lt;DIV style="margin:0px;"&gt;&lt;/DIV&gt;
Let me repost the Column major version. I see the code insertion thingy still has problems. :-(&lt;BR /&gt;
&lt;PRE&gt;[cpp]//
//
// Column major version
static inline IppStatus IppSepFilterCR(	
									   Ipp32f*			pDst,					// Destination Image
									   const int&amp;amp;		dstStep,				// Destination step
									   const Ipp32f*	pSrc,					// Source Image 
									   const int&amp;amp;		srcStep,				// Source step
									   const IppiSize&amp;amp;	roiSize,				// Source/Destination size
									   const Ipp32f*	hc,						// Column filter
									   const int&amp;amp;		Nc,						// Column filter size
									   const Ipp32f*	hr,						// Row filter
									   const int&amp;amp;		Nr,						// Row filter size
									   const PadType&amp;amp;	padType = CONSTANT,		// Padding type
									   const Ipp32f&amp;amp;	val	= 0.0f )			// Value to use with CONSTANT padding
{
	TRY_AUTO
	{
		IppStatus sts;

		int sizerow, sizecol;
		Ipp32f *pTmp = NULL, *pPad = NULL;
		Ipp8u  *pBufferCol = NULL, *pBufferRow = NULL;

		// flip the kernels and align the memory to please IPP 
		Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );
		Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );

		ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );
		ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );

		// compute the kernel semisizes
		int Ncss = Nc &amp;gt;&amp;gt; 1;
		int Nrss = Nr &amp;gt;&amp;gt; 1;

		// compute the kernel offsets (0 -&amp;gt; odd, 1 -&amp;gt; even)
		int co = 1 - ( Nc % 2 );
		int ro = 1 - ( Nr % 2 );

		// allocate temporary dst buffer
		int tmpStep, padStep;
		// The IPP filter functions seem to need 1 more row allocated
		// than is obvious or they sometimes crash.
		IppiSize tmpSize; 
		tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;
		if( !( pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &amp;amp;tmpStep ) ) )
			throw exception( "nIppSepFilterCR mem-alloc error." );

		int srcw = srcStep / sizeof(Ipp32f);
		int dstw = dstStep / sizeof(Ipp32f);
		int tmpw = tmpStep / sizeof(Ipp32f);
		ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );

		int padw;
		IppiSize padSize;

		//	Only need pad space for CONSTANT
		if( padType == CONSTANT )
		{
			if( !( pPad = ippiMalloc_32f_C1( roiSize.width, (Ncss*2) - co, &amp;amp;padStep ) ) )
				throw exception( "nIppSepFilterCR mem-alloc error." );

			padw = padStep / sizeof(Ipp32f);
			padSize.height = (Ncss*2) - co; padSize.width = roiSize.width;
			ippiSet_32f_C1R( val, pPad, padStep, padSize );
		}

		Ipp32f **ppSrc, **ppDst;
		ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );
		ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );

		// size of temporary buffers
		if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizerow) )
			throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");

		if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizecol) )
			throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");

		// allocate temporary buffers
		if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )
			throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");

		if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )
			throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");

		Nrss -= ro;
		Ncss -= co;

		//	organize dst buffer
		for( int ii = 0, jj = Ncss; ii &amp;lt; roiSize.height; ++ii, ++jj )
		{
			ppDst[ii] = pDst + ii * dstw;
			ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
		}

		IppiBorderType borderType;

		switch( padType )
		{
		case CONSTANT:
			for( int ii=0,jj=roiSize.height+Ncss;ii&amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;
			throw exception( "nIppSepFilterCR col-filter error." );
			return sts;
		}

		if( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pTmp, tmpStep, 
			ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) )
		{
			cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;
			throw exception( "nIppSepFilterCR row-filter error." );
			return sts;
		}

		if( ppSrc )		 { ippsFree(ppSrc);		 ppSrc 		= NULL; }
		if( ppDst )		 { ippsFree(ppDst);		 ppDst 		= NULL; }
		if( pTmp )	 	 { ippiFree(pTmp);	 	 pTmp 		= NULL; }
		if( pPad )		 { ippsFree(pPad);		 pPad       = NULL; };
		if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; };
		if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; };
		if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; };
		if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; };

		return sts;

	}
	CATCH_AUTO
}
[/cpp]&lt;/PRE&gt;
&lt;BR /&gt;</description>
      <pubDate>Tue, 21 Jul 2009 14:31:17 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987740#M21962</guid>
      <dc:creator>jschamus</dc:creator>
      <dc:date>2009-07-21T14:31:17Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987741#M21963</link>
      <description>Holy Crap! The code looks okay in the editor window, then it gets cut up when it gets posted. Trying again!&lt;BR /&gt;
&lt;PRE&gt;[cpp]//
//
// Column major version
static inline IppStatus IppSepFilterCR(	
									   Ipp32f*			pDst,					// Destination Image
									   const int&amp;amp;		dstStep,				// Destination step
									   const Ipp32f*	pSrc,					// Source Image 
									   const int&amp;amp;		srcStep,				// Source step
									   const IppiSize&amp;amp;	roiSize,				// Source/Destination size
									   const Ipp32f*	hc,						// Column filter
									   const int&amp;amp;		Nc,						// Column filter size
									   const Ipp32f*	hr,						// Row filter
									   const int&amp;amp;		Nr,						// Row filter size
									   const PadType&amp;amp;	padType = CONSTANT,		// Padding type
									   const Ipp32f&amp;amp;	val	= 0.0f )			// Value to use with CONSTANT padding
{
	TRY_AUTO
	{
		IppStatus sts;

		int sizerow, sizecol;
		Ipp32f *pTmp = NULL, *pPad = NULL;
		Ipp8u  *pBufferCol = NULL, *pBufferRow = NULL;

		// flip the kernels and align the memory to please IPP 
		Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );
		Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );

		ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );
		ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );

		// compute the kernel semisizes
		int Ncss = Nc &amp;gt;&amp;gt; 1;
		int Nrss = Nr &amp;gt;&amp;gt; 1;

		// compute the kernel offsets (0 -&amp;gt; odd, 1 -&amp;gt; even)
		int co = 1 - ( Nc % 2 );
		int ro = 1 - ( Nr % 2 );

		// allocate temporary dst buffer
		int tmpStep, padStep;
		// The IPP filter functions seem to need 1 more row allocated
		// than is obvious or they sometimes crash.
		IppiSize tmpSize; 
		tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;
		if( !( pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &amp;amp;tmpStep ) ) )
			throw exception( "nIppSepFilterCR mem-alloc error." );

		int srcw = srcStep / sizeof(Ipp32f);
		int dstw = dstStep / sizeof(Ipp32f);
		int tmpw = tmpStep / sizeof(Ipp32f);
		ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );

		int padw;
		IppiSize padSize;

		//	Only need pad space for CONSTANT
		if( padType == CONSTANT )
		{
			if( !( pPad = ippiMalloc_32f_C1( roiSize.width, (Ncss*2) - co, &amp;amp;padStep ) ) )
				throw exception( "nIppSepFilterCR mem-alloc error." );

			padw = padStep / sizeof(Ipp32f);
			padSize.height = (Ncss*2) - co; padSize.width = roiSize.width;
			ippiSet_32f_C1R( val, pPad, padStep, padSize );
		}

		Ipp32f **ppSrc, **ppDst;
		ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );
		ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );

		// size of temporary buffers
		if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizerow) )
			throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");

		if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizecol) )
			throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");

		// allocate temporary buffers
		if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )
			throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. ");

		if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )
			throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");

		Nrss -= ro;
		Ncss -= co;

		//	organize dst buffer
		for( int ii = 0, jj = Ncss; ii &amp;lt; roiSize.height; ++ii, ++jj )
		{
			ppDst[ii] = pDst + ii * dstw;
			ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;
		}

		IppiBorderType borderType;

		switch( padType )
		{
		case CONSTANT:
			for( int ii=0,jj=roiSize.height+Ncss;ii,++jj )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[ii] = pPad + ii * padw;&lt;BR /&gt;				ppSrc[jj] = pPad + (ii + Ncss) * padw;&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = pPad + (Ncss*2) * srcw;&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderConst;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case REPLICATE:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt;			{&lt;BR /&gt;				ppSrc[ii] = ppSrc[Ncss];&lt;BR /&gt;				ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height+Ncss-1];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderRepl;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case SYMMETRIC:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt;			{&lt;BR /&gt;				ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];&lt;BR /&gt;				ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height-1];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderMirrorR;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case CIRCULAR:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt;			{&lt;BR /&gt;				ppSrc[ii] = ppSrc[roiSize.height+ii];&lt;BR /&gt;				ppSrc[jj] = ppSrc[ii+Ncss];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[Ncss*2];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderWrap;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		default:&lt;BR /&gt;			// ippStsBorderErr missing from ippdefs.h&lt;BR /&gt;			return ippStsPaddingSchemeErr;  &lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;		// perform the actual convolutions&lt;BR /&gt;		if( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pTmp, tmpStep, &lt;BR /&gt;			
roiSize, hc_flipped, Nc, pBufferCol) )&lt;BR /&gt;		{&lt;BR /&gt;			//cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;
			throw exception( "nIppSepFilterCR col-filter error." );
			return sts;
		}

		if( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pTmp, tmpStep, 
			ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) )
		{
			//cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;
			throw exception( "nIppSepFilterCR row-filter error." );
			return sts;
		}

		if( ppSrc )		 { ippsFree(ppSrc);		 ppSrc 		= NULL; }
		if( ppDst )		 { ippsFree(ppDst);		 ppDst 		= NULL; }
		if( pTmp )	 	 { ippiFree(pTmp);	 	 pTmp 		= NULL; }
		if( pPad )		 { ippsFree(pPad);		 pPad       = NULL; };
		if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; };
		if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; };
		if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; };
		if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; };

		return sts;

	}
	CATCH_AUTO
}
[/cpp]&lt;/PRE&gt;
&lt;BR /&gt;</description>
      <pubDate>Tue, 21 Jul 2009 14:35:22 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987741#M21963</guid>
      <dc:creator>jschamus</dc:creator>
      <dc:date>2009-07-21T14:35:22Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987742#M21964</link>
      <description>&lt;DIV style="margin:0px;"&gt;
&lt;DIV id="quote_reply" style="width: 100%; margin-top: 5px;"&gt;
&lt;DIV style="margin-left:2px;margin-right:2px;"&gt;Quoting - &lt;A href="https://community.intel.com/en-us/profile/419011"&gt;jschamus&lt;/A&gt;&lt;/DIV&gt;
&lt;DIV style="background-color:#E5E5E5; padding:5px;border: 1px; border-style: inset;margin-left:2px;margin-right:2px;"&gt;&lt;EM&gt;Holy Crap! The code looks okay in the editor window, then it gets cut up when it gets posted. Trying again!&lt;BR /&gt;
&lt;/EM&gt;&lt;PRE&gt;&lt;EM&gt;[cpp]//&lt;BR /&gt;//&lt;BR /&gt;// Column major version&lt;BR /&gt;static inline IppStatus IppSepFilterCR(	&lt;BR /&gt;									   Ipp32f*			pDst,					// Destination Image&lt;BR /&gt;									   const int&amp;amp;		dstStep,				// Destination step&lt;BR /&gt;									   const Ipp32f*	pSrc,					// Source Image &lt;BR /&gt;									   const int&amp;amp;		srcStep,				// Source step&lt;BR /&gt;									   const IppiSize&amp;amp;	roiSize,				// Source/Destination size&lt;BR /&gt;									   const Ipp32f*	hc,						// Column filter&lt;BR /&gt;									   const int&amp;amp;		Nc,						// Column filter size&lt;BR /&gt;									   const Ipp32f*	hr,						// Row filter&lt;BR /&gt;									   const int&amp;amp;		Nr,						// Row filter size&lt;BR /&gt;									   const PadType&amp;amp;	padType = CONSTANT,		// Padding type&lt;BR /&gt;									   const Ipp32f&amp;amp;	val	= 0.0f )			// Value to use with CONSTANT padding&lt;BR /&gt;{&lt;BR /&gt;	TRY_AUTO&lt;BR /&gt;	{&lt;BR /&gt;		IppStatus sts;&lt;BR /&gt;&lt;BR /&gt;		int sizerow, sizecol;&lt;BR /&gt;		Ipp32f *pTmp = NULL, *pPad = NULL;&lt;BR /&gt;		Ipp8u  *pBufferCol = NULL, *pBufferRow = NULL;&lt;BR /&gt;&lt;BR /&gt;		// flip the kernels and align the memory to please IPP &lt;BR /&gt;		Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );&lt;BR /&gt;		Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );&lt;BR /&gt;&lt;BR /&gt;		ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );&lt;BR /&gt;		ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );&lt;BR /&gt;&lt;BR /&gt;		// compute the kernel semisizes&lt;BR /&gt;		int Ncss = Nc &amp;gt;&amp;gt; 1;&lt;BR /&gt;		int Nrss = Nr &amp;gt;&amp;gt; 1;&lt;BR /&gt;&lt;BR /&gt;		// compute the kernel offsets (0 -&amp;gt; odd, 1 -&amp;gt; even)&lt;BR /&gt;		int co = 1 - ( Nc % 2 );&lt;BR /&gt;		int ro = 1 - ( Nr % 2 );&lt;BR /&gt;&lt;BR /&gt;		// allocate temporary dst buffer&lt;BR /&gt;		int tmpStep, padStep;&lt;BR /&gt;		// The IPP filter functions seem to need 1 
more row allocated&lt;BR /&gt;		// than is obvious or they sometimes crash.&lt;BR /&gt;		IppiSize tmpSize; &lt;BR /&gt;		tmpSize.width = roiSize.width; tmpSize.height = roiSize.height + Nc + 1;&lt;BR /&gt;		if( !( pTmp = ippiMalloc_32f_C1( tmpSize.width, tmpSize.height, &amp;amp;tmpStep ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR mem-alloc error." );&lt;BR /&gt;&lt;BR /&gt;		int srcw = srcStep / sizeof(Ipp32f);&lt;BR /&gt;		int dstw = dstStep / sizeof(Ipp32f);&lt;BR /&gt;		int tmpw = tmpStep / sizeof(Ipp32f);&lt;BR /&gt;		ippiSet_32f_C1R( 0.0f, pTmp, tmpStep, tmpSize );&lt;BR /&gt;&lt;BR /&gt;		int padw;&lt;BR /&gt;		IppiSize padSize;&lt;BR /&gt;&lt;BR /&gt;		//	Only need pad space for CONSTANT&lt;BR /&gt;		if( padType == CONSTANT )&lt;BR /&gt;		{&lt;BR /&gt;			if( !( pPad = ippiMalloc_32f_C1( roiSize.width, (Ncss*2) - co, &amp;amp;padStep ) ) )&lt;BR /&gt;				throw exception( "nIppSepFilterCR mem-alloc error." );&lt;BR /&gt;&lt;BR /&gt;			padw = padStep / sizeof(Ipp32f);&lt;BR /&gt;			padSize.height = (Ncss*2) - co; padSize.width = roiSize.width;&lt;BR /&gt;			ippiSet_32f_C1R( val, pPad, padStep, padSize );&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		Ipp32f **ppSrc, **ppDst;&lt;BR /&gt;		ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );&lt;BR /&gt;		ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );&lt;BR /&gt;&lt;BR /&gt;		// size of temporary buffers&lt;BR /&gt;		if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizerow) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");&lt;BR /&gt;&lt;BR /&gt;		if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizecol) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR, ipp-row-mem-size error. ");&lt;BR /&gt;&lt;BR /&gt;		// allocate temporary buffers&lt;BR /&gt;		if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR, ipp-col-mem-alloc error. 
");&lt;BR /&gt;&lt;BR /&gt;		if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )&lt;BR /&gt;			throw exception( "nIppSepFilterCR, ipp-row-mem-alloc error. ");&lt;BR /&gt;&lt;BR /&gt;		Nrss -= ro;&lt;BR /&gt;		Ncss -= co;&lt;BR /&gt;&lt;BR /&gt;		//	organize dst buffer&lt;BR /&gt;		for( int ii = 0, jj = Ncss; ii &amp;lt; roiSize.height; ++ii, ++jj )&lt;BR /&gt;		{&lt;BR /&gt;			ppDst[ii] = pDst + ii * dstw;&lt;BR /&gt;			ppSrc[jj] = (Ipp32f *)pSrc + ii * srcw;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		IppiBorderType borderType;&lt;BR /&gt;&lt;BR /&gt;		switch( padType )&lt;BR /&gt;		{&lt;BR /&gt;		case CONSTANT:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii,++jj )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[ii] = pPad + ii * padw;&lt;BR /&gt;				ppSrc[jj] = pPad + (ii + Ncss) * padw;&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = pPad + (Ncss*2) * srcw;&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderConst;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case REPLICATE:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt;			{&lt;BR /&gt;				ppSrc[ii] = ppSrc[Ncss];&lt;BR /&gt;				ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height+Ncss-1];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderRepl;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case SYMMETRIC:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt;			{&lt;BR /&gt;				ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];&lt;BR /&gt;				ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height-1];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderMirrorR;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case CIRCULAR:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&lt;NCSS&gt;&lt;/NCSS&gt;			
{&lt;BR /&gt;				ppSrc[ii] = ppSrc[roiSize.height+ii];&lt;BR /&gt;				ppSrc[jj] = ppSrc[ii+Ncss];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[Ncss*2];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderWrap;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		default:&lt;BR /&gt;			// ippStsBorderErr missing from ippdefs.h&lt;BR /&gt;			return ippStsPaddingSchemeErr;  &lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;&lt;BR /&gt;		// perform the actual convolutions&lt;BR /&gt;		if( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pTmp, tmpStep, &lt;BR /&gt;			roiSize, hc_flipped, Nc, pBufferCol) )&lt;BR /&gt;		{&lt;BR /&gt;			//cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;&lt;BR /&gt;			throw exception( "nIppSepFilterCR col-filter error." );&lt;BR /&gt;			return sts;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		if( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pTmp, tmpStep, &lt;BR /&gt;			ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) )&lt;BR /&gt;		{&lt;BR /&gt;			//cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;&lt;BR /&gt;			throw exception( "nIppSepFilterCR row-filter error." 
);&lt;BR /&gt;			return sts;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		if( ppSrc )		 { ippsFree(ppSrc);		 ppSrc 		= NULL; }&lt;BR /&gt;		if( ppDst )		 { ippsFree(ppDst);		 ppDst 		= NULL; }&lt;BR /&gt;		if( pTmp )	 	 { ippiFree(pTmp);	 	 pTmp 		= NULL; }&lt;BR /&gt;		if( pPad )		 { ippsFree(pPad);		 pPad       = NULL; };&lt;BR /&gt;		if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; };&lt;BR /&gt;		if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; };&lt;BR /&gt;		if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; };&lt;BR /&gt;		if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; };&lt;BR /&gt;&lt;BR /&gt;		return sts;&lt;BR /&gt;&lt;BR /&gt;	}&lt;BR /&gt;	CATCH_AUTO&lt;BR /&gt;}&lt;BR /&gt;[/cpp]&lt;/EM&gt;&lt;/PRE&gt;
&lt;BR /&gt;&lt;/DIV&gt;
&lt;/DIV&gt;
&lt;/DIV&gt;
A small correction to the correction to the correction. The section of the code where it messed up should read:&lt;BR /&gt; switch( padType )&lt;BR /&gt; {&lt;BR /&gt; case CONSTANT:&lt;BR /&gt; for( int ii=0,jj=roiSize.height+Ncss;ii&amp;lt;Ncss;++ii,++jj )&lt;BR /&gt; {&lt;BR /&gt;&lt;BR /&gt;</description>
      <pubDate>Tue, 21 Jul 2009 14:37:06 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987742#M21964</guid>
      <dc:creator>jschamus</dc:creator>
      <dc:date>2009-07-21T14:37:06Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987743#M21965</link>
      <description>On closer examination I see that the RC function got chopped up too. Sigh!
&lt;PRE&gt;[cpp]// Row major version
static inline IppStatus IppSepFilterRC(	
									   Ipp32f*			pDst,					// Destination Image
									   const int&amp;amp;		dstStep,				// Destination step
									   const Ipp32f*	pSrc,					// Source Image 
									   const int&amp;amp;		srcStep,				// Source step
									   const IppiSize&amp;amp;	roiSize,				// Source/Destination size
									   const Ipp32f*	hr,						// Row filter
									   const int&amp;amp;		Nr,						// Row filter size
									   const Ipp32f*	hc,						// Column filter
									   const int&amp;amp;		Nc,						// Column filter size
									   const PadType&amp;amp;	padType = CONSTANT,		// Padding type
									   const Ipp32f&amp;amp;	val	= 0.0f )			// Value to use with CONSTANT padding
{
	TRY_AUTO
	{
		IppStatus sts;

		int sizerow, sizecol;
		Ipp32f *pTmp = NULL;
		Ipp32f *pTmpLocal = NULL;
		Ipp8u *pBufferCol = NULL, *pBufferRow = NULL;

		//	flip the kernels and align the memory to please IPP 
		Ipp32f *hc_flipped = (Ipp32f *)ippsMalloc_32f( Nc );
		Ipp32f *hr_flipped = (Ipp32f *)ippsMalloc_32f( Nr );

		ippsFlip_32f((const Ipp32f*)hc, hc_flipped, Nc );
		ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr );

		// 	compute the kernel semisizes
		int Ncss = Nc &amp;gt;&amp;gt; 1;
		int Nrss = Nr &amp;gt;&amp;gt; 1;

		// 	compute the kernel offsets (0 -&amp;gt; odd, 1 -&amp;gt; even)
		int co = 1 - ( Nc % 2 );
		int ro = 1 - ( Nr % 2 );

		//	allocate temporary dst buffer
		int tmpStep;
		int tmpw;

		// 	The IPP filter functions seem to need 1 more row allocated
		// 	than is obvious or they sometimes crash.
		int tmpHeight = roiSize.height+Nc+1;
		int tmpWidth  = roiSize.width;

		if( !( pTmpLocal = ippiMalloc_32f_C1( roiSize.width, roiSize.height + Nc + 1, &amp;amp;tmpStep ) ) )
			throw exception( "nIppSepFilterRC, mem-alloc error. " );
		pTmp = pTmpLocal;
		tmpw = tmpStep / sizeof(Ipp32f);

		Ipp32f **ppSrc, **ppDst;
		ppSrc = (Ipp32f**) ippsMalloc_32f( roiSize.height + Nc + 1 );
		ppDst = (Ipp32f**) ippsMalloc_32f( roiSize.height );

		if( padType == CONSTANT )
		{
			IppiSize tmpSize;
			tmpSize.height = roiSize.height + Nc + 1; 
			tmpSize.width  = roiSize.width;
			ippiSet_32f_C1R( val, pTmp, tmpStep, tmpSize );
		}

		// 	size of temporary buffers
		if( sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R( roiSize, Nr, &amp;amp;sizerow) )
			throw exception( "nIppSepFilterRC, ipp-row-mem-size error. ");

		if( sts = ippiFilterColumnPipelineGetBufferSize_32f_C1R( roiSize, Nc, &amp;amp;sizecol) )
			throw exception( "nIppSepFilterRC, ipp-col-mem-size error. ");

		//	allocate temporary buffers
		if( !( pBufferCol = ippsMalloc_8u( sizecol ) ) )
			throw exception( "nIppSepFilterRC, ipp-col-temp mem-alloc error. ");

		if( !( pBufferRow = ippsMalloc_8u( sizerow ) ) )
			throw exception( "nIppSepFilterRC, ipp-row-temp mem-alloc error. ");

		Nrss -= ro;
		Ncss -= co;

		// organize dst buffer
		for( int ii=0,jj=Ncss;ii&amp;lt;roiSize.height;ii++,jj++ )&lt;BR /&gt;		{&lt;BR /&gt;			ppDst[ii] = pTmp + jj * tmpw;&lt;BR /&gt;			ppSrc[jj] = pTmp + jj * tmpw;&lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		IppiBorderType borderType;&lt;BR /&gt;&lt;BR /&gt;		switch( padType )&lt;BR /&gt;		{&lt;BR /&gt;		case CONSTANT:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&amp;lt;Ncss;ii++,jj++ )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[ii] = pTmp + ii * tmpw;&lt;BR /&gt;				ppSrc[jj] = pTmp + jj * tmpw;&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = pTmp + (roiSize.height+(Ncss*2)) * tmpw;&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderConst;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case REPLICATE:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&amp;lt;Ncss;ii++,jj++ )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[ii] = ppSrc[Ncss];&lt;BR /&gt;				ppSrc[jj] = ppSrc[roiSize.height+Ncss-1];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height+Ncss-1];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderRepl;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case SYMMETRIC:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&amp;lt;Ncss;ii++,jj++ )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[ii] = ppSrc[(Ncss*2)-ii-1];&lt;BR /&gt;				ppSrc[jj] = ppSrc[roiSize.height+Ncss-ii-1];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[roiSize.height-1];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderMirrorR;&lt;BR /&gt;			break;&lt;BR /&gt;&lt;BR /&gt;		case CIRCULAR:&lt;BR /&gt;			for( int ii=0,jj=roiSize.height+Ncss;ii&amp;lt;Ncss;ii++,jj++ )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[ii] = ppSrc[roiSize.height+ii];&lt;BR /&gt;				ppSrc[jj] = ppSrc[ii+Ncss];&lt;BR /&gt;			}&lt;BR /&gt;			if( co )&lt;BR /&gt;			{&lt;BR /&gt;				ppSrc[roiSize.height+(Ncss*2)] = ppSrc[Ncss*2];&lt;BR /&gt;			}&lt;BR /&gt;			borderType = ippBorderWrap;&lt;BR /&gt;			
break;&lt;BR /&gt;&lt;BR /&gt;		default:&lt;BR /&gt;			// ippStsBorderErr missing from ippdefs.h&lt;BR /&gt;			return ippStsPaddingSchemeErr;  &lt;BR /&gt;		}&lt;BR /&gt;&lt;BR /&gt;		// perform the actual convolutions&lt;BR /&gt;		if( sts = ippiFilterRowBorderPipeline_32f_C1R( (const Ipp32f*)pSrc, srcStep, &lt;BR /&gt;			ppDst, roiSize, hr_flipped, Nr, Nrss, borderType, val, pBufferRow) )&lt;BR /&gt;		{&lt;BR /&gt;			cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;
			throw exception( "nIppSepFilterRC, ipp-row-filter error. ");
			return sts;
		}

		if( sts = ippiFilterColumnPipeline_32f_C1R( (const Ipp32f**)ppSrc, pDst, dstStep, 
			roiSize, hc_flipped, Nc, pBufferCol) )
		{
			cout &amp;lt;&amp;lt; "IPP Error: " &amp;lt;&amp;lt; ippGetStatusString( sts ) &amp;lt;&amp;lt; endl;
			throw exception( "nIppSepFilterRC, ipp-column-filter error. ");
			return sts;
		}

		if( ppSrc )		 { ippsFree(ppSrc);		 ppSrc 		= NULL; }
		if( ppDst )		 { ippsFree(ppDst);		 ppDst 		= NULL; }
		if( pTmpLocal )	 { ippiFree(pTmpLocal);	 pTmpLocal	= NULL; }
		if( pBufferCol ) { ippsFree(pBufferCol); pBufferCol = NULL; }
		if( pBufferRow ) { ippsFree(pBufferRow); pBufferRow = NULL; }
		if( hr_flipped ) { ippsFree(hr_flipped); hr_flipped = NULL; }
		if( hc_flipped ) { ippsFree(hc_flipped); hc_flipped = NULL; }

		return sts;
	}
	CATCH_AUTO
}


[/cpp]&lt;/PRE&gt;
&lt;BR /&gt;</description>
      <pubDate>Tue, 21 Jul 2009 20:27:36 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987743#M21965</guid>
      <dc:creator>jschamus</dc:creator>
      <dc:date>2009-07-21T20:27:36Z</dc:date>
    </item>
    <item>
      <title>Re: Separable 2d convolution with nxn filter</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987744#M21966</link>
      <description>&lt;DIV style="margin:0px;"&gt;&lt;/DIV&gt;
Hello,&lt;BR /&gt;&lt;BR /&gt;thanks for updating your sample of IPP based convolution. I would recommend using an attachment for big code chunks instead of inserting code into the post, as it may cause truncation.&lt;BR /&gt;&lt;BR /&gt;If you can provide us a sample which causes a crash in 64-bit mode we will investigate the reason (you also may quickly check if the issue is related to optimized code by dispatching a generic, PX version of the IPP library)&lt;BR /&gt;&lt;BR /&gt;Regards,&lt;BR /&gt; Vladimir&lt;BR /&gt;</description>
      <pubDate>Mon, 27 Jul 2009 16:26:38 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987744#M21966</guid>
      <dc:creator>Vladimir_Dudnik</dc:creator>
      <dc:date>2009-07-27T16:26:38Z</dc:date>
    </item>
    <item>
      <title>What is wrong with the</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987745#M21967</link>
      <description>&lt;P&gt;What is wrong with the following code? It crashes only when I call IppRowFilter multiple times.&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;static inline IppStatus IppRowFilter(
&amp;nbsp;Ipp32f*&amp;nbsp;&amp;nbsp;&amp;nbsp;pDst,&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;// Destination Image
&amp;nbsp;const int&amp;nbsp;&amp;nbsp;dstStep,&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;// Destination step
&amp;nbsp;const Ipp32f*&amp;nbsp;pSrc,&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;// Source Image 
&amp;nbsp;const int&amp;nbsp;&amp;nbsp;srcStep,&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;// Source step
&amp;nbsp;const IppiSize&amp;nbsp;dstSize,&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;// Destination size
&amp;nbsp;const Ipp32f*&amp;nbsp;hr,&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;// Row filter
&amp;nbsp;const int&amp;nbsp;&amp;nbsp;Nr)&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;// Row filter size
{
&amp;nbsp;IppStatus sts;

&amp;nbsp;int sizerow;
&amp;nbsp;Ipp8u *pBufferRow = NULL;
&amp;nbsp;IppiSize srcSize = { dstSize.width + Nr - 1, dstSize.height };

&amp;nbsp;//&amp;nbsp;flip the kernel and align the memory to please IPP 
&amp;nbsp;Ipp32f *hr_flipped = (Ipp32f*)ippsMalloc_32f(Nr*sizeof(Ipp32f));

&amp;nbsp;ippsFlip_32f((const Ipp32f*)hr, hr_flipped, Nr);

&amp;nbsp;Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*sizeof(Ipp32f));

&amp;nbsp;// &amp;nbsp;size of temporary buffer
&amp;nbsp;if (sts = ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(srcSize, Nr, &amp;amp;sizerow)) { goto cleanup; }

&amp;nbsp;//&amp;nbsp;allocate temporary buffer
&amp;nbsp;if (!(pBufferRow = ippsMalloc_8u(sizerow))) { sts = ippStsNoMemErr; goto cleanup; }


&amp;nbsp;// organize dst buffer
&amp;nbsp;for (int ii = 0; ii &amp;lt; dstSize.height; ii++) {
&amp;nbsp;&amp;nbsp;ppDst[ii] = pDst + ii * (dstStep / sizeof(Ipp32f));
&amp;nbsp;}

&amp;nbsp;// perform the actual convolution
&amp;nbsp;if (sts = ippiFilterRowBorderPipeline_32f_C1R((const Ipp32f*)pSrc, srcStep, ppDst, srcSize, hr_flipped, Nr, 0, ippBorderRepl, 0, pBufferRow))
&amp;nbsp;{
&amp;nbsp;&amp;nbsp;goto cleanup;
&amp;nbsp;}

cleanup:
&amp;nbsp;if (ppDst)&amp;nbsp;&amp;nbsp; { ippsFree(ppDst); ppDst = NULL; }
&amp;nbsp;if (pBufferRow)&amp;nbsp; { ippsFree(pBufferRow);&amp;nbsp; pBufferRow = NULL; }
&amp;nbsp;if (hr_flipped)&amp;nbsp; { ippsFree(hr_flipped); hr_flipped = NULL; }

&amp;nbsp;return sts;
}&lt;/PRE&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 11 Jan 2016 12:20:43 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987745#M21967</guid>
      <dc:creator>Piotr_Wendykier</dc:creator>
      <dc:date>2016-01-11T12:20:43Z</dc:date>
    </item>
    <item>
      <title>Hi,</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987746#M21968</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;

&lt;P&gt;I can't reproduce the crash with your code.&lt;/P&gt;

&lt;P&gt;Could you provide your version of IPP and values of&amp;nbsp;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.008px; line-height: 14.3088px; background-color: rgb(248, 248, 248);"&gt;dstSize,&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.008px; line-height: 14.3088px;"&gt;srcStep,&amp;nbsp;&lt;/SPAN&gt;&lt;SPAN style="color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace; font-size: 13.008px; line-height: 14.3088px; background-color: rgb(248, 248, 248);"&gt;dstSize, filter and filter size?&lt;/SPAN&gt;&lt;/P&gt;

&lt;P&gt;&lt;FONT color="#000000" face="Consolas, Bitstream Vera Sans Mono, Courier New, Courier, monospace"&gt;&lt;SPAN style="line-height: 14.3088px; background-color: rgb(248, 248, 248);"&gt;BR,&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/P&gt;

&lt;P&gt;&lt;FONT color="#000000" face="Consolas, Bitstream Vera Sans Mono, Courier New, Courier, monospace"&gt;&lt;SPAN style="line-height: 14.3088px; background-color: rgb(248, 248, 248);"&gt;Alexander&lt;/SPAN&gt;&lt;/FONT&gt;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 15 Jan 2016 13:07:31 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987746#M21968</guid>
      <dc:creator>Alexander_U_Intel</dc:creator>
      <dc:date>2016-01-15T13:07:31Z</dc:date>
    </item>
    <item>
      <title>Hi Piotr, </title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987747#M21969</link>
      <description>&lt;P&gt;Hi Piotr,&amp;nbsp;&lt;/P&gt;

&lt;P&gt;The code looks ok, just one small problem about&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&lt;CODE style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px 0.3em 0px 0px !important; border: 0px !important; outline: 0px !important; text-align: right !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: 2.7em !important; line-height: 1.1em !important; min-height: auto !important; display: block !important; background: none !important;"&gt;21&lt;/CODE&gt;&lt;CODE class="spaces" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; background: none !important;"&gt;&amp;nbsp;&lt;/CODE&gt;&lt;CODE class="plain" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*&lt;/CODE&gt;&lt;CODE class="keyword bold" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; 
margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; font-weight: bold !important; min-height: auto !important; color: rgb(0, 102, 153) !important; background: none !important;"&gt;sizeof&lt;/CODE&gt;&lt;CODE class="plain" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;(Ipp32f));&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;and&amp;nbsp;&lt;/CODE&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;ppDst[ii] = pDst + ii * (dstStep /&amp;nbsp;&lt;/CODE&gt;&lt;CODE class="keyword bold" style="font-size: 13.008px; line-height: 14.3088px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto 
!important; height: auto !important; width: auto !important; font-weight: bold !important; min-height: auto !important; color: rgb(0, 102, 153) !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;sizeof&lt;/CODE&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;(Ipp32f));&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;Here ppDst is a double pointer. &amp;nbsp;&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;if in 32bit application, it is ok as sizeof(Ipp32f)=4 and sizeof(Ipp32f *) =4.&amp;nbsp;&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-size: 13.008px; line-height: 14.3088px; color: rgb(0, 0, 0); font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; min-height: auto !important; background-image: none !important; background-attachment: initial !important; background-color: rgb(248, 248, 248); background-size: initial !important; background-origin: initial !important; background-clip: initial !important; background-position: initial !important; background-repeat: initial !important;"&gt;but in a 64-bit application, sizeof(Ipp32f*)=8, not 4, so for safety it is better to change&amp;nbsp;&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="spaces" style="font-size: 13.008px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; background: none !important;"&gt;&amp;nbsp;&lt;/CODE&gt;&lt;CODE class="plain" style="font-size: 13.008px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;Ipp32f **ppDst = (Ipp32f**)ippsMalloc_32f(dstSize.height*&lt;/CODE&gt;&lt;CODE class="keyword bold" style="font-size: 13.008px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; font-weight: bold !important; min-height: auto !important; color: rgb(0, 102, 153) !important; background: none !important;"&gt;sizeof&lt;/CODE&gt;&lt;CODE class="plain" style="font-size: 13.008px; 
font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;(&lt;STRONG&gt;Ipp32f*&lt;/STRONG&gt;));&lt;/CODE&gt;&lt;/P&gt;

&lt;P&gt;&lt;CODE class="plain" style="font-size: 13.008px; font-family: Consolas, 'Bitstream Vera Sans Mono', 'Courier New', Courier, monospace !important; margin: 0px !important; padding: 0px !important; border: 0px !important; outline: 0px !important; float: none !important; vertical-align: baseline !important; position: static !important; left: auto !important; top: auto !important; right: auto !important; bottom: auto !important; height: auto !important; width: auto !important; line-height: 1.1em !important; min-height: auto !important; color: rgb(0, 0, 0) !important; background: none !important;"&gt;Best Regards,&lt;BR /&gt;
	Ying&lt;/CODE&gt;&lt;BR /&gt;
	&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 28 Jan 2016 03:40:15 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/Separable-2d-convolution-with-nxn-filter/m-p/987747#M21969</guid>
      <dc:creator>Ying_H_Intel</dc:creator>
      <dc:date>2016-01-28T03:40:15Z</dc:date>
    </item>
  </channel>
</rss>

