<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Hi KIM, ILTAEK. in Intel® Integrated Performance Primitives</title>
    <link>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158735#M26498</link>
    <description>&lt;P&gt;Hi&amp;nbsp;KIM, ILTAEK.&lt;/P&gt;&lt;P&gt;The IPP library uses a so-called "step" between image lines because the processed region can be part of a bigger image.&amp;nbsp;&amp;nbsp;I am attaching changes that show how to work with steps to get the expected result.&lt;/P&gt;
&lt;PRE class="brush:cpp; class-name:dark;"&gt; #include "ipp.h"
 #include &amp;lt;stdio.h&amp;gt;
void main()
{
    const int out_width = 5;
    const int out_height = 9;

    IppStatus status = ippStsNoErr;
    Ipp16s* pSrc1 = NULL, *pSrc2 = NULL, *pDst = NULL;			/* Pointers to source/destination images */
    int srcStep1 = 0, srcStep2 = 0, dstStep = 0;				/* Steps, in bytes, through the source/destination images */
    IppiSize dstSize = { out_width, out_height };				/* Size of destination ROI in pixels */
    IppiSize src1Size = { 3, 5 };								/* Size of first source ROI in pixels */
    IppiSize src2Size = { 3, 5 };								/* Size of second source ROI in pixels */
    int divisor = 1;											/* The integer value by which the computed result is divided */
    Ipp8u *pBuffer = NULL;										/* Pointer to the work buffer */
    int iTmpBufSize = 0;										/* Common work buffer size */
    int numChannels = 1;
    IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone);

    pSrc2 = ippiMalloc_16s_C1(src2Size.width, src2Size.height, &amp;amp;srcStep2);
    pSrc1 = ippiMalloc_16s_C1(src1Size.width, src1Size.height, &amp;amp;srcStep1);
    pDst = ippiMalloc_16s_C1(dstSize.width, dstSize.height, &amp;amp;dstStep);
    Ipp16s img[5][3] = { {1, 1, 1},
                         {1, 0, 0},
                         {1, 1, 1},
                         {0, 0, 1},
                         {1, 1, 1} };
 do {
     status = ippiConvGetBufferSize(src1Size, src2Size, ipp16s, numChannels, funCfgFull, &amp;amp;iTmpBufSize);
     if (ippStsNoErr != status)
         break;

     pBuffer = ippsMalloc_8u(iTmpBufSize);
     int w, h;
     for (h = 0; h &amp;lt; 5; h++) {
         for (w = 0; w &amp;lt; 3; w++) {
             pSrc1[h*(srcStep1 / 2) + w] = img[h][w];
             pSrc2[h*(srcStep2/2)+w]     = img[h][w];
         }
     }

     status = ippiConv_16s_C1R(pSrc1, srcStep1, src1Size, pSrc2, srcStep2, src2Size, pDst, dstStep, divisor, funCfgFull, pBuffer);
     if (ippStsNoErr != status)
         break;

     for (int j = 0; j &amp;lt; out_height; j++) {
         for (int i = 0; i &amp;lt; out_width; i++) {
             printf("%d ",  pDst[i + j * (dstStep/2)]);
         }
         printf("\n");
     }
 } while (false);

 ippsFree(pBuffer);
 ippiFree(pSrc1);
 ippiFree(pSrc2);
 ippiFree(pDst);
 }
&lt;/PRE&gt;

&lt;P&gt;Thanks.&lt;/P&gt;</description>
    <pubDate>Thu, 09 Apr 2020 13:18:31 GMT</pubDate>
    <dc:creator>Andrey_B_Intel</dc:creator>
    <dc:date>2020-04-09T13:18:31Z</dc:date>
    <item>
      <title>IPP Convolution</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158734#M26497</link>
      <description>&lt;P&gt;I am testing convolution using IPP.&lt;/P&gt;&lt;P&gt;I also tested with MKL, but it was not as fast as I expected.&amp;nbsp;However, my simple IPP implementation produces incorrect results.&lt;/P&gt;&lt;P&gt;I would like to obtain the result shown at the link below; please review what is wrong with my code.&lt;/P&gt;&lt;P&gt;&lt;A href="https://community.intel.com/legacyfs/online/drupal_files/did_feeds_images/0EF01A88-F874-4ECB-B2B6-3ADC38636CD4/0EF01A88-F874-4ECB-B2B6-3ADC38636CD4-imageId=B62A36F3-9673-4469-A2A2-C62788A26EB4.jpg"&gt;https://software.intel.com/sites/default/files/did_feeds_images/0EF01A88-F874-4ECB-B2B6-3ADC38636CD4/0EF01A88-F874-4ECB-B2B6-3ADC38636CD4-imageId=B62A36F3-9673-4469-A2A2-C62788A26EB4.jpg&lt;/A&gt;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;PRE class="brush:cpp; class-name:dark;"&gt;    const int out_width = 5;
	const int out_height = 9;

	IppStatus status = ippStsNoErr;
	Ipp16s* pSrc1 = NULL, *pSrc2 = NULL, *pDst = NULL;			/* Pointers to source/destination images */
	int srcStep1 = 0, srcStep2 = 0, dstStep = 0;				/* Steps, in bytes, through the source/destination images */
	IppiSize dstSize = { out_width, out_height };				/* Size of destination ROI in pixels */
	IppiSize src1Size = { 3, 5 };								/* Size of first source ROI in pixels */
	IppiSize src2Size = { 3, 5 };								/* Size of second source ROI in pixels */
	int divisor = 1;											/* The integer value by which the computed result is divided */
	Ipp8u *pBuffer = NULL;										/* Pointer to the work buffer */
	int iTmpBufSize = 0;										/* Common work buffer size */
	int numChannels = 1;
	IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone);

	pSrc2 = ippiMalloc_16s_C1(src2Size.width, src2Size.height, &amp;amp;srcStep2);
	pSrc1 = ippiMalloc_16s_C1(src1Size.width, src1Size.height, &amp;amp;srcStep1);
	pDst = ippiMalloc_16s_C1(dstSize.width, dstSize.height, &amp;amp;dstStep);

	do {
		status = ippiConvGetBufferSize(src1Size, src2Size, ipp16s, numChannels, funCfgFull, &amp;amp;iTmpBufSize);
		if (ippStsNoErr != status)
			break;
			
		pBuffer = ippsMalloc_8u(iTmpBufSize);

		pSrc2[0] = pSrc1[0] = 1;
		pSrc2[1] = pSrc1[1] = 1;
		pSrc2[2] = pSrc1[2] = 1;
		pSrc2[3] = pSrc1[3] = 1;
		pSrc2[4] = pSrc1[4] = 0;
		pSrc2[5] = pSrc1[5] = 0;
		pSrc2[6] = pSrc1[6] = 1;
		pSrc2[7] = pSrc1[7] = 1;
		pSrc2[8] = pSrc1[8] = 1;
		pSrc2[9] = pSrc1[9] = 0;
		pSrc2[10] = pSrc1[10] = 0;
		pSrc2[11] = pSrc1[11] = 1;
		pSrc2[12] = pSrc1[12] = 1;
		pSrc2[13] = pSrc1[13] = 1;
		pSrc2[14] = pSrc1[14] = 1;

		status = ippiConv_16s_C1R(pSrc1, srcStep1, src1Size, pSrc2, srcStep2, src2Size, pDst, dstStep, divisor, funCfgFull, pBuffer);
		if (ippStsNoErr != status)
			break;
		
		for (int j = 0; j &amp;lt; out_height; j++) {
			for (int i = 0; i &amp;lt; out_width; i++) {
				cout &amp;lt;&amp;lt; pDst[i + j*out_width] &amp;lt;&amp;lt; " ";
			}
			cout &amp;lt;&amp;lt; endl;
		}
	} while (false);

	ippsFree(pBuffer);
	ippiFree(pSrc1);
	ippiFree(pSrc2);
	ippiFree(pDst);&lt;/PRE&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 09 Apr 2020 09:00:11 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158734#M26497</guid>
      <dc:creator>KIM__ILTAEK</dc:creator>
      <dc:date>2020-04-09T09:00:11Z</dc:date>
    </item>
    <item>
      <title>Hi KIM, ILTAEK.</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158735#M26498</link>
      <description>&lt;P&gt;Hi&amp;nbsp;KIM, ILTAEK.&lt;/P&gt;&lt;P&gt;The IPP library uses a so-called "step" between image lines because the processed region can be part of a bigger image.&amp;nbsp;&amp;nbsp;I am attaching changes that show how to work with steps to get the expected result.&lt;/P&gt;
&lt;PRE class="brush:cpp; class-name:dark;"&gt; #include "ipp.h"
 #include &amp;lt;stdio.h&amp;gt;
void main()
{
    const int out_width = 5;
    const int out_height = 9;

    IppStatus status = ippStsNoErr;
    Ipp16s* pSrc1 = NULL, *pSrc2 = NULL, *pDst = NULL;			/* Pointers to source/destination images */
    int srcStep1 = 0, srcStep2 = 0, dstStep = 0;				/* Steps, in bytes, through the source/destination images */
    IppiSize dstSize = { out_width, out_height };				/* Size of destination ROI in pixels */
    IppiSize src1Size = { 3, 5 };								/* Size of first source ROI in pixels */
    IppiSize src2Size = { 3, 5 };								/* Size of second source ROI in pixels */
    int divisor = 1;											/* The integer value by which the computed result is divided */
    Ipp8u *pBuffer = NULL;										/* Pointer to the work buffer */
    int iTmpBufSize = 0;										/* Common work buffer size */
    int numChannels = 1;
    IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone);

    pSrc2 = ippiMalloc_16s_C1(src2Size.width, src2Size.height, &amp;amp;srcStep2);
    pSrc1 = ippiMalloc_16s_C1(src1Size.width, src1Size.height, &amp;amp;srcStep1);
    pDst = ippiMalloc_16s_C1(dstSize.width, dstSize.height, &amp;amp;dstStep);
    Ipp16s img[5][3] = { {1, 1, 1},
                         {1, 0, 0},
                         {1, 1, 1},
                         {0, 0, 1},
                         {1, 1, 1} };
 do {
     status = ippiConvGetBufferSize(src1Size, src2Size, ipp16s, numChannels, funCfgFull, &amp;amp;iTmpBufSize);
     if (ippStsNoErr != status)
         break;

     pBuffer = ippsMalloc_8u(iTmpBufSize);
     int w, h;
     for (h = 0; h &amp;lt; 5; h++) {
         for (w = 0; w &amp;lt; 3; w++) {
             pSrc1[h*(srcStep1 / 2) + w] = img[h][w];
             pSrc2[h*(srcStep2/2)+w]     = img[h][w];
         }
     }

     status = ippiConv_16s_C1R(pSrc1, srcStep1, src1Size, pSrc2, srcStep2, src2Size, pDst, dstStep, divisor, funCfgFull, pBuffer);
     if (ippStsNoErr != status)
         break;

     for (int j = 0; j &amp;lt; out_height; j++) {
         for (int i = 0; i &amp;lt; out_width; i++) {
             printf("%d ",  pDst[i + j * (dstStep/2)]);
         }
         printf("\n");
     }
 } while (false);

 ippsFree(pBuffer);
 ippiFree(pSrc1);
 ippiFree(pSrc2);
 ippiFree(pDst);
 }
&lt;/PRE&gt;

&lt;P&gt;Thanks.&lt;/P&gt;</description>
      <pubDate>Thu, 09 Apr 2020 13:18:31 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158735#M26498</guid>
      <dc:creator>Andrey_B_Intel</dc:creator>
      <dc:date>2020-04-09T13:18:31Z</dc:date>
    </item>
    <item>
      <title>Kim, </title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158736#M26499</link>
      <description>&lt;P&gt;Kim,&amp;nbsp;&lt;/P&gt;&lt;P&gt;please keep us informed of the performance results you will obtain.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Gennady&lt;/P&gt;</description>
      <pubDate>Thu, 09 Apr 2020 15:02:59 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158736#M26499</guid>
      <dc:creator>Gennady_F_Intel</dc:creator>
      <dc:date>2020-04-09T15:02:59Z</dc:date>
    </item>
    <item>
      <title>Hi, Andrey Bakshaev</title>
      <link>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158737#M26500</link>
      <description>&lt;P&gt;Hi, Andrey Bakshaev&lt;/P&gt;&lt;P&gt;Thank you for sharing this information.&amp;nbsp;Your advice has been a great help.&lt;/P&gt;&lt;P&gt;Let me ask you one more question.&lt;BR /&gt;Actually, I want to use the ippiROISame option, but when ippiROISame is used, ippiConvGetBufferSize returns an error.&amp;nbsp;If I&amp;nbsp;change the code you provided above as shown below, the problem appears. Is there a solution?&lt;/P&gt;
&lt;PRE class="brush:cpp; class-name:dark;"&gt;	//const int out_width = 5;
	//const int out_height = 9;
	const int out_width = 3;
	const int out_height = 5;

	//IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone);
	IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROISame | ippiNormNone);&lt;/PRE&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 10 Apr 2020 00:56:19 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Integrated-Performance/IPP-Convolution/m-p/1158737#M26500</guid>
      <dc:creator>KIM__ILTAEK</dc:creator>
      <dc:date>2020-04-10T00:56:19Z</dc:date>
    </item>
  </channel>
</rss>

