<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Hi Tony in Intel® oneAPI Math Kernel Library</title>
    <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153308#M27323</link>
    <description>&lt;P&gt;Hi Tony&lt;BR /&gt;
	&lt;BR /&gt;
	Thank you very much for reporting the problem.&lt;BR /&gt;
	If possible, could you please give us some background, such as your test CPU type, vector size, how you link MKL and IPP, etc.?&amp;nbsp; A small reproducer would be helpful!&amp;nbsp; If it is private, could you please submit that information to the&amp;nbsp; Intel Online Service Center: &amp;nbsp;http://supporttickets.intel.com/&lt;BR /&gt;
	&lt;BR /&gt;
	Thanks&lt;BR /&gt;
	Ying&lt;/P&gt;</description>
    <pubDate>Mon, 09 Jul 2018 01:57:49 GMT</pubDate>
    <dc:creator>Ying_H_Intel</dc:creator>
    <dc:date>2018-07-09T01:57:49Z</dc:date>
    <item>
      <title>vslsConvExecX performance</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153307#M27322</link>
      <description>&lt;P&gt;Using the function&amp;nbsp;vslsConvExecX versus the IPP function IppFilter, the performance is 10x slower. Does this seem correct?&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 06 Jul 2018 19:21:33 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153307#M27322</guid>
      <dc:creator>Beckett__Tony</dc:creator>
      <dc:date>2018-07-06T19:21:33Z</dc:date>
    </item>
    <item>
      <title>Hi Tony</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153308#M27323</link>
      <description>&lt;P&gt;Hi Tony&lt;BR /&gt;
	&lt;BR /&gt;
	Thank you very much for reporting the problem.&lt;BR /&gt;
	If possible, could you please give us some background, such as your test CPU type, vector size, how you link MKL and IPP, etc.?&amp;nbsp; A small reproducer would be helpful!&amp;nbsp; If it is private, could you please submit that information to the&amp;nbsp; Intel Online Service Center: &amp;nbsp;http://supporttickets.intel.com/&lt;BR /&gt;
	&lt;BR /&gt;
	Thanks&lt;BR /&gt;
	Ying&lt;/P&gt;</description>
      <pubDate>Mon, 09 Jul 2018 01:57:49 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153308#M27323</guid>
      <dc:creator>Ying_H_Intel</dc:creator>
      <dc:date>2018-07-09T01:57:49Z</dc:date>
    </item>
    <item>
      <title>processor	: 0</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153309#M27324</link>
      <description>&lt;PRE class="brush:plain;"&gt;processor	: 0
vendor_id	: GenuineIntel
cpu family	: 6
model		: 142
model name	: Intel(R) Core(TM) i7-7500U CPU @ 2.70GHz
stepping	: 9
cpu MHz		: 2904.004
cache size	: 4096 KB
physical id	: 0
siblings	: 1
core id		: 0
cpu cores	: 1
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 22
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq monitor ssse3 cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx
 rdrand hypervisor lahf_lm abm 3dnowprefetch avx2 rdseed clflushopt
bogomips	: 5808.00
clflush size	: 64
cache_alignment	: 64
address sizes	: 39 bits physical, 48 bits virtual
&lt;/PRE&gt;

&lt;P&gt;#define IPP_VERSION_STR "2018.0.3"&lt;/P&gt;

&lt;P&gt;#define INTEL_MKL_VERSION 20180002&lt;/P&gt;

&lt;BLOCKQUOTE&gt;
	&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; libmkl_intel_lp64.so =&amp;gt; /opt/intel/mkl/lib/intel64/libmkl_intel_lp64.so (0x00007f986c843000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libmkl_gnu_thread.so =&amp;gt; /opt/intel/mkl/lib/intel64/libmkl_gnu_thread.so (0x00007f986b130000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libmkl_core.so =&amp;gt; /opt/intel/mkl/lib/intel64/libmkl_core.so (0x00007f9867126000)&lt;/P&gt;
&lt;/BLOCKQUOTE&gt;

&lt;BLOCKQUOTE&gt;
	&lt;P&gt;libippcore.so =&amp;gt; /opt/intel/ipp/lib/intel64/libippcore.so (0x00007f529092b000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libippcc.so =&amp;gt; /opt/intel/ipp/lib/intel64/libippcc.so (0x00007f5290710000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libippch.so =&amp;gt; /opt/intel/ipp/lib/intel64/libippch.so (0x00007f529050a000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libippcv.so =&amp;gt; /opt/intel/ipp/lib/intel64/libippcv.so (0x00007f52902e4000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libippdc.so =&amp;gt; /opt/intel/ipp/lib/intel64/libippdc.so (0x00007f52900dc000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libippi.so =&amp;gt; /opt/intel/ipp/lib/intel64/libippi.so (0x00007f528fe2a000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libipps.so =&amp;gt; /opt/intel/ipp/lib/intel64/libipps.so (0x00007f528fbe0000)&lt;BR /&gt;
		&amp;nbsp;&amp;nbsp; &amp;nbsp;libippvm.so =&amp;gt; /opt/intel/ipp/lib/intel64/libippvm.so (0x00007f528f9c9000)&lt;/P&gt;
&lt;/BLOCKQUOTE&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;PRE class="brush:cpp;"&gt;partial code
 
    const int x_stride[2] = { 256,    1 };             
    const int y_stride[2] = {  8, 1 };                 
    const int z_stride[2] = { 256,    1 };    

status = vslsConvNewTaskX(&amp;amp;task,   
                                                VSL_CONV_MODE_AUTO, 
                                        
                                             ? VSL_CONV_MODE_DIRECT                 
                                            
                                         2, 
                                         x_shape,                                    
                                         y_shape,                                     
                                         z_shape,                                     
                                         x,                                         
                                         x_stride);                                  
     
    const int conv_start[2] = { (anchor.y == -1) ? (y_shape[0] - 1) / 2 : anchor.y,    
                                (anchor.x == -1) ? (y_shape[1] - 1) / 2 : anchor.x }; 
                                                                                  
    status = vslConvSetStart(task, conv_start);                                    
    
    status = vslsConvExecX(task,      
                                      y,                                           
                                      y_stride,                                     
                                      z,                                             
                                      z_stride);                                      
     
    status = vslConvDeleteTask(&amp;amp;task);     &lt;/PRE&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 09 Jul 2018 12:42:52 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153309#M27324</guid>
      <dc:creator>Beckett__Tony</dc:creator>
      <dc:date>2018-07-09T12:42:52Z</dc:date>
    </item>
    <item>
      <title>Hi Tony, </title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153310#M27325</link>
      <description>&lt;P&gt;Hi Tony,&amp;nbsp;&lt;BR /&gt;
	&lt;BR /&gt;
	What is your input, and&amp;nbsp; what were your IPP filter parameters?&amp;nbsp;&lt;/P&gt;

&lt;P&gt;Best Regards,&lt;/P&gt;

&lt;P&gt;Ying&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 23 Jul 2018 05:38:23 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153310#M27325</guid>
      <dc:creator>Ying_H_Intel</dc:creator>
      <dc:date>2018-07-23T05:38:23Z</dc:date>
    </item>
    <item>
      <title>Hi Tony,</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153311#M27326</link>
      <description>&lt;P&gt;Hi Tony,&lt;BR /&gt;
	&lt;BR /&gt;
	We discussed the issue internally.&amp;nbsp;&amp;nbsp;As you saw,&amp;nbsp; there are&amp;nbsp;two convolution implementations, in MKL and IPP, and IPP has&amp;nbsp;better performance than vslsConvExecX.&amp;nbsp; We&amp;nbsp;even have another popular&amp;nbsp;library, MKL-DNN, for convolution: &lt;A href="https://github.com/intel/mkl-dnn" target="_blank"&gt;https://github.com/intel/mkl-dnn&lt;/A&gt;. So&amp;nbsp;we are interested in what kind of&amp;nbsp;application you are working on and how it uses convolution; could you give us some background?&lt;BR /&gt;
	&lt;BR /&gt;
	Best Regards,&lt;BR /&gt;
	Ying&lt;/P&gt;</description>
      <pubDate>Fri, 27 Jul 2018 05:29:49 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153311#M27326</guid>
      <dc:creator>Ying_H_Intel</dc:creator>
      <dc:date>2018-07-27T05:29:49Z</dc:date>
    </item>
    <item>
      <title>We are doing image analysis.</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153312#M27327</link>
      <description>&lt;P&gt;We are doing image analysis. Currently we are using Linux as the OS. We can compile using either OpenCV or MKL/IPP. In this case, for the 2D filter function, OpenCV is 30% faster, and we thought that the Intel libraries should be faster. So we are confused.&lt;/P&gt;

&lt;P&gt;You are saying that for an 8x8 kernel on a 1024x1024 image, IPP should be faster?&lt;/P&gt;</description>
      <pubDate>Fri, 27 Jul 2018 19:16:41 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153312#M27327</guid>
      <dc:creator>Beckett__Tony</dc:creator>
      <dc:date>2018-07-27T19:16:41Z</dc:date>
    </item>
    <item>
      <title>Hi Tony, </title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153313#M27328</link>
      <description>&lt;P&gt;Hi Tony,&amp;nbsp;&lt;BR /&gt;
	&lt;BR /&gt;
	Yes, IPP convolution is faster than&amp;nbsp;&amp;nbsp;&lt;SPAN style="font-size: 12px;"&gt;vslsConvExecX.&amp;nbsp; What do you mean by OpenCV being 30% faster?&amp;nbsp; I thought OpenCV was optimized with IPP by default.&amp;nbsp; Could you please provide us with a small test case?&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;

&lt;P&gt;I have attached the one we used for our IPP test.&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&lt;SPAN style="font-size: 12px;"&gt;Best Regards,&lt;BR /&gt;
	Ying&amp;nbsp;&lt;/SPAN&gt;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;int main(void)&lt;BR /&gt;
	{&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; double time;&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; clock_t t;&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; IppStatus status = ippStsNoErr;&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; Ipp32f* pSrc1 = NULL, *pSrc2 = NULL, *pDst = NULL; /* Pointers to source/destination images */&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; int srcStep1 = 0, srcStep2 = 0, dstStep = 0;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;/* Steps, in bytes, through the source/destination images */&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; IppiSize dstSize&amp;nbsp; = { 1031, 1031 };&amp;nbsp; &amp;nbsp; &amp;nbsp;/* Size of destination ROI in pixels */&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; IppiSize src1Size = { 1024, 1024 };&amp;nbsp; &amp;nbsp; &amp;nbsp;/* Size of first source image in pixels */&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; IppiSize src2Size = { 8, 8 }; /* Size of second source image (kernel) in pixels */&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; int divisor = 2; /* The integer value by which the computed result is divided */&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; Ipp8u *pBuffer = NULL;&amp;nbsp; /* Pointer to the work buffer */&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; int iTmpBufSize = 0;&amp;nbsp; &amp;nbsp; /* Common work buffer size */&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; int numChannels = 1;&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; IppEnum funCfgFull = (IppEnum)(ippAlgAuto | ippiROIFull | ippiNormNone);&lt;/P&gt;

&lt;P&gt;&amp;nbsp; &amp;nbsp; pSrc2 = ippiMalloc_32f_C1(src2Size.width, src2Size.height, &amp;amp;srcStep2);&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; pSrc1 = ippiMalloc_32f_C1(src1Size.width, src1Size.height, &amp;amp;srcStep1);&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; pDst&amp;nbsp; = ippiMalloc_32f_C1(dstSize.width, dstSize.height, &amp;amp;dstStep);&lt;/P&gt;

&lt;P&gt;&amp;nbsp; &amp;nbsp; check_sts( status = ippiConvGetBufferSize(src1Size, src2Size, ipp32f, numChannels, funCfgFull, &amp;amp;iTmpBufSize) )&lt;/P&gt;

&lt;P&gt;&amp;nbsp; &amp;nbsp; pBuffer = ippsMalloc_8u(iTmpBufSize);&lt;/P&gt;

&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for (int i = 0; i &amp;lt; 1048576; ++i) {&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; pSrc1[i] = 1;&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for (int i = 0; i &amp;lt; 8 * 8; ++i) {&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; pSrc2[i] = 1;&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; t = clock();&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; for (int j = 0; j &amp;lt; 100; ++j) {&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; check_sts(status = ippiConv_32f_C1R(pSrc1, srcStep1, src1Size, pSrc2, srcStep2, src2Size, pDst, dstStep, funCfgFull, pBuffer))&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; }&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; t = clock() - t;&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; time = (double)t / CLOCKS_PER_SEC;&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; printf("%f \n", time);&lt;BR /&gt;
	&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; system("pause");&lt;/P&gt;

&lt;P&gt;&amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; return 0;&lt;/P&gt;</description>
      <pubDate>Tue, 31 Jul 2018 08:58:17 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/vslsConvExecX-performance/m-p/1153313#M27328</guid>
      <dc:creator>Ying_H_Intel</dc:creator>
      <dc:date>2018-07-31T08:58:17Z</dc:date>
    </item>
  </channel>
</rss>

