<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic oneMKL blas -  performance regression on Intel CPUs in Intel® oneAPI Math Kernel Library</title>
    <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349129#M32532</link>
    <description>&lt;P&gt;I'm running a simple axpy using the oneMKL BLAS interface and it's really slow compared to a non-optimized SYCL kernel.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="cpp"&gt;#include "oneapi/mkl.hpp"
#include &amp;lt;chrono&amp;gt;

using namespace cl;
using namespace std::chrono;

// Run benchmarks
int main(int argc, char** argv) {
  using T = double;

  sycl::queue queue{sycl::cpu_selector{}};
  constexpr std::size_t size = 1e9;
  T* x = sycl::malloc_device&amp;lt;T&amp;gt;(size, queue);
  T* y = sycl::malloc_device&amp;lt;T&amp;gt;(size, queue);
  queue.fill(x, T{1.0}, size).wait();
  queue.fill(y, T{2.0}, size).wait();

  T alpha = 3.;
  int num_iter = 5;
  for (int i = 0; i &amp;lt; num_iter; i++) {
    auto start = high_resolution_clock::now();
    oneapi::mkl::blas::axpy(queue, size, alpha, x, 1, y, 1).wait();
    auto end = high_resolution_clock::now();
    double t = duration_cast&amp;lt;duration&amp;lt;double&amp;gt;&amp;gt;(end - start).count();
    std::cout &amp;lt;&amp;lt; i &amp;lt;&amp;lt; " oneMKL: " &amp;lt;&amp;lt; t &amp;lt;&amp;lt; " seconds" &amp;lt;&amp;lt; std::endl;
  }

  for (int i = 0; i &amp;lt; num_iter; i++) {
    auto start = high_resolution_clock::now();
    auto e = queue.submit([&amp;amp;](sycl::handler&amp;amp; h) {
      h.parallel_for(sycl::range&amp;lt;1&amp;gt;{size}, [=](sycl::item&amp;lt;1&amp;gt; it) {
        const std::size_t i = it.get_id();
        x[i] = alpha * y[i] + x[i];
      });
    });
    e.wait();
    auto end = high_resolution_clock::now();
    double t = duration_cast&amp;lt;duration&amp;lt;double&amp;gt;&amp;gt;(end - start).count();
    std::cout &amp;lt;&amp;lt; i &amp;lt;&amp;lt; " SYCL: " &amp;lt;&amp;lt; t &amp;lt;&amp;lt; " seconds" &amp;lt;&amp;lt; std::endl;
  }

  sycl::free(x, queue);
  sycl::free(y, queue);

  return 0;
}&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Command used to compile (from&amp;nbsp;Intel® oneAPI Math Kernel Library Link Line Advisor).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="bash"&gt;dpcpp -Ofast -L${MKLROOT}/lib/intel64 -lmkl_sycl -lmkl_intel_ilp64 -lmkl_tbb_thread -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl  -DMKL_ILP64  -I"${MKLROOT}/include" test.cpp&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Version:&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;Intel(R) oneAPI DPC++/C++ Compiler 2022.0.0 (2022.0.0.20211123)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Output:&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;Ice Lake -&amp;nbsp;Model name: Intel(R) Xeon(R) Platinum 8368Q CPU @ 2.60GHz&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;0 oneMKL: 1.35613 seconds
1 oneMKL: 1.5168 seconds
2 oneMKL: 1.4051 seconds
3 oneMKL: 1.38451 seconds
4 oneMKL: 1.40654 seconds

0 SYCL: 0.12582 seconds
1 SYCL: 0.125947 seconds
2 SYCL: 0.126261 seconds
3 SYCL: 0.128162 seconds
4 SYCL: 0.123251 seconds&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;This happens both with the installation using spack and the offload installer.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Similar result running the code on &lt;STRONG&gt;devcloud&lt;/STRONG&gt;:&lt;/P&gt;
&lt;P&gt;Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;0 oneMKL: 2.77735 seconds
1 oneMKL: 2.2834 seconds
2 oneMKL: 2.05315 seconds
3 oneMKL: 2.44329 seconds
4 oneMKL: 1.96935 seconds

0 SYCL: 0.513233 seconds
1 SYCL: 0.494699 seconds
2 SYCL: 0.512073 seconds
3 SYCL: 0.50423 seconds
4 SYCL: 0.494641 seconds&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Am I missing something?&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Thu, 06 Jan 2022 21:11:39 GMT</pubDate>
    <dc:creator>IgorBaratta</dc:creator>
    <dc:date>2022-01-06T21:11:39Z</dc:date>
    <item>
      <title>oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349129#M32532</link>
      <description>&lt;P&gt;I'm running a simple axpy using the oneMKL BLAS interface and it's really slow compared to a non-optimized SYCL kernel.&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="cpp"&gt;#include "oneapi/mkl.hpp"
#include &amp;lt;chrono&amp;gt;

using namespace cl;
using namespace std::chrono;

// Run benchmarks
int main(int argc, char** argv) {
  using T = double;

  sycl::queue queue{sycl::cpu_selector{}};
  constexpr std::size_t size = 1e9;
  T* x = sycl::malloc_device&amp;lt;T&amp;gt;(size, queue);
  T* y = sycl::malloc_device&amp;lt;T&amp;gt;(size, queue);
  queue.fill(x, T{1.0}, size).wait();
  queue.fill(y, T{2.0}, size).wait();

  T alpha = 3.;
  int num_iter = 5;
  for (int i = 0; i &amp;lt; num_iter; i++) {
    auto start = high_resolution_clock::now();
    oneapi::mkl::blas::axpy(queue, size, alpha, x, 1, y, 1).wait();
    auto end = high_resolution_clock::now();
    double t = duration_cast&amp;lt;duration&amp;lt;double&amp;gt;&amp;gt;(end - start).count();
    std::cout &amp;lt;&amp;lt; i &amp;lt;&amp;lt; " oneMKL: " &amp;lt;&amp;lt; t &amp;lt;&amp;lt; " seconds" &amp;lt;&amp;lt; std::endl;
  }

  for (int i = 0; i &amp;lt; num_iter; i++) {
    auto start = high_resolution_clock::now();
    auto e = queue.submit([&amp;amp;](sycl::handler&amp;amp; h) {
      h.parallel_for(sycl::range&amp;lt;1&amp;gt;{size}, [=](sycl::item&amp;lt;1&amp;gt; it) {
        const std::size_t i = it.get_id();
        x[i] = alpha * y[i] + x[i];
      });
    });
    e.wait();
    auto end = high_resolution_clock::now();
    double t = duration_cast&amp;lt;duration&amp;lt;double&amp;gt;&amp;gt;(end - start).count();
    std::cout &amp;lt;&amp;lt; i &amp;lt;&amp;lt; " SYCL: " &amp;lt;&amp;lt; t &amp;lt;&amp;lt; " seconds" &amp;lt;&amp;lt; std::endl;
  }

  sycl::free(x, queue);
  sycl::free(y, queue);

  return 0;
}&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Command used to compile (from&amp;nbsp;Intel® oneAPI Math Kernel Library Link Line Advisor).&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="bash"&gt;dpcpp -Ofast -L${MKLROOT}/lib/intel64 -lmkl_sycl -lmkl_intel_ilp64 -lmkl_tbb_thread -lmkl_core -lsycl -lOpenCL -lpthread -lm -ldl  -DMKL_ILP64  -I"${MKLROOT}/include" test.cpp&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Version:&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;Intel(R) oneAPI DPC++/C++ Compiler 2022.0.0 (2022.0.0.20211123)&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;STRONG&gt;Output:&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;Ice Lake -&amp;nbsp;Model name: Intel(R) Xeon(R) Platinum 8368Q CPU @ 2.60GHz&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;0 oneMKL: 1.35613 seconds
1 oneMKL: 1.5168 seconds
2 oneMKL: 1.4051 seconds
3 oneMKL: 1.38451 seconds
4 oneMKL: 1.40654 seconds

0 SYCL: 0.12582 seconds
1 SYCL: 0.125947 seconds
2 SYCL: 0.126261 seconds
3 SYCL: 0.128162 seconds
4 SYCL: 0.123251 seconds&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;This happens both with the installation using spack and the offload installer.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Similar result running the code on &lt;STRONG&gt;devcloud&lt;/STRONG&gt;:&lt;/P&gt;
&lt;P&gt;Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;0 oneMKL: 2.77735 seconds
1 oneMKL: 2.2834 seconds
2 oneMKL: 2.05315 seconds
3 oneMKL: 2.44329 seconds
4 oneMKL: 1.96935 seconds

0 SYCL: 0.513233 seconds
1 SYCL: 0.494699 seconds
2 SYCL: 0.512073 seconds
3 SYCL: 0.50423 seconds
4 SYCL: 0.494641 seconds&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Am I missing something?&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 06 Jan 2022 21:11:39 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349129#M32532</guid>
      <dc:creator>IgorBaratta</dc:creator>
      <dc:date>2022-01-06T21:11:39Z</dc:date>
    </item>
    <item>
      <title>Re: oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349147#M32533</link>
      <description>&lt;P&gt;In my project, I'm using the following CMake commands for linking, so I reckon the issue is not only with linking.&lt;/P&gt;
&lt;LI-CODE lang="bash"&gt;#CXX=dpcpp
find_package(MKL CONFIG REQUIRED)

target_compile_options(${PROJECT_NAME} PUBLIC $&amp;lt;TARGET_PROPERTY:MKL::MKL_DPCPP,INTERFACE_COMPILE_OPTIONS&amp;gt;)
target_include_directories(${PROJECT_NAME} PUBLIC $&amp;lt;TARGET_PROPERTY:MKL::MKL_DPCPP,INTERFACE_INCLUDE_DIRECTORIES&amp;gt;)
target_link_libraries(${PROJECT_NAME} PUBLIC $&amp;lt;LINK_ONLY:MKL::MKL_DPCPP&amp;gt;)&lt;/LI-CODE&gt;
&lt;P&gt;But I still get the same performance regression (compared to the C interface).&lt;BR /&gt;It's worth mentioning that this issue is not unique to axpy, but I observed the same behaviour for other "level 1" blas functions when using the SYCL interface.&lt;/P&gt;</description>
      <pubDate>Thu, 06 Jan 2022 21:21:53 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349147#M32533</guid>
      <dc:creator>IgorBaratta</dc:creator>
      <dc:date>2022-01-06T21:21:53Z</dc:date>
    </item>
    <item>
      <title>Re: oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349374#M32534</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks for reaching out to us.&lt;/P&gt;
&lt;P&gt;We tried reproducing the issue on our end on two different processors and observed that on one CPU the timings are nearly identical.&lt;/P&gt;
&lt;P&gt;Here are the results:&lt;/P&gt;
&lt;P&gt;Device:&lt;STRONG&gt; Intel(R) Xeon(R) E-2176G CPU @ 3.70GHz&lt;/STRONG&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;0 oneMKL: 0.801352 seconds
1 oneMKL: 0.77536 seconds
2 oneMKL: 0.784574 seconds
3 oneMKL: 0.773554 seconds
4 oneMKL: 0.772544 seconds

0 SYCL: 0.753969 seconds
1 SYCL: 0.754054 seconds
2 SYCL: 0.753803 seconds
3 SYCL: 0.753249 seconds
4 SYCL: 0.80803 seconds&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;But when we tried it on this CPU, the issue was reproducible:&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Device:&lt;STRONG&gt; Intel(R) Xeon(R) Gold 6128 CPU @ 3.40GHz&lt;/STRONG&gt;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;0 oneMKL: 1.82411 seconds
1 oneMKL: 2.82107 seconds
2 oneMKL: 4.32416 seconds
3 oneMKL: 4.77222 seconds
4 oneMKL: 2.69084 seconds

0 SYCL: 0.986935 seconds
1 SYCL: 0.994626 seconds
2 SYCL: 0.961732 seconds
3 SYCL: 0.997382 seconds
4 SYCL: 0.966748 seconds&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Could you please let us know the OS details on which you are working?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Regards,&lt;/P&gt;
&lt;P&gt;Vidya.&lt;/P&gt;</description>
      <pubDate>Wed, 12 Jan 2022 05:52:05 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349374#M32534</guid>
      <dc:creator>VidyalathaB_Intel</dc:creator>
      <dc:date>2022-01-12T05:52:05Z</dc:date>
    </item>
    <item>
      <title>Re: oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349385#M32535</link>
      <description>&lt;P&gt;Hi,&lt;BR /&gt;Thanks for your reply.&lt;BR /&gt;&lt;BR /&gt;I've tested the code on devcloud (which I assume uses Ubuntu 18.04 or 20.04); I also tested it on our local cluster with CentOS 8.&lt;/P&gt;
&lt;P&gt;A third system I'm using runs on Red Hat 8.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Best,&lt;/P&gt;
&lt;P&gt;Igor&lt;/P&gt;</description>
      <pubDate>Fri, 07 Jan 2022 11:14:06 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1349385#M32535</guid>
      <dc:creator>IgorBaratta</dc:creator>
      <dc:date>2022-01-07T11:14:06Z</dc:date>
    </item>
    <item>
      <title>Re:oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1350679#M32549</link>
      <description>&lt;P&gt;Hi Igor,&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Thanks for providing us with the details.&lt;/P&gt;&lt;P&gt;We are working on your issue, we will get back to you soon.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Regards,&lt;/P&gt;&lt;P&gt;Vidya.&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Wed, 12 Jan 2022 05:51:04 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1350679#M32549</guid>
      <dc:creator>VidyalathaB_Intel</dc:creator>
      <dc:date>2022-01-12T05:51:04Z</dc:date>
    </item>
    <item>
      <title>Re:oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1350726#M32552</link>
      <description>&lt;P&gt;Igor,&lt;/P&gt;&lt;P&gt;It might be an optimization problem with respect to all Level 1 BLAS functions, and we will check this case. &lt;/P&gt;&lt;P&gt;-Gennady&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Wed, 12 Jan 2022 07:49:43 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1350726#M32552</guid>
      <dc:creator>Gennady_F_Intel</dc:creator>
      <dc:date>2022-01-12T07:49:43Z</dc:date>
    </item>
    <item>
      <title>Re:oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1375355#M32985</link>
      <description>&lt;P&gt;Hi Igor,&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;We have had this issue resolved.  The fix will be in the upcoming version, 2022.1, of oneMKL.&lt;/P&gt;&lt;P&gt;This release will be announced soon.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Best regards,&lt;/P&gt;&lt;P&gt;Khang&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Thu, 07 Apr 2022 21:54:10 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1375355#M32985</guid>
      <dc:creator>Khang_N_Intel</dc:creator>
      <dc:date>2022-04-07T21:54:10Z</dc:date>
    </item>
    <item>
      <title>Re:oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1389393#M33218</link>
      <description>&lt;P&gt;Hi Igor,&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;The issue has been fixed in oneMKL 2022.1.&lt;/P&gt;&lt;P&gt;The Intel(R) oneAPI Base Toolkit 2022.2 (containing oneMKL 2022.1) has been released.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Could you verify that the issue is fixed on your end?&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Thanks,&lt;/P&gt;&lt;P&gt;Khang&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Wed, 01 Jun 2022 23:29:20 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1389393#M33218</guid>
      <dc:creator>Khang_N_Intel</dc:creator>
      <dc:date>2022-06-01T23:29:20Z</dc:date>
    </item>
    <item>
      <title>Re:oneMKL blas -  performance regression on Intel CPUs</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1390302#M33236</link>
      <description>&lt;P&gt;Hi Igor,&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Since the fix has been implemented in oneMKL 2022.1 and that version of oneMKL has been available for quite some time, I am going to close this thread.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;This thread will no longer be monitored.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Best regards,&lt;/P&gt;&lt;P&gt;Khang&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Mon, 06 Jun 2022 18:49:47 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-Math-Kernel-Library/oneMKL-blas-performance-regression-on-Intel-CPUs/m-p/1390302#M33236</guid>
      <dc:creator>Khang_N_Intel</dc:creator>
      <dc:date>2022-06-06T18:49:47Z</dc:date>
    </item>
  </channel>
</rss>

