<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re: 2D array on GPU with USM in Intel® oneAPI DPC++/C++ Compiler</title>
    <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1295992#M1375</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;Thanks for reaching out to us.&lt;/P&gt;
&lt;P&gt;We are also able to reproduce the same issue on our end.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;We are looking into your issue internally. We will get back to you soon.&lt;/P&gt;
&lt;P&gt;Meanwhile, could you please provide the following environment details&lt;/P&gt;
&lt;P&gt;&amp;nbsp; Compiler version&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; OS &amp;amp; its version.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks &amp;amp; Regards&lt;/P&gt;
&lt;P&gt;Noorjahan.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Fri, 16 Jul 2021 06:21:28 GMT</pubDate>
    <dc:creator>NoorjahanSk_Intel</dc:creator>
    <dc:date>2021-07-16T06:21:28Z</dc:date>
    <item>
      <title>2D array on GPU with USM</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1295723#M1374</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;I am porting my code to DPC++ but I have run into a problem. I have narrowed down the problem to this unit test.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;#include &amp;lt;CL/sycl.hpp&amp;gt;
#include &amp;lt;array&amp;gt;
#include &amp;lt;iostream&amp;gt;
#if FPGA || FPGA_EMULATOR
#include &amp;lt;CL/sycl/INTEL/fpga_extensions.hpp&amp;gt;
#endif

using namespace sycl;

#define M 4
#define N 5
#define M_LEN (M + 2)
#define N_LEN (N + 2)
#define DOMAIN_SIZE M_LEN*N_LEN
#define DIM 1


void VecAdd(queue &amp;amp;q, range&amp;lt;DIM&amp;gt; R, const int a[DOMAIN_SIZE], const int b[DOMAIN_SIZE], int sum[DOMAIN_SIZE]) {

  auto e = q.parallel_for(R, [=](auto i) { 
      sum[i] = a[i] + b[i]; 
  });

  e.wait();
}

int main() {
    auto R = range&amp;lt;1&amp;gt;{DOMAIN_SIZE};
    default_selector d_selector;
    queue q(d_selector);
    std::cout &amp;lt;&amp;lt; "Device: " &amp;lt;&amp;lt; q.get_device().get_info&amp;lt;info::device::name&amp;gt;() &amp;lt;&amp;lt; std::endl;
    
    int **u = malloc_shared&amp;lt;int *&amp;gt;(3*DOMAIN_SIZE, q);
    int **v = malloc_shared&amp;lt;int *&amp;gt;(3*DOMAIN_SIZE, q);
    int **p = malloc_shared&amp;lt;int *&amp;gt;(3*DOMAIN_SIZE, q);
    
    int u_[3][DOMAIN_SIZE]; int *_u_[3] = {u_[0], u_[1], u_[2]}; u = _u_;
    int v_[3][DOMAIN_SIZE]; int *_v_[3] = {v_[0], v_[1], v_[2]}; v = _v_;
    int p_[3][DOMAIN_SIZE]; int *_p_[3] = {p_[0], p_[1], p_[2]}; p = _p_;
    
    auto e = q.parallel_for(R, [=](auto i) { 
        u[0][i] = i;
        v[0][i] = 2*i;
    });
    
    VecAdd(q, R, u[0], v[0], p[0]);
    
    for (int i=0; i&amp;lt;DOMAIN_SIZE; i++)
      std::cout &amp;lt;&amp;lt; "p[0][" &amp;lt;&amp;lt; i &amp;lt;&amp;lt; "] = " &amp;lt;&amp;lt; p[0][i] &amp;lt;&amp;lt; std::endl;
    
    free(u, q);
    free(v, q);
    free(p, q);
    
    return 0;
}&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;This code compiles but throws the following error:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;terminate called after throwing an instance of 'cl::sycl::runtime_error'
  what():  Native API failed. Native API returns: -30 (CL_INVALID_VALUE) -30 (CL_INVALID_VALUE)
Aborted&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;As discussed previously&amp;nbsp;&lt;A href="http://community.intel.com/t5/Intel-oneAPI-Data-Parallel-C/Sync-with-buffers/m-p/1294672#M1345" target="_self"&gt;here&lt;/A&gt;&amp;nbsp;I decided to change my buffer model to USM. So, this kind of array declaration has been tested and had been working fine with the buffer model. Moreover, this code gives me a correct output on CPU while giving the same error.&amp;nbsp;&lt;BR /&gt;&lt;BR /&gt;I don't understand what I am doing wrong here and what the error says.&lt;/P&gt;
&lt;P&gt;Could you please help me with this?&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks,&lt;/P&gt;
&lt;P&gt;Leila&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&lt;a href="https://community.intel.com/t5/user/viewprofilepage/user-id/153633"&gt;@NoorjahanSk_Intel&lt;/a&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Sat, 03 Jul 2021 23:27:13 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1295723#M1374</guid>
      <dc:creator>leilag</dc:creator>
      <dc:date>2021-07-03T23:27:13Z</dc:date>
    </item>
    <item>
      <title>Re: 2D array on GPU with USM</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1295992#M1375</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;Thanks for reaching out to us.&lt;/P&gt;
&lt;P&gt;We are also able to reproduce the same issue on our end.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;We are looking into your issue internally. We will get back to you soon.&lt;/P&gt;
&lt;P&gt;Meanwhile, could you please provide the following environment details&lt;/P&gt;
&lt;P&gt;&amp;nbsp; Compiler version&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp; OS &amp;amp; its version.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks &amp;amp; Regards&lt;/P&gt;
&lt;P&gt;Noorjahan.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 16 Jul 2021 06:21:28 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1295992#M1375</guid>
      <dc:creator>NoorjahanSk_Intel</dc:creator>
      <dc:date>2021-07-16T06:21:28Z</dc:date>
    </item>
    <item>
      <title>Re: 2D array on GPU with USM</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1296284#M1380</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thank you for looking into this.&lt;/P&gt;
&lt;P&gt;I am running the code on Intel DevCloud. I don't know where to look up the versions.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks,&lt;/P&gt;
&lt;P&gt;Leila&lt;/P&gt;</description>
      <pubDate>Tue, 06 Jul 2021 13:13:13 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1296284#M1380</guid>
      <dc:creator>leilag</dc:creator>
      <dc:date>2021-07-06T13:13:13Z</dc:date>
    </item>
    <item>
      <title>Re: 2D array on GPU with USM</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1298020#M1407</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;The main cause of your error is the way you are allocating memory. Dynamic allocation uses heap memory, whereas static allocation uses stack memory; you are trying to merge both methods.&lt;/P&gt;
&lt;P&gt;Instead of this&amp;nbsp;&amp;gt;&amp;gt;&lt;I&gt;int u_[3][DOMAIN_SIZE]; int *_u_[3] = {u_[0], u_[1], u_[2]}; u = _u_; &lt;/I&gt;you can use this line&lt;I&gt; &amp;gt;&amp;gt; u[0] = malloc_shared&amp;lt;int&amp;gt;(DOMAIN_SIZE, q);&amp;nbsp;&lt;/I&gt;&lt;/P&gt;
&lt;P&gt;We need to call e.wait(); after every parallel_for loop, as this waits for the kernel to finish and so synchronizes the data before we proceed to any other operation on it.&lt;/P&gt;
&lt;P&gt;&amp;gt;&amp;gt;&lt;SPAN&gt;&amp;nbsp;&lt;EM&gt;I don't know where to look up the versions&lt;/EM&gt;.&lt;/SPAN&gt;&lt;/P&gt;
&lt;P&gt;You can check the version by using the compiler's --version option, e.g. dpcpp --version&lt;/P&gt;
&lt;P&gt;If you have a small input size, you can create 1D pointers and index them as row*array_width+column.&lt;/P&gt;
&lt;P&gt;You can find the complete snippet below:&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;LI-CODE lang="markup"&gt;#include &amp;lt;CL/sycl.hpp&amp;gt;
#include &amp;lt;array&amp;gt;
#include &amp;lt;iostream&amp;gt;
#if FPGA || FPGA_EMULATOR
#include &amp;lt;CL/sycl/INTEL/fpga_extensions.hpp&amp;gt;
#endif

using namespace sycl;

#define M 4
#define N 5
#define M_LEN (M + 2)
#define N_LEN (N + 2)
constexpr size_t  DOMAIN_SIZE = M_LEN*N_LEN;
#define DIM 1

void VecAdd(queue &amp;amp;q,size_t size, const int a[DOMAIN_SIZE], const int b[DOMAIN_SIZE], int sum[DOMAIN_SIZE]) {
    range&amp;lt;1&amp;gt; num_items{size};
  auto e = q.parallel_for(num_items, [=](auto i) {
      sum[i] = a[i] + b[i];
  });
  e.wait();
}

int main() {
    auto R = range&amp;lt;1&amp;gt;{DOMAIN_SIZE};
   default_selector d_selector;
    queue q(d_selector);
    std::cout &amp;lt;&amp;lt; "Device: " &amp;lt;&amp;lt; q.get_device().get_info&amp;lt;info::device::name&amp;gt;() &amp;lt;&amp;lt; std::endl;

    int **u = malloc_shared&amp;lt;int *&amp;gt;(DOMAIN_SIZE, q);
    int **v = malloc_shared&amp;lt;int *&amp;gt;(DOMAIN_SIZE, q);
    int **p = malloc_shared&amp;lt;int *&amp;gt;(DOMAIN_SIZE, q);
    for(int i=0;i&amp;lt;3;i++) {
            u[i] = malloc_shared&amp;lt;int&amp;gt;(DOMAIN_SIZE, q);

            v[i] = malloc_shared&amp;lt;int&amp;gt;(DOMAIN_SIZE, q);
            p[i] = malloc_shared&amp;lt;int&amp;gt;(DOMAIN_SIZE, q);
    }
     auto e=q.parallel_for(R, [=](auto i) {
        u[0][i] = i;
        v[0][i] = 2*i;
    });
    e.wait();
    VecAdd(q, DOMAIN_SIZE, u[0], v[0], p[0]);

    for (int i=0; i&amp;lt;DOMAIN_SIZE; i++)
      std::cout &amp;lt;&amp;lt; "p[0][" &amp;lt;&amp;lt; i &amp;lt;&amp;lt; "] = " &amp;lt;&amp;lt; p[0][i] &amp;lt;&amp;lt; std::endl;
   free(u,q);
   free(v,q);
   free(p,q);
    return 0;
}&lt;/LI-CODE&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Let us know if it helps.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks &amp;amp; Regards&lt;/P&gt;
&lt;P&gt;Noorjahan&lt;/P&gt;</description>
      <pubDate>Tue, 20 Jul 2021 04:41:56 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1298020#M1407</guid>
      <dc:creator>NoorjahanSk_Intel</dc:creator>
      <dc:date>2021-07-20T04:41:56Z</dc:date>
    </item>
    <item>
      <title>Re: 2D array on GPU with USM</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1298296#M1413</link>
      <description>&lt;P&gt;Hello Noorjahan,&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thank you for taking the time and debugging my code. It did resolve the issue.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;All the best,&lt;/P&gt;
&lt;P&gt;Leila&lt;/P&gt;</description>
      <pubDate>Tue, 13 Jul 2021 21:49:52 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1298296#M1413</guid>
      <dc:creator>leilag</dc:creator>
      <dc:date>2021-07-13T21:49:52Z</dc:date>
    </item>
    <item>
      <title>Re: 2D array on GPU with USM</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1298408#M1414</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;
&lt;P&gt;Thank you for accepting this as a solution.&lt;/P&gt;
&lt;P&gt;As this issue has been resolved, we will no longer respond to this thread.&lt;/P&gt;
&lt;P&gt;If you require any additional assistance from Intel, please start a new thread.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Thanks &amp;amp; Regards&lt;/P&gt;
&lt;P&gt;Noorjahan.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 20 Jul 2021 04:39:35 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/2D-array-on-GPU-with-USM/m-p/1298408#M1414</guid>
      <dc:creator>NoorjahanSk_Intel</dc:creator>
      <dc:date>2021-07-20T04:39:35Z</dc:date>
    </item>
  </channel>
</rss>

