<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Hi, in Intel® oneAPI DPC++/C++ Compiler</title>
    <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184548#M432</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I understand what you want to do. You want to compile both of your files, i.e. the functions file and the main file, separately and link the function defined in the functions file into the kernel in the main file. It is very much doable; please follow the steps below:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Create&amp;nbsp;a header file declaring those functions and add the &lt;STRONG&gt;SYCL_EXTERNAL&lt;/STRONG&gt; attribute to each declaration, as in the sample below.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;(kernel.h)&lt;/P&gt;
&lt;PRE class="brush:cpp; class-name:dark;"&gt;#pragma once
#include&amp;lt;CL/sycl.hpp&amp;gt;

extern SYCL_EXTERNAL void vectorAdd(const float *A, const float *B, float *C, int numElement, cl::sycl::item&amp;lt;1&amp;gt; item_ct1);

&lt;/PRE&gt;

&lt;UL&gt;&lt;LI&gt;Include this header file (&lt;STRONG&gt;kernel.h&lt;/STRONG&gt;) in your main file (&lt;STRONG&gt;main.cpp&lt;/STRONG&gt; in our case) and also in the functions file (&lt;STRONG&gt;kernel.cpp&lt;/STRONG&gt;). Compile main.cpp and kernel.cpp separately and link them to generate the executable; this will work.&lt;/LI&gt;&lt;/UL&gt;
&lt;P&gt;Do let us know if you face any problem while following the above steps.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Warm Regards,&lt;/P&gt;
&lt;P&gt;Abhishek&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
    <pubDate>Tue, 19 May 2020 07:04:00 GMT</pubDate>
    <dc:creator>AbhishekD_Intel</dc:creator>
    <dc:date>2020-05-19T07:04:00Z</dc:date>
    <item>
      <title>Calling a function in kernel scope?</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184545#M429</link>
      <description>&lt;P&gt;Hi All,&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;For GPU targets, can I call a function in kernel scope? I have functions whose code is in different files, and I want to call them inside parallel_for.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Could you please show me an example?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks in advance&lt;/P&gt;</description>
      <pubDate>Fri, 15 May 2020 11:52:25 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184545#M429</guid>
      <dc:creator>grypp</dc:creator>
      <dc:date>2020-05-15T11:52:25Z</dc:date>
    </item>
    <item>
      <title>Hi,</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184546#M430</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;Yes, you can call a function inside parallel_for() from a different file.&lt;/P&gt;&lt;P&gt;There are a couple of ways you can achieve this; I&amp;nbsp;have given examples of them below that you can check out.&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;You can define the function in one file (kernel.cpp) and include that&amp;nbsp;file in your main file (main.cpp) to access the function from the main file, as shown below:&amp;nbsp;&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;(main.cpp)&lt;/P&gt;
&lt;PRE class="brush:cpp; class-name:dark;"&gt;#include &amp;lt;CL/sycl.hpp&amp;gt;
#include &amp;lt;iostream&amp;gt;
#include "kernel.cpp"
#define numElements 10

using namespace std;
int main(void){

        size_t size = numElements * sizeof(float);
        // Allocate the host vectors
        float *A = (float *)malloc(size);
        float *B = (float *)malloc(size);
        float *C = (float *)malloc(size);

        for (int i = 0; i &amp;lt; numElements; ++i)
        {
                A[i] = i;//rand()/(float)RAND_MAX;
                B[i] = i;//rand()/(float)RAND_MAX;
        }

        float *d_A, *d_B, *d_C;

        cl::sycl::queue queue( cl::sycl::gpu_selector{});
std::cout &amp;lt;&amp;lt; "Running on " &amp;lt;&amp;lt; queue.get_device().get_info&amp;lt;cl::sycl::info::device::name&amp;gt;() &amp;lt;&amp;lt; "\n";

        cl::sycl::device dev = queue.get_device();;
        cl::sycl::context ctx = queue.get_context();

        *((void **)&amp;amp;d_A) = cl::sycl::malloc_device(size, dev, ctx);
        *((void **)&amp;amp;d_B) = cl::sycl::malloc_device(size, dev, ctx);
        *((void **)&amp;amp;d_C) = cl::sycl::malloc_device(size, dev, ctx);

        memcpy((void*)(d_A), (void*)(A), size);
        memcpy((void*)(d_B), (void*)(B), size);


        {
          queue.submit(
            [&amp;amp;](cl::sycl::handler &amp;amp;cgh) {
              cgh.parallel_for&amp;lt;class vectorAdd_e83213&amp;gt;(
                cl::sycl::range&amp;lt;1&amp;gt;{numElements}, [=](cl::sycl::item&amp;lt;1&amp;gt; item_ct1) {
                  vectorAdd(d_A, d_B, d_C, numElements, item_ct1); //defined in other file
                });
            });
        }

        queue.wait();
        memcpy((void*)(C), (void*)(d_C), size);
        for(int i=0;i&amp;lt;numElements;i++)
                cout&amp;lt;&amp;lt;A[i]&amp;lt;&amp;lt;" "&amp;lt;&amp;lt;B[i]&amp;lt;&amp;lt;" "&amp;lt;&amp;lt;C[i]&amp;lt;&amp;lt;" "&amp;lt;&amp;lt;endl;

        cout&amp;lt;&amp;lt;endl;


        free(A);
        free(B);
        free(C);
        return 0;

}
&lt;/PRE&gt;

&lt;P&gt;(kernel.cpp)&lt;/P&gt;

&lt;PRE class="brush:cpp; class-name:dark;"&gt;#include &amp;lt;CL/sycl.hpp&amp;gt;
// Device kernel
 void vectorAdd(const float *A, const float *B, float *C, int numElement, cl::sycl::item&amp;lt;1&amp;gt; item_ct1)
{
        int i = item_ct1.get_linear_id();
        if (i &amp;lt; numElement)
        {
                C[i] = A[i] + B[i];
        }
}

&lt;/PRE&gt;

&lt;UL&gt;&lt;LI&gt;You can define&amp;nbsp;an OpenCL __kernel in another file (kernel.cl) and read the whole file into a buffer. Then you can build a program from that source with the context of your device queue and call get_kernel("function_name") on it. This is like calling an OpenCL kernel inside the DPC++ program.&lt;/LI&gt;&lt;/UL&gt;
&lt;P&gt;(main.cpp)&lt;/P&gt;

&lt;PRE class="brush:cpp; class-name:dark;"&gt;#include&amp;lt;CL/sycl.hpp&amp;gt;
#include&amp;lt;iostream&amp;gt;
#include&amp;lt;fstream&amp;gt;
#include&amp;lt;string&amp;gt;
#define N 10
using namespace cl::sycl;
char* readCLFile(std::string f, unsigned int* size) {
    std::ifstream ifs(f);
    std::filebuf* fbuf = ifs.rdbuf();
    *size = fbuf-&amp;gt;pubseekoff(0, ifs.end, ifs.in);
    fbuf-&amp;gt;pubseekpos(0, ifs.in);
    char* buf = new char[*size];
    memset(buf, 0, sizeof(char) * (*size));
    fbuf-&amp;gt;sgetn(buf, (*size));
    ifs.close();
    return buf;
}

int main() {
    int a[N],b[N],c[N];
    unsigned int size;
    //auto R = range&amp;lt;1&amp;gt;(N);
    for (int i = 0; i &amp;lt; N; i++) {
        a[i] = i;
        b[i] = i;
        c[i] = 0;
    }
        
    char* buf = readCLFile("kernel.cl", &amp;amp;size);
    queue q(gpu_selector{});
    std::cout &amp;lt;&amp;lt; "Running on " &amp;lt;&amp;lt; q.get_device().get_info&amp;lt;cl::sycl::info::device::name&amp;gt;() &amp;lt;&amp;lt; "\n";

    auto ctx = q.get_context();
    program p(ctx);
    p.build_with_source(std::string(buf, size));

    kernel k = p.get_kernel("vec_add");

    buffer&amp;lt;int, 1&amp;gt; bufa(a, range&amp;lt;1&amp;gt;(N) );
    buffer&amp;lt;int, 1&amp;gt; bufb(b, range&amp;lt;1&amp;gt;(N) );
    buffer&amp;lt;int, 1&amp;gt; bufc(c, range&amp;lt;1&amp;gt;(N) );

    q.submit([&amp;amp;](handler&amp;amp; h) {
        auto acc_a = bufa.get_access&amp;lt;access::mode::read&amp;gt;(h);
        auto acc_b = bufb.get_access&amp;lt;access::mode::read&amp;gt;(h);
        auto acc_c = bufc.get_access&amp;lt;access::mode::read_write&amp;gt;(h);

        h.set_args(acc_a, acc_b , acc_c);

        h.parallel_for(range&amp;lt;1&amp;gt;(N), k);
        });
    auto host_acc = bufc.get_access&amp;lt;access::mode::read&amp;gt;();
    for (int i = 0; i &amp;lt; N; i++)
        std::cout &amp;lt;&amp;lt; host_acc[i] &amp;lt;&amp;lt; "\n";

    return 0;
}
&lt;/PRE&gt;

&lt;P&gt;(kernel.cl)&lt;/P&gt;

&lt;PRE class="brush:cpp; class-name:dark;"&gt;__kernel void vec_add(__global int* ptr1, __global int* ptr2, __global int* ptr3)
{
    int index = get_global_id(0);

    ptr3[index] = ptr1[index] + ptr2[index];
}

&lt;/PRE&gt;

&lt;P&gt;Hope this would have solved your problem.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Warm Regards,&lt;/P&gt;
&lt;P&gt;Abhishek&lt;/P&gt;</description>
      <pubDate>Mon, 18 May 2020 13:28:36 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184546#M430</guid>
      <dc:creator>AbhishekD_Intel</dc:creator>
      <dc:date>2020-05-18T13:28:36Z</dc:date>
    </item>
    <item>
      <title>Hi Abhishek,</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184547#M431</link>
      <description>&lt;P&gt;Hi&amp;nbsp;Abhishek,&lt;/P&gt;&lt;P&gt;Thanks. This is NOT what I am asking. Let me clarify my question. I wanted to call a CPU function which is in another file. I don't want to inline it. Also, I don't want to write a low-level OpenCL kernel for it.&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;I mean, if I remove the line #include "kernel.cpp" from the first code&amp;nbsp;and compile the two files separately, would it work?&lt;/P&gt;</description>
      <pubDate>Mon, 18 May 2020 14:20:43 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184547#M431</guid>
      <dc:creator>grypp</dc:creator>
      <dc:date>2020-05-18T14:20:43Z</dc:date>
    </item>
    <item>
      <title>Hi,</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184548#M432</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;I understand what you want to do. You want to compile both of your files, i.e. the functions file and the main file, separately and link the function defined in the functions file into the kernel in the main file. It is very much doable; please follow the steps below:&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Create&amp;nbsp;a header file declaring those functions and add the &lt;STRONG&gt;SYCL_EXTERNAL&lt;/STRONG&gt; attribute to each declaration, as in the sample below.&lt;/LI&gt;&lt;/UL&gt;&lt;P&gt;(kernel.h)&lt;/P&gt;
&lt;PRE class="brush:cpp; class-name:dark;"&gt;#pragma once
#include&amp;lt;CL/sycl.hpp&amp;gt;

extern SYCL_EXTERNAL void vectorAdd(const float *A, const float *B, float *C, int numElement, cl::sycl::item&amp;lt;1&amp;gt; item_ct1);

&lt;/PRE&gt;

&lt;UL&gt;&lt;LI&gt;Include this header file (&lt;STRONG&gt;kernel.h&lt;/STRONG&gt;) in your main file (&lt;STRONG&gt;main.cpp&lt;/STRONG&gt; in our case) and also in the functions file (&lt;STRONG&gt;kernel.cpp&lt;/STRONG&gt;). Compile main.cpp and kernel.cpp separately and link them to generate the executable; this will work.&lt;/LI&gt;&lt;/UL&gt;
&lt;P&gt;Do let us know if you face any problem while following the above steps.&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;
&lt;P&gt;Warm Regards,&lt;/P&gt;
&lt;P&gt;Abhishek&lt;/P&gt;
&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Tue, 19 May 2020 07:04:00 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184548#M432</guid>
      <dc:creator>AbhishekD_Intel</dc:creator>
      <dc:date>2020-05-19T07:04:00Z</dc:date>
    </item>
    <item>
      <title>That's exactly what I was</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184549#M433</link>
      <description>&lt;P&gt;That's exactly what I was asking. Awesome thanks!&amp;nbsp;&lt;/P&gt;&lt;P&gt;Follow-up questions&amp;nbsp;&lt;/P&gt;&lt;UL&gt;&lt;LI&gt;Is it possible to enable nested parallelism "parallel_for"&amp;nbsp;in "vectorAdd" function?&lt;/LI&gt;&lt;LI&gt;Do I need pragma once?&amp;nbsp;&lt;/LI&gt;&lt;/UL&gt;</description>
      <pubDate>Tue, 19 May 2020 08:09:46 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184549#M433</guid>
      <dc:creator>grypp</dc:creator>
      <dc:date>2020-05-19T08:09:46Z</dc:date>
    </item>
    <item>
      <title>Hi,</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184550#M434</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;Thanks for the confirmation. Good to know that the solution we&amp;nbsp;provided helped you.&lt;/P&gt;&lt;P&gt;Regarding your follow-up questions:&lt;/P&gt;&lt;OL&gt;&lt;LI&gt;&lt;STRONG&gt;pragma once&amp;nbsp;&lt;/STRONG&gt;is a&amp;nbsp;standard practice we use when working with header files. In a large application there might be cases where the compiler sees the contents of a header more than once because that header is included multiple times. To avoid this, it's good practice to use pragma once, but it's up to you whether you want to include it or not.&lt;/LI&gt;&lt;LI&gt;For details regarding &lt;STRONG&gt;Nested parallelism,&lt;/STRONG&gt;&amp;nbsp;I suggest you post a new thread because that topic is very different from the current one. So&amp;nbsp;to get a detailed idea about nested parallelism, please go ahead and post a new thread. We will definitely help you there.&amp;nbsp;&lt;/LI&gt;&lt;/OL&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;Warm Regards,&lt;/P&gt;&lt;P&gt;Abhishek&lt;/P&gt;</description>
      <pubDate>Wed, 20 May 2020 06:51:40 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184550#M434</guid>
      <dc:creator>AbhishekD_Intel</dc:creator>
      <dc:date>2020-05-20T06:51:40Z</dc:date>
    </item>
    <item>
      <title>Hi,</title>
      <link>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184551#M435</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;Glad to hear that the solution provided helped.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Could you please raise a new thread for the &lt;STRONG&gt;Nested Parallelism &lt;/STRONG&gt;issue, giving details about your use case, so that we can make a feature request to the concerned team.&amp;nbsp;&lt;/P&gt;&lt;P&gt;We are closing this thread.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;Goutham&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Mon, 01 Jun 2020 09:15:15 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-oneAPI-DPC-C-Compiler/Calling-a-function-in-kernel-scope/m-p/1184551#M435</guid>
      <dc:creator>GouthamK_Intel</dc:creator>
      <dc:date>2020-06-01T09:15:15Z</dc:date>
    </item>
  </channel>
</rss>

