<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic copying data from host to native MIC array in Software Archive</title>
    <link>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960234#M21653</link>
    <description>&lt;P&gt;Hello,&lt;/P&gt;

&lt;P&gt;I am trying to write a very simple program in which I natively allocate some memory on the coprocessor and try to copy data from the host into this natively allocated memory, but I keep getting errors. Could anyone kindly advise what is going wrong in my code?&lt;/P&gt;

&lt;P&gt;__attribute__ ((target(mic)))&lt;BR /&gt;
	unsigned long long numElems;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&lt;BR /&gt;
	void&amp;nbsp;&lt;BR /&gt;
	PerformNativeAllocation(short* ptr, short* temp)&lt;BR /&gt;
	{&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;cout &amp;lt;&amp;lt; " Perform Native allocation " &amp;lt;&amp;lt; endl;&amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;#pragma offload target(mic:0) \&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;nocopy(temp)&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;{&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;temp = (short*) malloc(numElems*sizeof(short));&amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;//free(temp);&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;}&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;#pragma offload target(mic:0) &amp;nbsp;\&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;in(ptr[0:numElems] :into(temp) alloc_if(0) free_if(0))&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;{&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;for (unsigned long long ii=0; ii &amp;lt; numElems; ++ii)&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;{&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;temp[ii]*=2;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;}&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;free(temp);&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;}&lt;BR /&gt;
	}&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;Thank you&lt;/P&gt;

&lt;P&gt;AM&lt;/P&gt;</description>
    <pubDate>Wed, 15 Jan 2014 22:29:22 GMT</pubDate>
    <dc:creator>ankit_m_</dc:creator>
    <dc:date>2014-01-15T22:29:22Z</dc:date>
    <item>
      <title>copying data from host to native MIC array</title>
      <link>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960234#M21653</link>
      <description>&lt;P&gt;Hello,&lt;/P&gt;

&lt;P&gt;I am trying to write a very simple program in which I natively allocate some memory on the coprocessor and try to copy data from the host into this natively allocated memory, but I keep getting errors. Could anyone kindly advise what is going wrong in my code?&lt;/P&gt;

&lt;P&gt;__attribute__ ((target(mic)))&lt;BR /&gt;
	unsigned long long numElems;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&lt;BR /&gt;
	void&amp;nbsp;&lt;BR /&gt;
	PerformNativeAllocation(short* ptr, short* temp)&lt;BR /&gt;
	{&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;cout &amp;lt;&amp;lt; " Perform Native allocation " &amp;lt;&amp;lt; endl;&amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;#pragma offload target(mic:0) \&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;nocopy(temp)&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;{&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;temp = (short*) malloc(numElems*sizeof(short));&amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;//free(temp);&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;}&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;#pragma offload target(mic:0) &amp;nbsp;\&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;in(ptr[0:numElems] :into(temp) alloc_if(0) free_if(0))&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;{&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;for (unsigned long long ii=0; ii &amp;lt; numElems; ++ii)&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;{&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;temp[ii]*=2;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;}&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;free(temp);&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp; &amp;nbsp;&amp;nbsp;&amp;nbsp; &amp;nbsp;}&lt;BR /&gt;
	}&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;

&lt;P&gt;Thank you&lt;/P&gt;

&lt;P&gt;AM&lt;/P&gt;</description>
      <pubDate>Wed, 15 Jan 2014 22:29:22 GMT</pubDate>
      <guid>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960234#M21653</guid>
      <dc:creator>ankit_m_</dc:creator>
      <dc:date>2014-01-15T22:29:22Z</dc:date>
    </item>
    <item>
      <title>Development's guidance is:</title>
      <link>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960235#M21654</link>
      <description>&lt;P&gt;Development's guidance is: "Memory allocated by the user using malloc or some such API cannot participate in the data transfer pragmas. For the pragmas to be usable, the allocation must be done using the pragmas also."&lt;/P&gt;

&lt;P&gt;There is an exception to that and if compelled one can call malloc/memcpy in offloaded code; however, there is inefficiency with the extra allocation for the IN() variable in addition to the user target-side malloc. There is an example demonstrating this under &lt;STRONG&gt;Example of Local Pointer &lt;/STRONG&gt;on the &lt;A href="http://software.intel.com/en-us/articles/effective-use-of-the-intel-compilers-offload-features" target="_blank"&gt;Effective Use of the Intel Compiler's Offload Features &lt;/A&gt;page. Instead of using INTO, one uses malloc and memcpy in the offloaded code.&lt;/P&gt;

&lt;P&gt;The alternative is to use the pragma allocation and INTO as shown below.&lt;/P&gt;

&lt;P&gt;void PerformNativeAllocation(short* ptr, short* temp)&lt;BR /&gt;
	{&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; cout &amp;lt;&amp;lt; " Perform Native allocation " &amp;lt;&amp;lt; endl;&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; // allocate temp on target only&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; #pragma offload_transfer target(mic:0) nocopy(temp : length(numElems) alloc_if(1) free_if(0))&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; // transfer ptr values into temp&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; #pragma offload target(mic:0)&amp;nbsp; \&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; in(ptr[0:numElems] :into(temp) alloc_if(0) free_if(0))&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; for (unsigned long long ii=0; ii &amp;lt; numElems; ++ii)&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; {&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; temp[ii]*=2;&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; }&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; // transfer values out and free target memory&lt;BR /&gt;
	&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp;&amp;nbsp; #pragma offload_transfer target(mic:0) out(temp[0:numElems] : into(ptr)&amp;nbsp; alloc_if(0) free_if(1))&lt;BR /&gt;
	}&lt;/P&gt;

&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Jan 2014 18:22:00 GMT</pubDate>
      <guid>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960235#M21654</guid>
      <dc:creator>Kevin_D_Intel</dc:creator>
      <dc:date>2014-01-16T18:22:00Z</dc:date>
    </item>
    <item>
      <title>Hello Kevin, </title>
      <link>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960236#M21655</link>
      <description>&lt;P&gt;Hello Kevin,&amp;nbsp;&lt;/P&gt;

&lt;P&gt;Thank you very much for your reply and help. I have been allocating memory and transferring data over to MIC using the same approach as suggested by you; however, I was trying to see if that initial memory allocation time using "nocopy" clause can be reduced and it appears that it cannot. Thank you for the heads up though, this really saves a lot of my time.&amp;nbsp;&lt;/P&gt;

&lt;P&gt;Sincerely,&amp;nbsp;&lt;/P&gt;

&lt;P&gt;AM&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 16 Jan 2014 18:44:42 GMT</pubDate>
      <guid>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960236#M21655</guid>
      <dc:creator>ankit_m_</dc:creator>
      <dc:date>2014-01-16T18:44:42Z</dc:date>
    </item>
    <item>
      <title>Ok. Maybe you have also</title>
      <link>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960237#M21656</link>
      <description>&lt;P&gt;Ok. Maybe you have also already tried "hiding" the initial allocation some by making it asynchronous using the &lt;STRONG&gt;signal() &lt;/STRONG&gt;clause and then either a subsequent &lt;STRONG&gt;offload_wait &lt;/STRONG&gt;pragma, &lt;STRONG&gt;wait()&lt;/STRONG&gt; clause for the INTO transfer, or the &lt;STRONG&gt;_Offload_signaled() &lt;/STRONG&gt;API?&lt;/P&gt;</description>
      <pubDate>Thu, 16 Jan 2014 19:04:13 GMT</pubDate>
      <guid>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960237#M21656</guid>
      <dc:creator>Kevin_D_Intel</dc:creator>
      <dc:date>2014-01-16T19:04:13Z</dc:date>
    </item>
    <item>
      <title>Yes, I tried that too in one</title>
      <link>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960238#M21657</link>
      <description>&lt;P&gt;Yes, I tried that too in one of my double buffering toy programs but I still see around 15 - 20 sec worth of initial (one time) allocation (offload) delay. Once the memory is allocated, the transfer is pretty fast. Thank you for the heads up though Kevin. I really appreciate your input and help.&amp;nbsp;&lt;/P&gt;

&lt;P&gt;AVM&lt;/P&gt;</description>
      <pubDate>Fri, 17 Jan 2014 00:03:34 GMT</pubDate>
      <guid>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960238#M21657</guid>
      <dc:creator>ankit_m_</dc:creator>
      <dc:date>2014-01-17T00:03:34Z</dc:date>
    </item>
    <item>
      <title>Yes, the initial allocation</title>
      <link>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960239#M21658</link>
      <description>&lt;P&gt;Yes, the initial allocation slowness is a known matter. It is&amp;nbsp;within the&amp;nbsp;card's OS and hopefully it can continue decreasing over time.&lt;/P&gt;</description>
      <pubDate>Tue, 21 Jan 2014 11:31:27 GMT</pubDate>
      <guid>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960239#M21658</guid>
      <dc:creator>Kevin_D_Intel</dc:creator>
      <dc:date>2014-01-21T11:31:27Z</dc:date>
    </item>
    <item>
      <title>Thank you for your help and</title>
      <link>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960240#M21659</link>
      <description>&lt;P&gt;Thank you for your help and reply Kevin.&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 23 Jan 2014 00:28:51 GMT</pubDate>
      <guid>https://community.intel.com/t5/Software-Archive/copying-data-from-host-to-native-MIC-array/m-p/960240#M21659</guid>
      <dc:creator>ankit_m_</dc:creator>
      <dc:date>2014-01-23T00:28:51Z</dc:date>
    </item>
  </channel>
</rss>

