<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic Re:MPI Bus Error in Intel® MPI Library</title>
    <link>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1190517#M6898</link>
    <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Could you please let us know if your issue is resolved?&lt;/P&gt;&lt;P&gt;If not, do let us know so that we will be able to help you regarding the same.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;Prasanth&lt;/P&gt;&lt;BR /&gt;</description>
    <pubDate>Wed, 08 Jul 2020 05:19:45 GMT</pubDate>
    <dc:creator>PrasanthD_intel</dc:creator>
    <dc:date>2020-07-08T05:19:45Z</dc:date>
    <item>
      <title>MPI Bus Error</title>
      <link>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1186593#M6875</link>
      <description>&lt;P&gt;I'm developing a MPI application, which relies heavily on the MPI shared memory. Recently, I keep hitting the following error messages:&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;srun: error: compute-42-013: task 32: Bus error&lt;BR /&gt;srun: Terminating job step 324080.0&lt;BR /&gt;slurmstepd: error: *** STEP 324080.0 ON compute-42-012 CANCELLED AT 2020-06-14T04:17:51 ***&lt;BR /&gt;forrtl: error (78): process killed (SIGTERM)&lt;BR /&gt;Image &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;PC &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Routine &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Line &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Source&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;000000000C8E308E &amp;nbsp;Unknown &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Unknown &amp;nbsp;Unknown&lt;BR /&gt;libpthread-2.17.s &amp;nbsp;00002B370FBBA5D0 &amp;nbsp;Unknown &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Unknown &amp;nbsp;Unknown&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;000000000397721B &amp;nbsp;PMPIDI_CH3I_Progr &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1040 &amp;nbsp;ch3_progress.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;00000000039FC370 &amp;nbsp;MPIC_Wait &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 269 &amp;nbsp;helper_fns.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;00000000039FD83A &amp;nbsp;MPIC_Sendrecv &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 580 &amp;nbsp;helper_fns.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;000000000392F61B &amp;nbsp;MPIR_Allgather_in &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 257 &amp;nbsp;allgather.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;0000000003931752 &amp;nbsp;MPIR_Allgather &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;858 &amp;nbsp;allgather.c&lt;BR 
/&gt;pVelodyne_intel_4 &amp;nbsp;0000000003931A77 &amp;nbsp;MPIR_Allgather_im &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 905 &amp;nbsp;allgather.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;0000000003933226 &amp;nbsp;PMPI_Allgather &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 1068 &amp;nbsp;allgather.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;000000000392CECE &amp;nbsp;Unknown &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Unknown &amp;nbsp;Unknown&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;srun: error: compute-41-006: task 16: Bus error&lt;BR /&gt;srun: Terminating job step 324024.0&lt;BR /&gt;slurmstepd: error: *** STEP 324024.0 ON compute-41-006 CANCELLED AT 2020-06-13T16:54:13 ***&lt;BR /&gt;forrtl: error (78): process killed (SIGTERM)&lt;BR /&gt;Image &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;PC &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Routine &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Line &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;Source&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;000000000C85058E &amp;nbsp;Unknown &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Unknown &amp;nbsp;Unknown&lt;BR /&gt;libpthread-2.17.s &amp;nbsp;00002AEBC007A5D0 &amp;nbsp;Unknown &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; Unknown &amp;nbsp;Unknown&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;00000000038E46DB &amp;nbsp;PMPIDI_CH3I_Progr &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp;1040 &amp;nbsp;ch3_progress.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;0000000003969830 &amp;nbsp;MPIC_Wait &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 269 &amp;nbsp;helper_fns.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;000000000396ACFA &amp;nbsp;MPIC_Sendrecv &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 580 &amp;nbsp;helper_fns.c&lt;BR /&gt;pVelodyne_intel_4 
&amp;nbsp;00000000038BA379 &amp;nbsp;MPIR_Alltoall_int &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 438 &amp;nbsp;alltoall.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;00000000038BBE3D &amp;nbsp;MPIR_Alltoall &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 734 &amp;nbsp;alltoall.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;00000000038BC162 &amp;nbsp;MPIR_Alltoall_imp &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 775 &amp;nbsp;alltoall.c&lt;BR /&gt;pVelodyne_intel_4 &amp;nbsp;00000000038BD875 &amp;nbsp;PMPI_Alltoall &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; &amp;nbsp; 958 &amp;nbsp;alltoall.c&lt;BR /&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;It seems the bus error is inside the MPI subroutine. Since I do not have the source code&amp;nbsp;&amp;nbsp;of intel MPI, I have no idea what went wrong.&amp;nbsp;&lt;/P&gt;&lt;P&gt;The intel mpi version I'm using is&amp;nbsp;intel_parallel_studio/2018u4/compilers_and_libraries_2018.5.274.&amp;nbsp;&lt;/P&gt;&lt;P&gt;Any idea how to fix it?&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Thanks.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Thu, 18 Jun 2020 13:50:59 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1186593#M6875</guid>
      <dc:creator>Pan__Hua</dc:creator>
      <dc:date>2020-06-18T13:50:59Z</dc:date>
    </item>
    <item>
      <title>Hi,</title>
      <link>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1186594#M6876</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;Since you got a bus error, can you check the memory allocations of your program,&amp;nbsp;i.e., whether the memory allocated&amp;nbsp;is more than the system's memory?&lt;/P&gt;&lt;P&gt;Also, there is a limit for the number of communicators in MPI, which was around 32000. This&amp;nbsp;means the maximum number of windows you can create is 32000.&lt;/P&gt;&lt;P&gt;It looks like the program was terminated by srun, but from the given trace we are not sure why.&lt;/P&gt;&lt;P&gt;Can you provide a reproducer code so that we can debug from our side?&lt;/P&gt;&lt;P&gt;Can you check your code using ITAC [for analyzing] and Intel Inspector [for memory errors]?&lt;/P&gt;&lt;P&gt;For more info on using these tools, refer to this query:&amp;nbsp;&lt;A href="https://software.intel.com/en-us/forums/intel-clusters-and-hpc-technology/topic/623887"&gt;https://software.intel.com/en-us/forums/intel-clusters-and-hpc-technology/topic/623887&lt;/A&gt;&lt;/P&gt;&lt;P&gt;Also, we suggest you upgrade to the latest version 2019u7 and check if the error persists.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;Prasanth&lt;/P&gt;</description>
      <pubDate>Fri, 19 Jun 2020 07:37:06 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1186594#M6876</guid>
      <dc:creator>PrasanthD_intel</dc:creator>
      <dc:date>2020-06-19T07:37:06Z</dc:date>
    </item>
    <item>
      <title>In my application, I</title>
      <link>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1186595#M6877</link>
      <description>&lt;P&gt;In my application, I allocated several (&amp;lt;10)&amp;nbsp;huge MPI shared memories&amp;nbsp;to hold the datasets. It is possible that the memory ran out, since I did not see this kind of bus error when the datasets were smaller.&lt;/P&gt;&lt;P&gt;Here is my next question: why did the code not quit during the memory allocation, for example through an oom-kill? In the code, I actually check every memory allocation to make sure they were successfully allocated and initialize them to zero. If memory ran out, I assume the Linux system would kill the process, right?&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;&amp;nbsp;&amp;nbsp;&lt;/P&gt;</description>
      <pubDate>Fri, 19 Jun 2020 19:10:27 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1186595#M6877</guid>
      <dc:creator>Pan__Hua</dc:creator>
      <dc:date>2020-06-19T19:10:27Z</dc:date>
    </item>
    <item>
      <title>Re:MPI Bus Error</title>
      <link>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1187944#M6882</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;Could you please share the command line you are using? We would like to know whether you are using mpirun, srun, or mpiexec to launch.&lt;/P&gt;&lt;P&gt;Please share the details of the interconnect fabric you were using and the size of the MPI window in your program.&lt;/P&gt;&lt;P&gt;Also, we recommend that you upgrade to the latest version of Intel MPI, as the RMA window allocations have been optimized in the latest versions.&lt;/P&gt;&lt;P&gt;Could you please also share the details of the application that you are using?&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Also, we would like to know at what point you are getting this error: is it immediately after launching the program, or is it after a delay of, say, 1 hour or so?&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Also, we request that you please provide details of the NIC that you are using.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;These details would be of help in debugging the issues that you are facing.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Thanks&lt;/P&gt;&lt;P&gt;Goutham&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Mon, 29 Jun 2020 06:30:41 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1187944#M6882</guid>
      <dc:creator>GouthamK_Intel</dc:creator>
      <dc:date>2020-06-29T06:30:41Z</dc:date>
    </item>
    <item>
      <title>Re:MPI Bus Error</title>
      <link>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1190517#M6898</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Could you please let us know if your issue is resolved?&lt;/P&gt;&lt;P&gt;If not, do let us know so that we will be able to help you regarding the same.&lt;/P&gt;&lt;P&gt;&amp;nbsp;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;Prasanth&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Wed, 08 Jul 2020 05:19:45 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1190517#M6898</guid>
      <dc:creator>PrasanthD_intel</dc:creator>
      <dc:date>2020-07-08T05:19:45Z</dc:date>
    </item>
    <item>
      <title>Re:MPI Bus Error</title>
      <link>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1191838#M6918</link>
      <description>&lt;P&gt;Hi,&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;We are assuming this issue has been resolved and will no longer respond to this thread.&amp;nbsp;If you require additional assistance from Intel, please start a new thread.&amp;nbsp;Any further interaction in this thread will be considered community only.&lt;/P&gt;&lt;P&gt;&lt;BR /&gt;&lt;/P&gt;&lt;P&gt;Regards&lt;/P&gt;&lt;P&gt;Prasanth&lt;/P&gt;&lt;BR /&gt;</description>
      <pubDate>Mon, 13 Jul 2020 05:27:45 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-MPI-Library/MPI-Bus-Error/m-p/1191838#M6918</guid>
      <dc:creator>PrasanthD_intel</dc:creator>
      <dc:date>2020-07-13T05:27:45Z</dc:date>
    </item>
  </channel>
</rss>

