<?xml version="1.0" encoding="UTF-8"?>
<rss xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" version="2.0">
  <channel>
    <title>topic  interdependent modules and IPO in Intel® Fortran Compiler</title>
    <link>https://community.intel.com/t5/Intel-Fortran-Compiler/interdependent-modules-and-IPO/m-p/1123280#M132196</link>
    <description>&lt;P&gt;Hi all,&lt;/P&gt;

&lt;P&gt;Here is a simple example of interdependent derived types:&lt;/P&gt;

&lt;PRE class="brush:fortran;"&gt;MODULE m_common

   USE, intrinsic :: iso_fortran_env

   implicit none

   integer, parameter :: ip = INT32
   integer, parameter :: rp = REAL64

   integer(ip), parameter :: n = 80400 , maxit = 1000

   integer(8) :: count , count_scale , count_max

   integer(ip) :: i , j , k

   real(rp) :: time , rate

   TYPE vec2d

      real(rp) :: x = 0._rp
      real(rp) :: y = 0._rp

   CONTAINS

      procedure, pass(self) :: equal_vec2d

      generic :: assignment(=) =&amp;gt; equal_vec2d

   END TYPE vec2d

CONTAINS

   PURE ELEMENTAL SUBROUTINE equal_vec2d( self , from )

      class(vec2d), intent(inout) :: self
      class(vec2d), intent(in   ) :: from

      self%x = from%x
      self%y = from%y

   END SUBROUTINE equal_vec2d

END MODULE m_common

MODULE m_data

   USE m_common

   implicit none

   TYPE subdata

      type(vec2d) :: vector

   END TYPE subdata

   TYPE somedata

      type(subdata), allocatable :: sub_array(:)

   END TYPE somedata

END MODULE m_data

PROGRAM test

   USE m_common
   USE m_data

   type(somedata) :: totest

   type(vec2d) :: vector

   call system_clock( count , count_scale , count_max )

   rate = real( count_scale , 8 )

   allocate( totest%sub_array( n ) )

   time = real( count , 8 )

   do i = 1,maxit

      do k = 1,n

         vector = totest%sub_array( k )%vector

      end do

   end do

   call system_clock( count , count_scale , count_max )

   write(6,'(ES10.2)') ( real( count , 8 ) - time ) / rate

END PROGRAM test&lt;/PRE&gt;

&lt;P&gt;If I compile this program with ifort and gfortran, I obtain these computation times:&lt;/P&gt;

&lt;PRE class="brush:bash;"&gt;ifort -O3 test_all.f90 -o exe ; ./exe
6.95E-04

gfortran -O3 test_all.f90 -o exe ; ./exe
6.76E-04&lt;/PRE&gt;

&lt;P&gt;So far, no problem. Now, if I split the program into 3 files, test1.f90, test2.f90 and test3.f90, I obtain these results:&lt;/P&gt;

&lt;PRE class="brush:bash;"&gt;ifort -O3 test1.f90 test2.f90 test3.f90 -o exe ; ./exe
1.04E+00

ifort -O3 -ipo test1.f90 test2.f90 test3.f90 -o exe ; ./exe
7.45E-04

gfortran -O3 test1.f90 test2.f90 test3.f90 -o exe ; ./exe
3.17E-01

gfortran -O3 -flto test1.f90 test2.f90 test3.f90 -o exe ; ./exe
1.01E-01&lt;/PRE&gt;

&lt;P&gt;So the differences in computation time are very large, and I do not understand what can really explain them, since the program only accesses the values of the main derived-type array in memory with a very short stride.&lt;/P&gt;

&lt;P&gt;While interprocedural optimization with ifort gives me back the same performance as the single-file program, this is not the case with gfortran, even though it does have some effect.&lt;/P&gt;

&lt;P&gt;Is there a way to compile my modules separately so that they can be used efficiently, without the -ipo flag, in a very large program that calls them a lot? Maybe by compiling my modules into a static or shared library and then linking it to my main program?&lt;/P&gt;

&lt;P&gt;Fred&lt;/P&gt;</description>
    <pubDate>Tue, 12 Jul 2016 16:26:11 GMT</pubDate>
    <dc:creator>Frederic_C_</dc:creator>
    <dc:date>2016-07-12T16:26:11Z</dc:date>
    <item>
      <title>interdependent modules and IPO</title>
      <link>https://community.intel.com/t5/Intel-Fortran-Compiler/interdependent-modules-and-IPO/m-p/1123280#M132196</link>
      <description>&lt;P&gt;Hi all,&lt;/P&gt;

&lt;P&gt;Here is a simple example of interdependent derived types:&lt;/P&gt;

&lt;PRE class="brush:fortran;"&gt;MODULE m_common

   USE, intrinsic :: iso_fortran_env

   implicit none

   integer, parameter :: ip = INT32
   integer, parameter :: rp = REAL64

   integer(ip), parameter :: n = 80400 , maxit = 1000

   integer(8) :: count , count_scale , count_max

   integer(ip) :: i , j , k

   real(rp) :: time , rate

   TYPE vec2d

      real(rp) :: x = 0._rp
      real(rp) :: y = 0._rp

   CONTAINS

      procedure, pass(self) :: equal_vec2d

      generic :: assignment(=) =&amp;gt; equal_vec2d

   END TYPE vec2d

CONTAINS

   PURE ELEMENTAL SUBROUTINE equal_vec2d( self , from )

      class(vec2d), intent(inout) :: self
      class(vec2d), intent(in   ) :: from

      self%x = from%x
      self%y = from%y

   END SUBROUTINE equal_vec2d

END MODULE m_common

MODULE m_data

   USE m_common

   implicit none

   TYPE subdata

      type(vec2d) :: vector

   END TYPE subdata

   TYPE somedata

      type(subdata), allocatable :: sub_array(:)

   END TYPE somedata

END MODULE m_data

PROGRAM test

   USE m_common
   USE m_data

   type(somedata) :: totest

   type(vec2d) :: vector

   call system_clock( count , count_scale , count_max )

   rate = real( count_scale , 8 )

   allocate( totest%sub_array( n ) )

   time = real( count , 8 )

   do i = 1,maxit

      do k = 1,n

         vector = totest%sub_array( k )%vector

      end do

   end do

   call system_clock( count , count_scale , count_max )

   write(6,'(ES10.2)') ( real( count , 8 ) - time ) / rate

END PROGRAM test&lt;/PRE&gt;

&lt;P&gt;If I compile this program with ifort and gfortran, I obtain these computation times:&lt;/P&gt;

&lt;PRE class="brush:bash;"&gt;ifort -O3 test_all.f90 -o exe ; ./exe
6.95E-04

gfortran -O3 test_all.f90 -o exe ; ./exe
6.76E-04&lt;/PRE&gt;

&lt;P&gt;So far, no problem. Now, if I split the program into 3 files, test1.f90, test2.f90 and test3.f90, I obtain these results:&lt;/P&gt;

&lt;PRE class="brush:bash;"&gt;ifort -O3 test1.f90 test2.f90 test3.f90 -o exe ; ./exe
1.04E+00

ifort -O3 -ipo test1.f90 test2.f90 test3.f90 -o exe ; ./exe
7.45E-04

gfortran -O3 test1.f90 test2.f90 test3.f90 -o exe ; ./exe
3.17E-01

gfortran -O3 -flto test1.f90 test2.f90 test3.f90 -o exe ; ./exe
1.01E-01&lt;/PRE&gt;

&lt;P&gt;So the differences in computation time are very large, and I do not understand what can really explain them, since the program only accesses the values of the main derived-type array in memory with a very short stride.&lt;/P&gt;

&lt;P&gt;While interprocedural optimization with ifort gives me back the same performance as the single-file program, this is not the case with gfortran, even though it does have some effect.&lt;/P&gt;

&lt;P&gt;Is there a way to compile my modules separately so that they can be used efficiently, without the -ipo flag, in a very large program that calls them a lot? Maybe by compiling my modules into a static or shared library and then linking it to my main program?&lt;/P&gt;

&lt;P&gt;Fred&lt;/P&gt;</description>
      <pubDate>Tue, 12 Jul 2016 16:26:11 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Fortran-Compiler/interdependent-modules-and-IPO/m-p/1123280#M132196</guid>
      <dc:creator>Frederic_C_</dc:creator>
      <dc:date>2016-07-12T16:26:11Z</dc:date>
    </item>
    <item>
      <title>Cross posted. Refer to https://software.intel.com/en-us/node/661253</title>
      <link>https://community.intel.com/t5/Intel-Fortran-Compiler/interdependent-modules-and-IPO/m-p/1123281#M132197</link>
      <description>&lt;P&gt;Cross posted. Refer to &lt;A href="https://software.intel.com/en-us/node/661253"&gt;https://software.intel.com/en-us/node/661253&lt;/A&gt;&amp;nbsp;for further comments.&lt;/P&gt;</description>
      <pubDate>Tue, 12 Jul 2016 20:56:28 GMT</pubDate>
      <guid>https://community.intel.com/t5/Intel-Fortran-Compiler/interdependent-modules-and-IPO/m-p/1123281#M132197</guid>
      <dc:creator>Kevin_D_Intel</dc:creator>
      <dc:date>2016-07-12T20:56:28Z</dc:date>
    </item>
  </channel>
</rss>

