vectorization with generic operators / and bug ?

Frederic_C_ · ‎05-03-2016

Hi

find this little test program with two issues:

MODULE m_common

   USE, intrinsic :: iso_fortran_env

   implicit none

   integer, parameter :: ip = INT32
   integer, parameter :: rp = REAL64

END MODULE m_common

MODULE m_vec_tens

   USE m_common

   implicit none

   TYPE vec2d

      real(rp) :: x = 0._rp
      real(rp) :: y = 0._rp

   CONTAINS

      procedure, pass(self) :: equal_vec2d
      procedure, pass(self) :: add_vec2d
      procedure, pass(self) :: sub_vec2d
      procedure, pass(self) :: dotprod_vec2d

      generic :: assignment(=) => equal_vec2d
      generic :: operator(+) => add_vec2d
      generic :: operator(-) => sub_vec2d
      generic :: operator(.dotprod.) => dotprod_vec2d

   END TYPE vec2d

CONTAINS

   PURE ELEMENTAL SUBROUTINE equal_vec2d( self , from )

      class(vec2d), intent(inout) :: self
      class(vec2d), intent(in   ) :: from

      self%x = from%x
      self%y = from%y

   END SUBROUTINE equal_vec2d

   PURE ELEMENTAL FUNCTION add_vec2d( self , from ) RESULT( this )

      class(vec2d), intent(in) :: self , from

      type(vec2d) :: this

      this%x = self%x + from%x
      this%y = self%y + from%y

   END FUNCTION add_vec2d

   PURE ELEMENTAL FUNCTION sub_vec2d( self , from ) RESULT( this )

      class(vec2d), intent(in) :: self , from

      type(vec2d) :: this

      this%x = self%x - from%x
      this%y = self%y - from%y

   END FUNCTION sub_vec2d

   PURE ELEMENTAL FUNCTION dotprod_vec2d( self , from ) RESULT( dotprod )

      class(vec2d), intent(in) :: self , from

      real(rp) :: dotprod

      dotprod = self%x * from%x + &
                self%y * from%y

   END FUNCTION dotprod_vec2d

END MODULE m_vec_tens

MODULE m_field

   USE m_vec_tens

   TYPE field

      type(vec2d), allocatable :: v1(:)
      type(vec2d), allocatable :: v2(:)

   END TYPE field

END MODULE m_field

PROGRAM test_vec_tens

   USE m_field

   implicit none

   integer(ip), parameter :: n = 100000

   integer(8) :: count , count_scale , count_max

   integer(ip) :: i , j , k , nt

   real(rp) :: time , rate

   real(rp), dimension(n) :: s1 , s2 , s3 , s4 , s5 , s6

   type(vec2d), dimension(n)  ::  v1 , v2 , v3

   type(field) :: f

   call random_number( s3(1:n)   )
   call random_number( s4(1:n)   )
   call random_number( s5(1:n)   )
   call random_number( s6(1:n)   )
   call random_number( v2(1:n)%x )
   call random_number( v2(1:n)%y )
   call random_number( v3(1:n)%x )
   call random_number( v3(1:n)%y )

   call system_clock( count , count_scale , count_max )

   rate = real( count_scale , 8 )

   time = real( count , 8 )

   do nt = 1,1000

      s1(1:n) = s3(1:n) + s4(1:n)
      s2(1:n) = s5(1:n) + s6(1:n)

      s3(1) = s2(n)
      s5(1) = s1(n)

   end do

   call system_clock( count , count_scale , count_max )

   write(6,*) ( real( count , 8 ) - time ) / rate

   time = real( count , 8 )

! LOOP BEGIN at test_vec_tens.f90(49,28) inlined into test_vec_tens.f90(166,25)
!    remark #25096: Loop Interchange not done due to: Imperfect Loop Nest (Either at Source or due to other Compiler Transformations)
!    remark #25451: Advice: Loop Interchange, if possible, might help loopnest. Suggested Permutation : ( 1 2 ) --> ( 2 1 )
!    remark #15344: loop was not vectorized: vector dependence prevents vectorization
!    remark #15346: vector dependence: assumed FLOW dependence between 251.Y line 56 and 251.Y line 45
!    remark #15346: vector dependence: assumed ANTI dependence between 251.Y line 45 and 251.Y line 56

!    LOOP BEGIN at test_vec_tens.f90(166,7)
!       remark #25427: Loop Statements Reordered
!       remark #15344: loop was not vectorized: vector dependence prevents vectorization
!       remark #15346: vector dependence: assumed FLOW dependence between 251.Y line 56 and 251.Y line 45
!       remark #15346: vector dependence: assumed ANTI dependence between 251.Y line 45 and 251.Y line 56
!       remark #25438: unrolled without remainder by 2
!    LOOP END
! LOOP END

   do nt = 1,1000

      v1(1:n) = v2(1:n) + v3(1:n)

      v2(1)%x = v1(n)%y
      v2(1)%y = v1(n)%x

   end do

   call system_clock( count , count_scale , count_max )

   write(6,*) ( real( count , 8 ) - time ) / rate

   allocate( f%v1(n) )
   allocate( f%v2(n) )

! test_vec_tens.f90(187): error #6866: Dotted string is neither a defined operator nor a structure component.   [DOTPROD]
!       s1(i) = f%v1(i) .dotprod. f%v2(i)
! -----------------------^
! compilation aborted for test_vec_tens.f90 (code 1)

!    do i = 1,n

!       s1(i) = f%v1(i) .dotprod. f%v2(i)

!    end do

   v1(1:n) = f%v1(1:n)
   v2(1:n) = f%v2(1:n)

   do i = 1,n

      s1(i) = v1(i) .dotprod. f%v2(i)

   end do

   do i = 1,n

      s1(i) = f%v1(i) .dotprod. v2(i)

   end do

END PROGRAM test_vec_tens

with my configuration:

ifort --version
ifort (IFORT) 16.0.1 20151021
Copyright (C) 1985-2015 Intel Corporation.  All rights reserved.

Linux gmmpc209 4.2.0-35-generic #40-Ubuntu SMP Tue Mar 15 22:15:45 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux

first, it seams that there is a bug in compiler considering two allocatable array of vec2d type in field type container when I want to use the generic binding operator .dotprod.

If I copy the array to a local array, no problem in both directions

If I use en interface operator to overload .dotprod. rather than generic binding, no problem at all

secondly, I'm not a expert but I'm little confused with vectorization of my overloaded operators

time computations are very similar if I do not use -ipo optimization flag with other -fast flags for example and very different with

any advices or comments ?

thanks,

Fred Couderc

Xiaoping_D_Intel · ‎05-09-2016

I have reproduced the error on the operator and open a defect report DPD200410721

Thanks

Xiaoping Duan

Intel Customer Support