Intel® Fortran Compiler
Build applications that can scale for the future with optimized code designed for Intel® Xeon® and compatible processors.
27431 Discussions

Poor openmp scaling with ifort but not gfortran

hweintraub
Beginner
195 Views

I’ve been seeing some strange OpenMP scaling behavior that I’m not sure how to explain.
I have a simple test program that scales nicely when compiled with gfortran but poorly when compiled with ifort.

 

hweintraub_0-1639507125307.png

 

My test program is the following:

! Module exposing the function parser object used by the test routines.
! The object is named in an OpenMP threadprivate directive, so each thread
! works on its own copy.
module parserMod
  use function_parser, only : fparser_array
  implicit none

  ! Parser object; the SAVE attribute is spelled out as a separate statement
  ! because the variable appears in the threadprivate directive below.
  type(fparser_array) :: parser
  save :: parser
  !$omp threadprivate(parser)

end module parserMod
!-------------------------------------------------------------------------------
! Initialise the threadprivate parser on every thread and then run doWork on
! each column of marbles inside an OpenMP work-sharing loop.
!
! Arguments:
!   marbles    - 6 x 200000 work array. Element 1 of each column is set to 1
!                before the column is passed to doWork.
!   numThreads - number of OpenMP threads requested for the parallel region.
!
! If the parser reports an error after parsing, its errors are printed to
! output_unit and the program stops with code 99.
subroutine parallelMarbles(marbles, numThreads)

  use parserMod, only : parser
  use iso_fortran_env, only : wp => real64, output_unit

  implicit none

  real(wp), dimension(6,200000), intent(inout) :: marbles
  integer, intent(in)                          :: numThreads

  integer :: indx
  integer :: numMarbles
  character(len=1), dimension(3), parameter :: parserVars = ['x', 'y', 'z']

  ! Count the columns directly instead of building the section marbles(1, :).
  numMarbles = size(marbles, 2)

  ! Parsing and the work loop share a single parallel region, so the thread
  ! that evaluates with a parser copy is always the thread that parsed it.
  ! Two separate regions would depend on threadprivate data persisting
  ! between regions, which OpenMP only guarantees under certain conditions.
  !$omp parallel default(none) &
  !$omp private(indx) &
  !$omp shared(marbles, numMarbles) &
  !$omp num_threads(numThreads)

  ! All threads initialize their own copy of the parser
  call parser%parse(parserVars, parserVars)
  if (parser%error()) then
    call parser%print_errors(output_unit)
    stop 99
  end if

  !$omp do
  do indx = 1, numMarbles
    marbles(1, indx) = 1.0_wp
    call doWork(marbles(:, indx))
  end do
  !$omp end do

  !$omp end parallel

end subroutine parallelMarbles
!-------------------------------------------------------------------------------
! Run 200 update rounds on a single marble.
!
! Each round sets marble(2:4) from the round index, the calling thread's
! OpenMP thread number and marble(1), then calls parser%evaluate with
! marble(1:3) and marble(4:6), and finally stores sum(marble(2:)) in
! marble(1).
! NOTE(review): presumably evaluate reads marble(1:3) and writes marble(4:6);
! confirm against the function_parser module.
!
! Arguments:
!   marble - the six values of one marble, updated in place.
subroutine doWork(marble)
  use omp_lib, only : omp_get_thread_num
  use parserMod, only : parser
  use iso_fortran_env, only: wp => real64

  implicit none
  real(wp), dimension(6), intent(inout) :: marble

  integer :: indx
  integer :: threadNum

  ! The thread number is the same for every iteration of the loop below, so
  ! query the OpenMP runtime once instead of three times per iteration.
  threadNum = omp_get_thread_num()

  do indx = 1, 200
    marble(2) = mod(indx, 6 + threadNum)*marble(1)
    marble(3) = mod(indx, 5 + threadNum)*marble(1)
    marble(4) = mod(indx, 4 + threadNum)*marble(1)

    call parser%evaluate(marble(1:3), marble(4:6))

    marble(1) = sum(marble(2:))
  end do

end subroutine doWork
!-------------------------------------------------------------------------------
! Driver: times parallelMarbles with 1 thread and then with 4 threads, and
! prints the wall-clock time of each run together with the speedup relative
! to the 1-thread run.
program testOMP
  use iso_fortran_env, only: wp => real64, int64

  implicit none

  ! real(wp), allocatable, dimension(:,:)    :: marbles
  real(wp), dimension(6,200000)            :: marbles
  integer                                  :: numThreads
  real(wp)                                 :: singleTime, threadTime

  ! 64-bit clock counters: finer tick resolution and a far later wraparound
  ! than default-kind integers give.
  integer(int64)      :: startTime, endTime, countRate
  character(len=25)   :: varString


  ! allocate(marbles(6,200000))
  ! Stride 3 makes the loop run with numThreads = 1 and then numThreads = 4.
  do numThreads = 1, 4, 3
    write(*,*) 'Calling parallel marbles with ', numThreads, ' threads.'

    call system_clock(startTime, countRate)
    call parallelMarbles(marbles, numThreads)
    call system_clock(endTime)

    threadTime = real(endTime - startTime, wp)/real(countRate, wp)

    write (varString, '(F25.6)') threadTime
    write (*, '(A)') ' Loop time = ' // trim(adjustl(varString)) // ' seconds.'

    ! The first pass (1 thread) provides the baseline for the speedup.
    if (numThreads == 1) then
      singleTime = threadTime
    end if

    write (varString, '(F25.6)') singleTime / threadTime
    write (*, '(A)') ' Speedup = ' // trim(adjustl(varString)) // 'x.'


    write(*,*) '------------------------------------------------------'
  end do
end program testOMP

My code uses the fortran_function_parser module from:

https://github.com/jacobwilliams/fortran_function_parser

 

I've attached the source files for the test program along with the fortran function parser module for convenience.

 

Any insight into what might be going wrong in ifort and how I might improve the performance with ifort would be greatly appreciated.

 

Labels (2)
0 Replies
Reply