- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I’ve been seeing some strange OpenMP scaling behavior that I’m not sure how to explain.
I have a simple test program that scales nicely when compiled with gfortran but poorly when compiled with ifort.
My test program is the following:
!> Provides the function parser instance shared by the test routines.
!> The instance is threadprivate, so every OpenMP thread has its own copy.
module parserMod
  use function_parser, only : fparser_array
  implicit none

  type(fparser_array) :: parser

  ! The threadprivate directive needs a saved variable; the attribute is
  ! given with a separate statement here.
  save :: parser
  !$omp threadprivate(parser)

end module parserMod
!-------------------------------------------------------------------------------
!> Initializes the thread-private parser on every thread of the team and
!> then hands each column of marbles to doWork in parallel.
!>
!> marbles    : 6 x 200000 work array. Element 1 of every column is set to
!>              1 before that column is passed to doWork.
!> numThreads : number of OpenMP threads requested for the parallel region.
!>
!> Stops the program with code 99 if the parser reports an error after
!> parsing.
subroutine parallelMarbles(marbles, numThreads)
  use parserMod, only : parser
  use iso_fortran_env, only : wp => real64, output_unit
  implicit none

  real(wp), dimension(6,200000), intent(inout) :: marbles
  integer, intent(in) :: numThreads

  integer :: indx
  character(len=1), dimension(3), parameter :: parserVars = ['x', 'y', 'z']

  ! Initialization and the work loop share ONE parallel region. Threadprivate
  ! data is only guaranteed to persist between two separate parallel regions
  ! under extra conditions (same team size, dynamic thread adjustment off).
  ! Inside a single region, the thread that set up its parser copy is the
  ! thread that uses it.
  !$omp parallel default(none) &
  !$omp private(indx) &
  !$omp shared(marbles) &
  !$omp num_threads(numThreads)

  ! Every thread initializes its own copy of the parser.
  call parser%parse(parserVars, parserVars)
  if (parser%error()) then
    call parser%print_errors(output_unit)
    stop 99
  endif

  ! Columns are distributed over the threads of the team.
  !$omp do
  do indx = 1, size(marbles, 2)
    marbles(1,indx) = 1.0_wp
    call doWork(marbles(:,indx))
  end do
  !$omp end do

  !$omp end parallel
end subroutine parallelMarbles
!-------------------------------------------------------------------------------
!> Runs a fixed number of update passes over one 6-element marble, calling
!> the thread-private parser once per pass.
!>
!> marble : values updated in place. In each pass elements 2-4 are rebuilt
!>          from element 1, the pass counter and the calling thread's
!>          number; the parser is then evaluated, and element 1 becomes the
!>          sum of elements 2-6.
subroutine doWork(marble)
  use omp_lib, only : omp_get_thread_num
  use parserMod, only : parser
  use iso_fortran_env, only : wp => real64
  implicit none

  real(wp), dimension(6), intent(inout) :: marble

  integer, parameter :: numPasses = 200
  integer :: indx
  integer :: threadId

  ! The executing thread cannot change during this call, so the thread
  ! number is queried once instead of three times per pass.
  threadId = omp_get_thread_num()

  do indx = 1, numPasses
    marble(2) = real(mod(indx, 6 + threadId), wp)*marble(1)
    marble(3) = real(mod(indx, 5 + threadId), wp)*marble(1)
    ! NOTE(review): marble(4) is assigned here and is also part of the
    ! marble(4:6) section passed as the second argument below; presumably
    ! evaluate overwrites it -- confirm against the parser's interface.
    marble(4) = real(mod(indx, 4 + threadId), wp)*marble(1)
    call parser%evaluate(marble(1:3), marble(4:6))
    marble(1) = sum(marble(2:))
  end do
end subroutine doWork
!-------------------------------------------------------------------------------
!> Times parallelMarbles with 1 and then 4 threads and prints the loop time
!> and the speedup relative to the single-thread run.
program testOMP
  use iso_fortran_env, only : wp => real64, int64
  implicit none

  ! real(wp), allocatable, dimension(:,:) :: marbles
  real(wp), dimension(6,200000) :: marbles
  integer :: numThreads
  real(wp) :: singleTime, threadTime
  ! 64-bit clock variables: system_clock then reports with the finest tick
  ! the processor offers for this kind, and wrap-around during a run is not
  ! a practical concern.
  integer(int64) :: startTime, endTime, countRate
  character(len=25) :: varString

  ! allocate(marbles(6,200000))

  ! Defined up front so the speedup expression never reads an undefined
  ! value; it is overwritten by the 1-thread pass, which runs first.
  singleTime = 0.0_wp

  ! Stride 3 gives exactly two passes: numThreads = 1 and numThreads = 4.
  do numThreads = 1, 4, 3
    write(*,*) 'Calling parallel marbles with ', numThreads, ' threads.'

    call system_clock(startTime, countRate)
    call parallelMarbles(marbles, numThreads)
    call system_clock(endTime)

    threadTime = real(endTime - startTime, wp)/real(countRate, wp)
    write (varString, '(F25.6)') threadTime
    write (*, '(A)') ' Loop time = ' // trim(adjustl(varString)) // ' seconds.'

    ! The single-thread pass is the baseline for the speedup figure.
    if (numThreads .eq. 1) then
      singleTime = threadTime
    endif

    write (varString, '(F25.6)') singleTime / threadTime
    write (*, '(A)') ' Speedup = ' // trim(adjustl(varString)) // 'x.'
    write(*,*) '------------------------------------------------------'
  end do
end program testOMP
My code uses the fortran_function_parser module from:
https://github.com/jacobwilliams/fortran_function_parser
I've attached the source files for the test program along with the fortran function parser module for convenience.
Any insight into what might be going wrong in ifort and how I might improve the performance with ifort would be greatly appreciated.
Link Copied
0 Replies
Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page