- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
I am currently trying to write a flow solver which uses 3 dimensional arrays to represent the different space directions.
I have declared the following array.
phi(1:64, 1:64, 1:64, 1:4) on which I have defined the following 3 subroutines.
-------------------------------------------------------------
1) subroutine compute_u1_cor
which does
do bl = 1,mbl
work (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) = &
(phi (bounds(1,1,bl)+1:bounds(1,2,bl)+1, &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) - &
phi (bounds(1,1,bl)-1:bounds(1,2,bl)-1, &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)))*rc1
end do
--------------------------------------------------------------
2) subroutine compute_u2_cor
which does
do bl = 1,mbl
work (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) = &
(phi (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl)+1:bounds(2,2,bl)+1, &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) - &
phi (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl)-1:bounds(2,2,bl)-1, &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)))*rc1
end do
----------------------------------------------------------------
3) subroutine compute_u3_cor
which does
! Compute gradient in the x3 direction
do bl = 1,mbl
work (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) = &
(phi (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl)+1:bounds(3,2,bl)+1, &
bl_ind(bl)) - &
phi (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl)-1:bounds(3,2,bl)-1, &
bl_ind(bl)))*rc1
end do
!--------------------------------------------------------------------------
When I use the profiler to compare the times of the 3 different subroutines, I get vast differences:
pres_corr..compute_u3_cor_ [60] - 0.08
pres_corr..compute_u1_cor_ [65] - 0.06
pres_corr..compute_u2_cor_ [71] - 0.02
I was assuming that using intrinsic array-section assignments throughout would allow the compiler to optimize much better. However, there is this huge disparity.
Does this mean I need to explicitly specify the looping order and not allow the compiler to decide this?
Is there something about the way I implemented this which is wrong?
Is there some compiler option I am missing out on?
Any help is greatly appreciated.
Thanks,
Giri.
I have declared the following array.
phi(1:64, 1:64, 1:64, 1:4) on which I have defined the following 3 subroutines.
-------------------------------------------------------------
1) subroutine compute_u1_cor
which does
do bl = 1,mbl
work (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) = &
(phi (bounds(1,1,bl)+1:bounds(1,2,bl)+1, &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) - &
phi (bounds(1,1,bl)-1:bounds(1,2,bl)-1, &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)))*rc1
end do
--------------------------------------------------------------
2) subroutine compute_u2_cor
which does
do bl = 1,mbl
work (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) = &
(phi (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl)+1:bounds(2,2,bl)+1, &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) - &
phi (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl)-1:bounds(2,2,bl)-1, &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)))*rc1
end do
----------------------------------------------------------------
3) subroutine compute_u3_cor
which does
! Compute gradient in the x3 direction
do bl = 1,mbl
work (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl) :bounds(3,2,bl) , &
bl_ind(bl)) = &
(phi (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl)+1:bounds(3,2,bl)+1, &
bl_ind(bl)) - &
phi (bounds(1,1,bl) :bounds(1,2,bl) , &
bounds(2,1,bl) :bounds(2,2,bl) , &
bounds(3,1,bl)-1:bounds(3,2,bl)-1, &
bl_ind(bl)))*rc1
end do
!--------------------------------------------------------------------------
When I use the profiler to compare the times of the 3 different subroutines, I get vast differences:
pres_corr..compute_u3_cor_ [60] - 0.08
pres_corr..compute_u1_cor_ [65] - 0.06
pres_corr..compute_u2_cor_ [71] - 0.02
I was assuming that using intrinsic array-section assignments throughout would allow the compiler to optimize much better. However, there is this huge disparity.
Does this mean I need to explicitly specify the looping order and not allow the compiler to decide this?
Is there something about the way I implemented this which is wrong?
Is there some compiler option I am missing out on?
Any help is greatly appreciated.
Thanks,
Giri.
Link Copied
1 Reply
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
No, the first requirement for optimization is for the compiler to take the operands in stride 1 order, and use parallel instructions where applicable. If it fails to recognize the correct inner loop, all is lost. Beyond that, advantage could likely be gained by a little unrolling on the middle and outer loops, but the compiler probably doesn't do that any better with array assignment notation than with explicit loops.

Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page