Dear All,
I'm a newbie of Intel Cluster OpenMP and woking on my fortran code as seen below
using my small Core2Duo clusters with GbE network (3 nodes, each has single Core2Duo).
In my testings, "serial" run and "-openmp" run show reasonable speedup of CPU time,
but failed in the case using "-cluster-openmp" run.
CPU times were as below:
"sereal" run 7.6 (s
"-openmp" run 4.4 (s)
"-cluster-openmp" run 113.4 (s) !!!!!
(other options are "-O3 -ip -ipo -ftz")
I'd like to know to some guidance to change my code.
Thanks in advance;
S.Wakashima
-----Code:
!***********************************************
! 2D diffusion equation
! (B.C.s are constant)
!***********************************************
program training_omp
!$ use omp_lib
use ifport ! for secnds() function
implicit none
integer,parameter :: inx=250,jnx=250
real(8) :: uu(inx,jnx),rhs(inx,jnx)
real(8) :: dt, dx, dy
real(8) :: dxinv, dyinv
real(8) :: diff
real(8) :: ddd1,ddd2
real(8) :: rtime
real(4) :: t1,t2
real(8) :: time_s,time_e
real(8) :: ts,te
integer :: i,j,k
!dir$ omp sharable(k,uu,rhs,dxinv,dyinv,dt,diff,rtime)
!---- params. ---------------
dt = 0.5d-5 ! timestep
dx = 1.0d-2 ! x increment
dy = 1.0d-2 ! y increment
dxinv = 1.0d0/(dx**2)
dyinv = 1.0d0/(dy**2)
diff = 0.1d0 ! diffusion coef.
rtime = 0.0d0
!---uu init -----------------
uu (:,:) = 0.0d0
rhs(:,:) = 0.0d0
do j = 150, 200
do i = 150, 200
uu(i,j) = 10.d0
enddo
enddo
call cpu_time(time_s)
t1 = secnds(0.0)
!$ ts = omp_get_wtime()
!time marching---------------------------------
do k = 1, 50000
!----------------------------------------------
rtime = rtime + dt
!$omp parallel private(ddd1,ddd2)
!$omp do
do j=2,jnx-1
do i=2,inx-1
ddd1 = dxinv * (uu(i-1,j)-2.d0*uu(i,j)+uu(i+1,j))
ddd2 = dyinv * (uu(i,j-1)-2.d0*uu(i,j)+uu(i,j+1))
rhs(i,j) = diff * (ddd1 + ddd2) * dt
enddo
enddo
!$omp do
do j=2,jnx-1
do i=2,inx-1
uu(i,j) = uu(i,j) + rhs(i,j)
enddo
enddo
!$omp end parallel
!----------------------------------------------
enddo
!----------------------------------------------
!$ write(6,*) 'passed ', omp_get_wtime()-ts
call cpu_time(time_e)
t2 = secnds(t1)
write(*,*) 'passed ',time_e-time_s
write(*,*) 'passed ',t2
open(1,file="test.dat")
do j=1,jnx
do i=1,inx
write(1,'(3e15.7)') (i-1)*dx, (j-1)*dy, uu(i,j)
enddo
write(1,*)
enddo
close(1)
stop
end program training_omp
---- KMP_CUSTER.INI:
### option lines
--hostlist=master,cluster01,cluster02 \
--processes=3 \
--process-threads=2 \
--launch=rsh \
--sharable_heap=2G \
--divert-twins