- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi,
I have two identical codes (matrix multiplications and the solution of a linear system of equations), one running on Win32 (Windows Vista) and the other on x64 (Windows XP). Strikingly, the Win32 version is a lot faster. Any clues why?
The code includes calls to the LAPACK routines dgetri, dgetrf, and dgesv (see below).
Thanks,
Pablo
!> Advance the model states by one period for a given draw of the w1 shocks
!> and evaluate the density of the w2 shocks implied by the observed data.
!>
!> Outline:
!>   1. Build the right-hand side pabloA (observables minus the part explained
!>      by the lagged states and w1) and the coefficient matrix pabloB that
!>      multiplies the unknown w2 shocks in each observation equation.
!>   2. Solve pabloB * w2 = pabloA with LAPACK DGESV.
!>   3. Compute jacob = |det(pabloB**(-1))| = 1/|det(pabloB)| from the LU
!>      factors that DGESV leaves in pabloB.
!>   4. Assemble the second-order (states) and first-order (statesfirst)
!>      state vectors.
!>   5. Return densidad = jacob * N(w2; 0, I).
!>
!> Arguments:
!>   stateslag       (in)  lagged second-order states, (nx-nshk) x 1
!>   statesfirstlag  (in)  lagged first-order states,  (nx-nshk) x 1
!>   w1              (in)  shocks uphi, umu, ue, ud, ua
!>   densidad        (out) density of the solved w2 shocks times the Jacobian;
!>                         set to zero when DGESV reports a failure
!>   states          (out) updated second-order states
!>   statesfirst     (out) updated first-order states
!>   time            (in)  row of ourdata holding the current observations
!>
!> State ordering (from the original author's notes):
!>   sigphi, sigmu, sige, sigd, siga, phi, d, y, x, pi, c, w, vw, v, k, R, lambda
!> Ordering of solw2:
!>   solw2(1) = usigphi, solw2(2) = usigmu, solw2(3) = usige,
!>   solw2(4) = usigd,   solw2(5) = usiga
!>
!> Open questions left by the original author:
!>   - CHECK WHETHER stateslag(2,1)-sigmamus OR stateslag(2,1); probably just
!>     stateslag(2,1) because the state is in log deviation.
!>   - WARNING: FUNCTION PRODU ONLY WORKS FOR MATRIX 21x1.
subroutine motionstatesprueba(stateslag, statesfirstlag, w1, densidad, &
                              states, statesfirst, time)
  use nrtype
  use mod_calibration
  use matrix_calibration
  use particle_calibration
  implicit none

  ! ---- dummy arguments ----
  real(dp), dimension(nx-nshk,1), intent(in)   :: stateslag, statesfirstlag
  real(dp), dimension(nshk-nshkw2), intent(in) :: w1
  real(dp), intent(out)                        :: densidad
  real(dp), dimension(nx-nshk,1), intent(out)  :: states, statesfirst
  integer, intent(in)                          :: time

  ! ---- locals ----
  real(dp), dimension(nshk-nshkw1,nshk-nshkw1) :: pabloB
  real(dp), dimension(nshk-nshkw1,1)           :: pabloA, solw2, ruido
  real(dp), dimension(nnx21,1) :: myvariables_without_w21, myvariables_without_w21_first
  real(dp) :: uphi, umu, ue, ud, ua
  real(dp) :: jacob, exp_sigmad, exp_sigmaphi
  real(dp) :: matrix_mul, produ        ! external functions defined elsewhere
  integer, dimension(nshkw2) :: ipiv
  integer :: i, info

  ! 0. Initialize some matrices
  pabloB = 0.0_dp
  pabloA = 0.0_dp

  ! 1. Random draw. ruido is not used further in this routine; the call is
  !    kept because it presumably advances the generator state that other
  !    code relies on -- TODO confirm whether it can be removed.
  call drnnor(5, ruido)

  ! 2. Stack w1 and the lagged states for the products below.
  ! NOTE(review): nshkw is not declared in this routine; it must come from one
  ! of the used modules (implicit none will flag it if it does not).
  myvariables_without_w21(1:nshkw1,1)             = w1
  myvariables_without_w21(nshkw1+1:nnx21,1)       = stateslag(1:nx-nshkw-1,1)
  myvariables_without_w21_first(1:nshkw1,1)       = w1
  myvariables_without_w21_first(nshkw1+1:nnx21,1) = statesfirstlag(1:nx-nshkw-1,1)

  ! 3. Assign w1 errors
  uphi = w1(1)
  umu  = w1(2)
  ue   = w1(3)
  ud   = w1(4)
  ua   = w1(5)

  ! 4. Law of motion for observables: row k of pabloB * w2 = pabloA(k)

  ! 4.1 Relative price of investment
  ! NOTE(review): the mean is ADDED here, whereas every other observable
  ! subtracts its mean, and the original commented-out variant used
  ! "- meanmu". Kept as in the original -- confirm the sign.
  pabloA(1,1) = ourdata(time,1) + meanmu &
       - (produ(psi1obs1_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2obs1_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2obs1(nx,nx)))
  pabloB(1,2) = psi2obs1(7,2)*umu

  ! 4.2 Nominal interest rate
  pabloA(2,1) = ourdata(time,2) - bigrs &
       - (produ(psi1obs2_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2obs2_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2obs2(nx,nx)))
  pabloB(2,1) = psi2obs2(6,1)*uphi
  pabloB(2,2) = psi2obs2(7,2)*umu
  pabloB(2,3) = psi2obs2(8,3)*ue
  pabloB(2,4) = psi2obs2(9,4)*ud
  pabloB(2,5) = psi2obs2(10,5)*ua

  ! 4.3 Inflation
  pabloA(3,1) = ourdata(time,3) - pis &
       - (produ(psi1obs3_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2obs3_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2obs3(nx,nx)))
  pabloB(3,1) = psi2obs3(6,1)*uphi
  pabloB(3,2) = psi2obs3(7,2)*umu
  pabloB(3,3) = psi2obs3(8,3)*ue
  pabloB(3,4) = psi2obs3(9,4)*ud
  pabloB(3,5) = psi2obs3(10,5)*ua

  ! 4.4 Output
  pabloA(4,1) = ourdata(time,4) - log(growth) &
       - (produ(psi1obs4_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2obs4_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2obs4(nx,nx)))
  pabloB(4,1) = psi2obs4(6,1)*uphi
  pabloB(4,2) = psi2obs4(7,2)*umu
  pabloB(4,3) = psi2obs4(8,3)*ue
  pabloB(4,4) = psi2obs4(9,4)*ud
  pabloB(4,5) = psi2obs4(10,5)*ua

  ! 4.5 Real wage
  pabloA(5,1) = ourdata(time,5) - log(growth) &
       - (produ(psi1obs5_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2obs5_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2obs5(nx,nx)))
  pabloB(5,1) = psi2obs5(6,1)*uphi
  pabloB(5,2) = psi2obs5(7,2)*umu
  pabloB(5,3) = psi2obs5(8,3)*ue
  pabloB(5,4) = psi2obs5(9,4)*ud
  pabloB(5,5) = psi2obs5(10,5)*ua

  ! 5. Solve pabloB * w2 = pabloA. On exit pabloB holds the LU factors and
  !    pabloA the solution.
  call dgesv(nshkw2, 1, pabloB, nshkw2, ipiv, pabloA, nshkw2, info)
  solw2 = pabloA

  ! 6. Jacobian for the likelihood: |det(pabloB**(-1))| = 1/|det(pabloB)|.
  !    |det(pabloB)| is the absolute product of the diagonal of U (row swaps
  !    only change the sign), so no explicit inverse or second factorization
  !    is needed. When DGESV fails (info /= 0) the Jacobian is set to zero so
  !    that this draw receives zero density instead of a meaningless value.
  if (info /= 0) then
    jacob = 0.0_dp
  else
    jacob = 1.0_dp/abs(product([(pabloB(i,i), i = 1, nshkw2)]))
  end if

  ! 7. Assemble the complete state: second order
  states(8,1) = produ(psi1y_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2y_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2y(nx,nx)) &
       + psi2y(6,1)*uphi*solw2(1,1) + psi2y(7,2)*umu*solw2(2,1) &
       + psi2y(8,3)*ue*solw2(3,1) + psi2y(9,4)*ud*solw2(4,1) &
       + psi2y(10,5)*ua*solw2(5,1)

  states(16,1) = produ(psi1bigr_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2bigr_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2bigr(nx,nx)) &
       + psi2bigr(6,1)*uphi*solw2(1,1) + psi2bigr(7,2)*umu*solw2(2,1) &
       + psi2bigr(8,3)*ue*solw2(3,1) + psi2bigr(9,4)*ud*solw2(4,1) &
       + psi2bigr(10,5)*ua*solw2(5,1)

  states(15,1) = produ(psi1k_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2k_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2k(nx,nx)) &
       + psi2k(6,1)*uphi*solw2(1,1) + psi2k(7,2)*umu*solw2(2,1) &
       + psi2k(8,3)*ue*solw2(3,1) + psi2k(9,4)*ud*solw2(4,1) &
       + psi2k(10,5)*ua*solw2(5,1)

  states(14,1) = produ(psi1v_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2v_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2v(nx,nx)) &
       + psi2v(6,1)*uphi*solw2(1,1) + psi2v(7,2)*umu*solw2(2,1) &
       + psi2v(8,3)*ue*solw2(3,1) + psi2v(9,4)*ud*solw2(4,1) &
       + psi2v(10,5)*ua*solw2(5,1)

  states(13,1) = produ(psi1vw_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2vw_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2vw(nx,nx)) &
       + psi2vw(6,1)*uphi*solw2(1,1) + psi2vw(7,2)*umu*solw2(2,1) &
       + psi2vw(8,3)*ue*solw2(3,1) + psi2vw(9,4)*ud*solw2(4,1) &
       + psi2vw(10,5)*ua*solw2(5,1)

  states(12,1) = produ(psi1w_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2w_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2w(nx,nx)) &
       + psi2w(6,1)*uphi*solw2(1,1) + psi2w(7,2)*umu*solw2(2,1) &
       + psi2w(8,3)*ue*solw2(3,1) + psi2w(9,4)*ud*solw2(4,1) &
       + psi2w(10,5)*ua*solw2(5,1)

  states(11,1) = produ(psi1c_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2c_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2c(nx,nx)) &
       + psi2c(6,1)*uphi*solw2(1,1) + psi2c(7,2)*umu*solw2(2,1) &
       + psi2c(8,3)*ue*solw2(3,1) + psi2c(9,4)*ud*solw2(4,1) &
       + psi2c(10,5)*ua*solw2(5,1)

  states(10,1) = produ(psi1pi_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2pi_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2pi(nx,nx)) &
       + psi2pi(6,1)*uphi*solw2(1,1) + psi2pi(7,2)*umu*solw2(2,1) &
       + psi2pi(8,3)*ue*solw2(3,1) + psi2pi(9,4)*ud*solw2(4,1) &
       + psi2pi(10,5)*ua*solw2(5,1)

  states(9,1) = produ(psi1x_without_w21, myvariables_without_w21, nnx21) &
       + 0.5_dp*(matrix_mul(psi2x_without_w21, myvariables_without_w21_first, nnx21) &
       + psi2x(nx,nx)) &
       + psi2x(6,1)*uphi*solw2(1,1) + psi2x(7,2)*umu*solw2(2,1) &
       + psi2x(8,3)*ue*solw2(3,1) + psi2x(9,4)*ud*solw2(4,1) &
       + psi2x(10,5)*ua*solw2(5,1)

  states(17,1) = 1.0_dp

  states(5,1) = rhosigmaa*stateslag(5,1)   + netaa*solw2(5,1)
  states(4,1) = rhosigmad*stateslag(4,1)   + netad*solw2(4,1)
  states(3,1) = rhosigmae*stateslag(3,1)   + netae*solw2(3,1)
  states(2,1) = rhosigmamu*stateslag(2,1)  + netamu*solw2(2,1)
  states(1,1) = rhosigmaphi*stateslag(1,1) + netaphi*solw2(1,1)

  ! exp() replaces the truncated constant 2.71828182846 raised to a power.
  exp_sigmad   = exp(sigmads)
  exp_sigmaphi = exp(sigmaphis)

  states(7,1) = rhod*stateslag(7,1) + exp_sigmad*ud &
       + 0.5_dp*(2.0_dp*exp_sigmad*rhosigmad*ud*statesfirstlag(4,1) &
       + 2.0_dp*exp_sigmad*netad*ud*solw2(4,1))
  states(6,1) = rhocurlyphi*stateslag(6,1) + exp_sigmaphi*uphi &
       + 0.5_dp*(2.0_dp*exp_sigmaphi*rhosigmaphi*uphi*statesfirstlag(1,1) &
       + 2.0_dp*exp_sigmaphi*netaphi*uphi*solw2(1,1))

  ! 8. Assemble the complete state: first order
  statesfirst(8,1)  = produ(psi1y_without_w21,    myvariables_without_w21_first, nnx21)
  statesfirst(16,1) = produ(psi1bigr_without_w21, myvariables_without_w21_first, nnx21)
  statesfirst(15,1) = produ(psi1k_without_w21,    myvariables_without_w21_first, nnx21)
  statesfirst(14,1) = produ(psi1v_without_w21,    myvariables_without_w21_first, nnx21)
  statesfirst(13,1) = produ(psi1vw_without_w21,   myvariables_without_w21_first, nnx21)
  statesfirst(12,1) = produ(psi1w_without_w21,    myvariables_without_w21_first, nnx21)
  statesfirst(11,1) = produ(psi1c_without_w21,    myvariables_without_w21_first, nnx21)
  statesfirst(10,1) = produ(psi1pi_without_w21,   myvariables_without_w21_first, nnx21)
  statesfirst(9,1)  = produ(psi1x_without_w21,    myvariables_without_w21_first, nnx21)
  statesfirst(17,1) = 1.0_dp

  statesfirst(5,1) = rhosigmaa*statesfirstlag(5,1)   + netaa*solw2(5,1)
  statesfirst(4,1) = rhosigmad*statesfirstlag(4,1)   + netad*solw2(4,1)
  statesfirst(3,1) = rhosigmae*statesfirstlag(3,1)   + netae*solw2(3,1)
  statesfirst(2,1) = rhosigmamu*statesfirstlag(2,1)  + netamu*solw2(2,1)
  statesfirst(1,1) = rhosigmaphi*statesfirstlag(1,1) + netaphi*solw2(1,1)

  statesfirst(7,1) = rhod*statesfirstlag(7,1)        + exp_sigmad*ud
  statesfirst(6,1) = rhocurlyphi*statesfirstlag(6,1) + exp_sigmaphi*uphi

  ! 9. Density of the solved shocks: jacob * exp(-w2'w2/2) / (2*pi)**(n/2).
  !    The exponent n/2 must be evaluated in real arithmetic: the integer
  !    expression nshkw2/2 truncates (5/2 = 2) and gives the wrong
  !    normalising constant.
  ! NOTE(review): if pi comes from an unmodified Numerical Recipes nrtype it
  ! is single precision -- confirm, and switch to a dp constant if so.
  densidad = jacob*exp(-0.5_dp*sum(solw2(1:nshkw2,1)**2)) &
       / (2.0_dp*pi)**(0.5_dp*nshkw2)

end subroutine motionstatesprueba
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Sounds like a job for VTune. Get sampling results for both, see if the hotspot distribution is roughly the same. The decision tree gets a lot less bushy once we know the results of the basic profiling runs.
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page