- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Link Copied
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
The modified version below demonstrates using an allocatable array passed as an argument with data persistence.
[cpp] program example2
implicit none
integer, parameter :: SZ = 10000
real, allocatable,dimension(:) :: matrix
integer i
allocate(matrix(SZ))
! Allocate memory on coprocessor only
!dir$ offload_transfer target(mic:0) in(matrix : length(SZ) &
alloc_if(.true.) free_if(.false.))
! Initialize values on the host
!$omp parallel do private(i)
do i=1,SZ
matrix(i)=99.0
end do
print *,'start'
print *,'b4 offload_transfer'
print '(a,f6.2)','matrix(1)= ',matrix(1)
print '(a,f6.2)','matrix(2)= ',matrix(2)
print '(a,I0,a,f6.2)','matrix(',SZ,')= ',matrix(SZ)
! Send current values to coprocessor only and reuse previous allocation
!dir$ offload_transfer target(mic:0) in(matrix : length(SZ) &
alloc_if(.false.) free_if(.false.))
print *,'af offload_transfer'
! change this element on the host only to demonstate values on
! coprocessor are used
matrix(1) = 0.0
print '(a,z)','GW b4 call t3s2 loc(matrix)=',loc(matrix)
call t3s2(matrix,SZ)
! Transfer current values on coprocessor to host and free allocation
!dir$ offload_transfer target(mic:0) out(matrix : length(SZ) &
alloc_if(.false.) free_if(.true.))
print '(a)','Expected values are 99.0, 100.0, 98.0'
print '(a,f6.2)','matrix(1)= ',matrix(1)
print '(a,f6.2)','matrix(2)= ',matrix(2)
print '(a,I0,a,f6.2)','matrix(',SZ,')= ',matrix(SZ)
end program example2
subroutine t3s2(matA,siz)
implicit none
integer, intent(in) :: siz
real, dimension(*), intent(inout) :: matA
integer i
print *,'GW start t3s2'
print '(a,z)','GW inside t3s2 loc(matA)=',loc(matA)
! With length(0), no data is transferred to the coprocessor
! With IN and no allocation set, the pointer value for this
! offload is only initialized to the value created in the calling
! scope for the associated actual argument in the caller;
! therefore, this re-uses the previous allocation on the coprocessor
! for the associated actual argument
! Modify one existing value (i.e. 99.0) on the coprocessor and
! leave value on coprocessor
!dir$ offload begin target(mic:0) in(siz) in(matA : length(0) &
alloc_if(.false.) free_if(.false.))
matA(siz)= matA(siz) - 1
!dir$ end offload
! Re-use values on coprocessor, update some elements of array,
! leave values on coprocessor
!dir$ offload target(mic:0) in(siz) nocopy (matA : length(0) &
alloc_if(.false.) free_if(.false.))
!$omp parallel do private(i)
do i=2,(siz-1)
matA(i)=100.0
end do
end subroutine t3s2 [/cpp]
[plain]$ ./a.out
[Offload] [MIC 0] [File] example2.F90
[Offload] [MIC 0] [Line] 11
[Offload] [MIC 0] [Tag] Tag 0
[Offload] [HOST] [Tag 0] [CPU Time] 1.005746(seconds)
[Offload] [MIC 0] [Tag 0] [CPU->MIC Data] 40064 (bytes)
[Offload] [MIC 0] [Tag 0] [MIC Time] 0.000188(seconds)
[Offload] [MIC 0] [Tag 0] [MIC->CPU Data] 0 (bytes)
start
b4 offload_transfer
matrix(1)= 99.00
matrix(2)= 99.00
matrix(10000)= 99.00
[Offload] [MIC 0] [File] example2.F90
[Offload] [MIC 0] [Line] 27
[Offload] [MIC 0] [Tag] Tag 1
[Offload] [HOST] [Tag 1] [CPU Time] 0.000265(seconds)
[Offload] [MIC 0] [Tag 1] [CPU->MIC Data] 40072 (bytes)
[Offload] [MIC 0] [Tag 1] [MIC Time] 0.000014(seconds)
[Offload] [MIC 0] [Tag 1] [MIC->CPU Data] 0 (bytes)
af offload_transfer
GW b4 call t3s2 loc(matrix)= 1CAA690
GW start t3s2
GW inside t3s2 loc(matA)= 1CAA690
[Offload] [MIC 0] [File] example2.F90
[Offload] [MIC 0] [Line] 71
[Offload] [MIC 0] [Tag] Tag 2
[Offload] [HOST] [Tag 2] [CPU Time] 0.004952(seconds)
[Offload] [MIC 0] [Tag 2] [CPU->MIC Data] 12 (bytes)
[Offload] [MIC 0] [Tag 2] [MIC Time] 0.000058(seconds)
[Offload] [MIC 0] [Tag 2] [MIC->CPU Data] 8 (bytes)
[Offload] [MIC 0] [File] example2.F90
[Offload] [MIC 0] [Line] 78
[Offload] [MIC 0] [Tag] Tag 3
[Offload] [HOST] [Tag 3] [CPU Time] 0.236283(seconds)
[Offload] [MIC 0] [Tag 3] [CPU->MIC Data] 16 (bytes)
[Offload] [MIC 0] [Tag 3] [MIC Time] 0.227425(seconds)
[Offload] [MIC 0] [Tag 3] [MIC->CPU Data] 4 (bytes)
[Offload] [MIC 0] [File] example2.F90
[Offload] [MIC 0] [Line] 41
[Offload] [MIC 0] [Tag] Tag 4
[Offload] [HOST] [Tag 4] [CPU Time] 0.028915(seconds)
[Offload] [MIC 0] [Tag 4] [CPU->MIC Data] 72 (bytes)
[Offload] [MIC 0] [Tag 4] [MIC Time] 0.000257(seconds)
[Offload] [MIC 0] [Tag 4] [MIC->CPU Data] 40000 (bytes)
Expected values are 99.0, 100.0, 98.0
matrix(1)= 99.00
matrix(2)= 100.00
matrix(10000)= 98.00[/plain]
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page