Intel® Fortran Compiler
Build applications that can scale for the future with optimized code designed for Intel® Xeon® and compatible processors.
Announcements
This community is designed for sharing of public information. Please do not share Intel or third-party confidential information here.
27170 Discussions

## Is there any way to utilize BLAS with symmetry for high rank tensor/array New Contributor I
295 Views

Hi,

I came cross a type of tensor contraction with symmetry in individual tensor(s). I would like to know if there is a way to utilize such symmetries in BLAS.

For instance, for a tensor contraction

A[a,b] * B[b,c,d] = C[a,c,d]

where the Einstein summation rule was assumed, repeated index, b, stands for summation. i.e., \sum_b in the above equation. Tensor/array A,B,C are all real. The tensor/array b has a symmetry

B[b,c,d] = B[b,d,c]

C[a,c,d] = C[a,d,c]

In BLAS, there are some subroutines about symmetric matrix, but not higher rank tensor/array as far as I know. Is there any way to utilize the above symmetries in DGEMM or any other subroutines?

I heard to first do loops over indices c and d, then do DGEMM with array with fewer indices. If I define new arrays

B_small(:) = B(:,c,d), C_small(:) = C(:,c,d)

that would take a bit time (I think I tested the wall time at some stage). Hence, I would like to know any efficient solution. My attempt code is following

Program blas_symm_tensor

Use, Intrinsic :: iso_fortran_env, Only :  wp => real64, li => int64

Implicit None

Real( wp ), Dimension( :, :    ), Allocatable :: a
Real( wp ), Dimension( :, :, : ), Allocatable :: b
Real( wp ), Dimension( :, :, : ), Allocatable :: c1, c2, c3, c4, c5

Integer :: na, nb, nc, nd, ne
Integer :: la, lb, lc, ld
Integer( li ) :: start, finish, rate, numthreads

Write( *, * ) 'na, nb, nc, nd ?'
Read( *, * ) na, nb, nc, nd

ne = nc * nd

Allocate( a ( 1:na, 1:nb ) )
Allocate( b ( 1:nb, 1:nc, 1:nd ) )
Allocate( c1( 1:na, 1:nc, 1:nd ) )
Allocate( c2( 1:na, 1:nc, 1:nd ) )
Allocate( c3( 1:na, 1:nc, 1:nd ) )
Allocate( c4( 1:na, 1:nc, 1:nd ) )
Allocate( c5( 1:na, 1:nc, 1:nd ) )

! Set up some data
Call Random_number( a )
Call Random_number( b )

! symmetrize tensor b
do la = 1, na
do lc = 1, nc
do ld = lc+1, nd
b(la,lc,ld) = b(la,ld,lc)
enddo
enddo
enddo

Call System_clock( start, rate )
c1 = 0.0_wp

do ld = 1, nd
do lc = 1, nc
do lb = 1, nb
do la = 1, na
c1(la,lc,ld) = c1(la,lc,ld)  + a(la,lb) * b(lb, lc, ld)
enddo
enddo
enddo
enddo

Call System_clock( finish, rate )
Write( *, * ) 'Time for loop', Real( finish - start, wp ) / rate

Call System_clock( start, rate )
c2 = 0.0_wp

do ld = 1, nd
do lc = 1, ld
do lb = 1, nb
do la = 1, na
c2(la,lc,ld) = c2(la,lc,ld)  + a(la,lb) * b(lb, lc, ld)
enddo
enddo
enddo
enddo

do ld = 1, nd
do lc = ld+1, nc
do la = 1, na
c2(la,lc,ld) = c2(la,ld,lc)
end do
enddo
enddo

Call System_clock( finish, rate )
Write( *, * ) 'Time for symmetric loop', Real( finish - start, wp ) / rate

Call System_clock( start, rate )
c3 = 0.0_wp

do ld = 1, nd
do lc = 1, ld
Call dgemm( 'N', 'N', na, 1, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
b(:,lc,ld) , Size( b , Dim = 1 ), &
0.0_wp, c3, Size( c3, Dim = 1 ) )
enddo
enddo

do ld = 1, nd
do lc = ld+1, nc
do la = 1, na
c3(la,lc,ld) = c3(la,ld,lc)
end do
enddo
enddo
Call System_clock( finish, rate )

Write( *, * ) 'Time for symmetric dgemm', Real( finish - start, wp ) / rate

! Direct
Call System_clock( start, rate )
Call dgemm( 'N', 'N', na, ne, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
b , Size( b , Dim = 1 ), &
0.0_wp, c4, Size( c4, Dim = 1 ) )
Call System_clock( finish, rate )
Write( *, * ) 'Time for straight dgemm', Real( finish - start, wp ) / rate

Call System_clock( start, rate )
c5 = 0.0_wp
do ld = 1, nd
do lc = 1, ld
Call dgemv( 'N', na,  nb, 1.0_wp,  a, na, b(1,lc,ld), 1, 0.0_wp,  c5, 1 )
enddo
enddo

do ld = 1, nd
do lc = ld+1, nc
do la = 1, na
c5(la,lc,ld) = c5(la,ld,lc)
end do
enddo
enddo
Call System_clock( finish, rate )
Write( *, * ) 'Time for symmetric dgemv', Real( finish - start, wp ) / rate

do la = 1, na
do lc = 1, nc
do ld = 1, nd

if ( dabs(c1(la,lc,ld) - c2(la,lc,ld))  > 1.e-6 ) then
write (*,*) '!!! c2', la,lc,ld, c1(la,lc,ld) - c2(la,lc,ld)
endif

if ( dabs(c1(la,lc,ld) - c3(la,lc,ld))  > 1.e-6 ) then
write (*,*) '!!! c3', la,lc,ld, c1(la,lc,ld) - c3(la,lc,ld)
endif

if ( dabs(c1(la,lc,ld) - c4(la,lc,ld))  > 1.e-6 ) then
write (*,*) '!!! c4', la,lc,ld, c1(la,lc,ld) - c4(la,lc,ld)
endif

if ( dabs(c1(la,lc,ld) - c5(la,lc,ld))  > 1.e-6 ) then
write (*,*) '!!! c5', la,lc,ld, c1(la,lc,ld) - c5(la,lc,ld)
endif

enddo
enddo
enddo

End Program blas_symm_tensor

I got different number in resulting array c3 (DGEMM) and c5 (DGEMV)

Thank you very much!

Labels (1)
• ### Fortran Language

1 Solution Black Belt Retired Employee
272 Views

This is not a Fortran compiler question - better asked in Intel® oneAPI Base Toolkit - Intel Community , which is where MKL hangs out.

2 Replies Black Belt Retired Employee
273 Views

This is not a Fortran compiler question - better asked in Intel® oneAPI Base Toolkit - Intel Community , which is where MKL hangs out. New Contributor I
265 Views

Thanks you very much. I will post there. 