Solved: Re: Is there any way to utilize BLAS with symmetry for high rank tensor/array

AlphaF20 · ‎04-04-2021

Hi,

I came cross a type of tensor contraction with symmetry in individual tensor(s). I would like to know if there is a way to utilize such symmetries in BLAS.

For instance, for a tensor contraction

A[a,b] * B[b,c,d] = C[a,c,d]

where the Einstein summation rule was assumed, repeated index, b, stands for summation. i.e., \sum_b in the above equation. Tensor/array A,B,C are all real. The tensor/array b has a symmetry

B[b,c,d] = B[b,d,c]

which leads to

C[a,c,d] = C[a,d,c]

In BLAS, there are some subroutines about symmetric matrix, but not higher rank tensor/array as far as I know. Is there any way to utilize the above symmetries in DGEMM or any other subroutines?

I heard to first do loops over indices c and d, then do DGEMM with array with fewer indices. If I define new arrays

B_small(:) = B(:,c,d), C_small(:) = C(:,c,d)

that would take a bit time (I think I tested the wall time at some stage). Hence, I would like to know any efficient solution. My attempt code is following

Program blas_symm_tensor

  Use, Intrinsic :: iso_fortran_env, Only :  wp => real64, li => int64

  Implicit None

  Real( wp ), Dimension( :, :    ), Allocatable :: a
  Real( wp ), Dimension( :, :, : ), Allocatable :: b
  Real( wp ), Dimension( :, :, : ), Allocatable :: c1, c2, c3, c4, c5

  Integer :: na, nb, nc, nd, ne
  Integer :: la, lb, lc, ld  
  Integer( li ) :: start, finish, rate, numthreads
  
  Write( *, * ) 'na, nb, nc, nd ?'
  Read( *, * ) na, nb, nc, nd
  
  ne = nc * nd
  
  Allocate( a ( 1:na, 1:nb ) ) 
  Allocate( b ( 1:nb, 1:nc, 1:nd ) ) 
  Allocate( c1( 1:na, 1:nc, 1:nd ) ) 
  Allocate( c2( 1:na, 1:nc, 1:nd ) ) 
  Allocate( c3( 1:na, 1:nc, 1:nd ) )
  Allocate( c4( 1:na, 1:nc, 1:nd ) )
  Allocate( c5( 1:na, 1:nc, 1:nd ) )
  
  ! Set up some data
  Call Random_number( a )
  Call Random_number( b )
  
  ! symmetrize tensor b
  do la = 1, na 
    do lc = 1, nc
      do ld = lc+1, nd
         b(la,lc,ld) = b(la,ld,lc)
      enddo
    enddo
  enddo
  
  Call System_clock( start, rate )
  c1 = 0.0_wp
  
  do ld = 1, nd  
    do lc = 1, nc
      do lb = 1, nb
        do la = 1, na    
          c1(la,lc,ld) = c1(la,lc,ld)  + a(la,lb) * b(lb, lc, ld)
        enddo  
      enddo
    enddo
  enddo  
 
  Call System_clock( finish, rate )
  Write( *, * ) 'Time for loop', Real( finish - start, wp ) / rate  
 
  Call System_clock( start, rate )
  c2 = 0.0_wp
  
  do ld = 1, nd  
    do lc = 1, ld
      do lb = 1, nb
        do la = 1, na    
          c2(la,lc,ld) = c2(la,lc,ld)  + a(la,lb) * b(lb, lc, ld)
        enddo  
      enddo
    enddo
  enddo  
 
  do ld = 1, nd  
    do lc = ld+1, nc 
      do la = 1, na     
        c2(la,lc,ld) = c2(la,ld,lc)
      end do  
    enddo
  enddo  

  Call System_clock( finish, rate )
  Write( *, * ) 'Time for symmetric loop', Real( finish - start, wp ) / rate  

  Call System_clock( start, rate )
  c3 = 0.0_wp
  
  do ld = 1, nd  
    do lc = 1, ld  
       Call dgemm( 'N', 'N', na, 1, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
                                            b(:,lc,ld) , Size( b , Dim = 1 ), &
                                            0.0_wp, c3, Size( c3, Dim = 1 ) )      
    enddo
  enddo  
  
  do ld = 1, nd  
    do lc = ld+1, nc 
      do la = 1, na     
        c3(la,lc,ld) = c3(la,ld,lc)
      end do  
    enddo
  enddo  
  Call System_clock( finish, rate )
  
  Write( *, * ) 'Time for symmetric dgemm', Real( finish - start, wp ) / rate
  
  ! Direct
  Call System_clock( start, rate )
  Call dgemm( 'N', 'N', na, ne, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
                                            b , Size( b , Dim = 1 ), &
                                            0.0_wp, c4, Size( c4, Dim = 1 ) )
  Call System_clock( finish, rate )
  Write( *, * ) 'Time for straight dgemm', Real( finish - start, wp ) / rate

  Call System_clock( start, rate )
  c5 = 0.0_wp
  do ld = 1, nd  
    do lc = 1, ld  
       Call dgemv( 'N', na,  nb, 1.0_wp,  a, na, b(1,lc,ld), 1, 0.0_wp,  c5, 1 )      
    enddo
  enddo  
   
  do ld = 1, nd  
    do lc = ld+1, nc 
      do la = 1, na     
        c5(la,lc,ld) = c5(la,ld,lc)
      end do  
    enddo
  enddo  
  Call System_clock( finish, rate )
  Write( *, * ) 'Time for symmetric dgemv', Real( finish - start, wp ) / rate
  
  do la = 1, na 
    do lc = 1, nc
      do ld = 1, nd
      
         if ( dabs(c1(la,lc,ld) - c2(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c2', la,lc,ld, c1(la,lc,ld) - c2(la,lc,ld)
         endif               
                  
         if ( dabs(c1(la,lc,ld) - c3(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c3', la,lc,ld, c1(la,lc,ld) - c3(la,lc,ld)
         endif         
 
         if ( dabs(c1(la,lc,ld) - c4(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c4', la,lc,ld, c1(la,lc,ld) - c4(la,lc,ld)
         endif        
 
         if ( dabs(c1(la,lc,ld) - c5(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c5', la,lc,ld, c1(la,lc,ld) - c5(la,lc,ld)
         endif   
 
      enddo
    enddo
  enddo  
  
End Program blas_symm_tensor

I got different number in resulting array c3 (DGEMM) and c5 (DGEMV)

Thank you very much!

Steve_Lionel · ‎04-05-2021

This is not a Fortran compiler question - better asked in Intel® oneAPI Base Toolkit - Intel Community , which is where MKL hangs out.

View solution in original post

Steve_Lionel · ‎04-05-2021

This is not a Fortran compiler question - better asked in Intel® oneAPI Base Toolkit - Intel Community , which is where MKL hangs out.

AlphaF20 · ‎04-05-2021

Thanks you very much. I will post there.

Is there any way to utilize BLAS with symmetry for high rank tensor/array

Fortran Language