Intel® oneAPI Math Kernel Library
Ask questions and share information with other developers who use Intel® Math Kernel Library.
6975 Discussions

Is there any way to utilize BLAS with symmetry for high rank tensor/array in Fortran MKL?

AlphaF20
New Contributor I
1,815 Views

Hi,

I came cross a type of tensor contraction with symmetry in individual tensor(s) in Fortran.  I heard it is related to MKL. Hence, I posted this question here.

The question is, is there a way to utilize such symmetries in BLAS?

For instance, for a tensor contraction

 

A[a,b] * B[b,c,d] = C[a,c,d]

 

 

where the Einstein summation rule was assumed, repeated index, b, stands for summation. i.e., \sum_b in the above equation. Tensor/array A,B,C are all real. The tensor/array b has a symmetry

 

 

B[b,c,d] = B[b,d,c]

 

 

which leads to

 

C[a,c,d] = C[a,d,c]

 

 

 

In BLAS, there are some subroutines about symmetric matrix, but not higher rank tensor/array as far as I know. Is there any way to utilize the above symmetries in DGEMM or any other subroutines?

I heard to first do loops over indices c and d, then do DGEMM with array with fewer indices. If I define new arrays

 

B_small(:) = B(:,c,d), C_small(:) = C(:,c,d)

 

that would take a bit time (I think I tested the wall time at some stage). Hence, I would like to know any efficient solution. My attempt code is following

 

 

Program blas_symm_tensor

  Use, Intrinsic :: iso_fortran_env, Only :  wp => real64, li => int64

  Implicit None

  Real( wp ), Dimension( :, :    ), Allocatable :: a
  Real( wp ), Dimension( :, :, : ), Allocatable :: b
  Real( wp ), Dimension( :, :, : ), Allocatable :: c1, c2, c3, c4, c5

  Integer :: na, nb, nc, nd, ne
  Integer :: la, lb, lc, ld  
  Integer( li ) :: start, finish, rate, numthreads
  
  Write( *, * ) 'na, nb, nc, nd ?'
  Read( *, * ) na, nb, nc, nd
  
  ne = nc * nd
  
  Allocate( a ( 1:na, 1:nb ) ) 
  Allocate( b ( 1:nb, 1:nc, 1:nd ) ) 
  Allocate( c1( 1:na, 1:nc, 1:nd ) ) 
  Allocate( c2( 1:na, 1:nc, 1:nd ) ) 
  Allocate( c3( 1:na, 1:nc, 1:nd ) )
  Allocate( c4( 1:na, 1:nc, 1:nd ) )
  Allocate( c5( 1:na, 1:nc, 1:nd ) )
  
  ! Set up some data
  Call Random_number( a )
  Call Random_number( b )
  
  ! symmetrize tensor b
  do la = 1, na 
    do lc = 1, nc
      do ld = lc+1, nd
         b(la,lc,ld) = b(la,ld,lc)
      enddo
    enddo
  enddo
  
  Call System_clock( start, rate )
  c1 = 0.0_wp
  
  do ld = 1, nd  
    do lc = 1, nc
      do lb = 1, nb
        do la = 1, na    
          c1(la,lc,ld) = c1(la,lc,ld)  + a(la,lb) * b(lb, lc, ld)
        enddo  
      enddo
    enddo
  enddo  
 
  Call System_clock( finish, rate )
  Write( *, * ) 'Time for loop', Real( finish - start, wp ) / rate  
 
  Call System_clock( start, rate )
  c2 = 0.0_wp
  
  do ld = 1, nd  
    do lc = 1, ld
      do lb = 1, nb
        do la = 1, na    
          c2(la,lc,ld) = c2(la,lc,ld)  + a(la,lb) * b(lb, lc, ld)
        enddo  
      enddo
    enddo
  enddo  
 
  do ld = 1, nd  
    do lc = ld+1, nc 
      do la = 1, na     
        c2(la,lc,ld) = c2(la,ld,lc)
      end do  
    enddo
  enddo  

  Call System_clock( finish, rate )
  Write( *, * ) 'Time for symmetric loop', Real( finish - start, wp ) / rate  

  Call System_clock( start, rate )
  c3 = 0.0_wp
  
  do ld = 1, nd  
    do lc = 1, ld  
       Call dgemm( 'N', 'N', na, 1, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
                                            b(:,lc,ld) , Size( b , Dim = 1 ), &
                                            0.0_wp, c3, Size( c3, Dim = 1 ) )      
    enddo
  enddo  
  
  do ld = 1, nd  
    do lc = ld+1, nc 
      do la = 1, na     
        c3(la,lc,ld) = c3(la,ld,lc)
      end do  
    enddo
  enddo  
  Call System_clock( finish, rate )
  
  Write( *, * ) 'Time for symmetric dgemm', Real( finish - start, wp ) / rate
  
  ! Direct
  Call System_clock( start, rate )
  Call dgemm( 'N', 'N', na, ne, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
                                            b , Size( b , Dim = 1 ), &
                                            0.0_wp, c4, Size( c4, Dim = 1 ) )
  Call System_clock( finish, rate )
  Write( *, * ) 'Time for straight dgemm', Real( finish - start, wp ) / rate

  Call System_clock( start, rate )
  c5 = 0.0_wp
  do ld = 1, nd  
    do lc = 1, ld  
       Call dgemv( 'N', na,  nb, 1.0_wp,  a, na, b(1,lc,ld), 1, 0.0_wp,  c5, 1 )      
    enddo
  enddo  
   
  do ld = 1, nd  
    do lc = ld+1, nc 
      do la = 1, na     
        c5(la,lc,ld) = c5(la,ld,lc)
      end do  
    enddo
  enddo  
  Call System_clock( finish, rate )
  Write( *, * ) 'Time for symmetric dgemv', Real( finish - start, wp ) / rate
  
  do la = 1, na 
    do lc = 1, nc
      do ld = 1, nd
      
         if ( dabs(c1(la,lc,ld) - c2(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c2', la,lc,ld, c1(la,lc,ld) - c2(la,lc,ld)
         endif               
                  
         if ( dabs(c1(la,lc,ld) - c3(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c3', la,lc,ld, c1(la,lc,ld) - c3(la,lc,ld)
         endif         
 
         if ( dabs(c1(la,lc,ld) - c4(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c4', la,lc,ld, c1(la,lc,ld) - c4(la,lc,ld)
         endif        
 
         if ( dabs(c1(la,lc,ld) - c5(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c5', la,lc,ld, c1(la,lc,ld) - c5(la,lc,ld)
         endif   
 
      enddo
    enddo
  enddo  
  
End Program blas_symm_tensor

 

 

 

I got different number in resulting array c3 (DGEMM) and c5 (DGEMV)

Thank you very much!

0 Kudos
1 Solution
Peter_C_Intel
Employee
1,750 Views

Another option, if you only need e.g. the "upper triangle" of C to be stored, is to do a loop in the d dimension around DGEMM:

for d = 1...ld
       C(:, 1:d, d) <- A * B(:, 1:d, d)

The inner matrix multiplications can be performed with DGEMM.

View solution in original post

0 Kudos
14 Replies
RahulV_intel
Moderator
1,791 Views

Hi,


We are working on your query internally and will get back to you.


Thanks,

Rahul


0 Kudos
Peter_C_Intel
Employee
1,772 Views

Hi @AlphaF20 , thanks for your question!

As you mentioned, you can use DGEMM to perform this contraction, and in fact you can do it with a single DGEMM call if you'd like. Going back to your original operation:

C[a,c,d] <- A[a,b] * B[b,c,d]

notice you can flatten the last two indices (c,d) into a single linear index f without rearranging the data:

C'[a,f] <- A[a,b] * B'[b,f]

Here I've reinterpreted C, B as matrices C', B' of size la x (lc*ld) and lb x (lc*ld) respectively. The single index f replaces the two indices c, d (f = c + d *lc), and ranges from 1...lc*ld. This is a plain matrix multiply, so you can call MKL DGEMM.

So far we haven't taken any advantage of the symmetry of B. If it works for the rest of the application, you could only store the unique elements of B' by keeping only the unique columns, say where c <= d. In this way, f becomes a linear index into the "upper triangles" of B, ranging from 1...(lc*(lc + 1)/2).

0 Kudos
AlphaF20
New Contributor I
1,766 Views

Hi @Peter_C_Intel ,

 

  Thank you so much for your answer!

 

  About >you could only store the unique elements of B' by keeping only the unique columns, say where c <= d. 

  May I know how to realize it?

  I can define new arrays with smaller dimensions, e.g., B''(:,:), and using the second dimension to store the upper triangle indices. My experience from other cases is that, introducing new arrays take some time. It may compensate for the advantage of utilizing the symmetry. Hence, I would like to know if there is any method can do DGEMM without using any additional arrays.

 Maybe I should try to test more cases with new arrays such as B''(:,:) and comparing the timing.

 

0 Kudos
Peter_C_Intel
Employee
1,763 Views

Hi @AlphaF20 -- At the moment I can't think of a clever way to avoid copying data if you need the full 3D tensor in other parts of your code. How large are la/lb/lc/ld typically for your use case?

0 Kudos
AlphaF20
New Contributor I
1,755 Views

Hi @Peter_C_Intel ,

 

  Thank you so much for your prompt reply.  My la is about 20~30, lb~ld about 30~60. In some cases, I need to deal with higher dimensional arrays and the calculation needs to be done multiple times. The accumulate wall time can be high.

0 Kudos
Peter_C_Intel
Employee
1,751 Views

Another option, if you only need e.g. the "upper triangle" of C to be stored, is to do a loop in the d dimension around DGEMM:

for d = 1...ld
       C(:, 1:d, d) <- A * B(:, 1:d, d)

The inner matrix multiplications can be performed with DGEMM.

0 Kudos
AlphaF20
New Contributor I
1,743 Views

Hi @Peter_C_Intel ,

 

  Thank you so much for your wonderful suggestion! I appreciate it!

  I tried the following code to implement your idea

  Real( wp ), Dimension( :, :, : ), Allocatable :: c6

  Allocate( c6( 1:na, 1:nc, 1:nd ) )


  c6 = 0.0_wp
  do ld = 1, nd  
     Call dgemm( 'N', 'N', na, 1, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
                                            b(:,1:ld,ld) , Size( b , Dim = 1 ), &
                                            0.0_wp, c6, Size( c6, Dim = 1 ) )      
  enddo  
     
  do ld = 1, nd  
    do lc = ld+1, nc 
      do la = 1, na     
        c6(la,lc,ld) = c6(la,ld,lc)
      end do  
    enddo
  enddo    
  

  
    do la = 1, na 
      do lc = 1, nc
        do ld = 1, nd
      
         if ( dabs(c1(la,lc,ld) - c6(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c6 symmetric dgemm-2', la,lc,ld, c1(la,lc,ld) - c5(la,lc,ld)
         endif            
 
      enddo
    enddo
  enddo  

 

I got different numbers than the nested loop value c1 (in the code of the first post). Perhaps I made some mistake in using DGEMM  

  How to do DGEMM correctly here? I am sorry for bothering you so many times.

0 Kudos
Peter_C_Intel
Employee
1,740 Views

Hi @AlphaF20 -- your code looks mostly good; remember to also index into C:

dgemm(..., c6(:,1:ld,ld), ...)

0 Kudos
AlphaF20
New Contributor I
1,739 Views

Hi @Peter_C_Intel ,

Thank you so much!

I revised to

 

  do ld = 1, nd  
     Call dgemm( 'N', 'N', na, 1, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
                                            b(:,1:ld,ld) , Size( b , Dim = 1 ), &
                                            0.0_wp,  c6(:,1:ld,ld), Size( c6, Dim = 1 ) )      
  enddo  

 

 

still got different value than c1 from nested loops.   I used ifort (IFORT) 19.0.3.199 20190206 and compile the f90 file using ifort -mkl.

0 Kudos
Peter_C_Intel
Employee
1,730 Views

@AlphaF20 Which entries in c6 are incorrect? (all of them?)

0 Kudos
AlphaF20
New Contributor I
1,724 Views

Hi @Peter_C_Intel,

 Thank you once again for your reply. I appreciate your help!

  Many entries, not exactly upper triangle. By the printing code

  do la = 1, na 
    do lc = 1, nc
      do ld = 1, nd
      
         if ( dabs(c1(la,lc,ld) - c6(la,lc,ld))  > 1.e-6 ) then 
           write (*,*) '!!! c6 symmetric dgemm-2', la,lc,ld, c1(la,lc,ld), c6(la,lc,ld)
         endif            
 
      enddo
    enddo
  enddo  

 

led to (na=nb=nc=nd=5)

!!! c6 symmetric dgemm-2           1           2           2
   1.31348887015712       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           2           3
  0.837588080628633       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           2           4
  0.753232695495532       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           2           5
  0.478117026507440       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           3           2
  0.837588080628633       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           3           3
   1.60546094997582       0.000000000000000E+000

I don't know where I made the mistake with DGEMM

If you need any additional information, please let me know. I try to be as clear as I can.

 

 

0 Kudos
Peter_C_Intel
Employee
1,719 Views

That's strange that e.g. the (1,2,4) entry is incorrect (zero), while the (1,4,2) entry is correct, since the (1,4,2) entry was copied from the (1,2,4) entry.

0 Kudos
AlphaF20
New Contributor I
1,702 Views

Oops, I did not paste all inconsistent entries

 

 

 !!! c6 symmetric dgemm-2           1           2           2
   1.31348887015712       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           2           3
  0.837588080628633       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           2           4
  0.753232695495532       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           2           5
  0.478117026507440       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           3           2
  0.837588080628633       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           3           3
   1.60546094997582       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           3           4
  0.635405154591089       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           3           5
  0.936981465066013       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           4           2
  0.753232695495532       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           4           3
  0.635405154591089       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           4           4
  0.951465241952073       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           4           5
   1.35589302240768       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           5           2
  0.478117026507440       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           5           3
  0.936981465066013       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           5           4
   1.35589302240768       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           1           5           5
  0.947407510599024       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           2           2
  0.982740142784391       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           2           3
  0.454261565081234       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           2           4
  0.379019697626723       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           2           5
  0.463878131907890       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           3           2
  0.454261565081234       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           3           3
   1.38584009273324       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           3           4
  0.772631816537492       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           3           5
  0.838583550164614       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           4           2
  0.379019697626723       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           4           3
  0.772631816537492       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           4           4
  0.801559198544687       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           4           5
  0.945483527728317       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           5           2
  0.463878131907890       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           5           3
  0.838583550164614       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           5           4
  0.945483527728317       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           2           5           5
   1.01922577710048       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           2           2
   1.21501967341257       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           2           3
  0.751368628288491       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           2           4
   1.20695111834953       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           2           5
  0.768640451902166       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           3           2
  0.751368628288491       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           3           3
   1.24850494613211       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           3           4
   1.02216928236875       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           3           5
   1.02297953612829       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           4           2
   1.20695111834953       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           4           3
   1.02216928236875       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           4           4
  0.854921457179884       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           4           5
   1.51419329132511       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           5           2
  0.768640451902166       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           5           3
   1.02297953612829       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           5           4
   1.51419329132511       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           3           5           5
   1.05841684139229       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           2           2
   2.42535630024303       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           2           3
   1.47908052253849       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           2           4
   1.92820282226805       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           2           5
   1.89486126875148       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           3           2
   1.47908052253849       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           3           3
   3.14610468834190       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           3           4
   2.08679999203636       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           3           5
   2.07491602246583       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           4           2
   1.92820282226805       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           4           3
   2.08679999203636       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           4           4
   1.56443083824290       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           4           5
   2.79345022032791       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           5           2
   1.89486126875148       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           5           3
   2.07491602246583       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           5           4
   2.79345022032791       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           4           5           5
   2.03919814673137       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           2           2
   1.37501064481411       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           2           3
   1.02125298029307       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           2           4
   1.50769866614544       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           2           5
  0.959930544467842       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           3           2
   1.02125298029307       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           3           3
   1.98036308866278       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           3           4
   1.18770561844815       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           3           5
   1.43258214875474       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           4           2
   1.50769866614544       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           4           3
   1.18770561844815       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           4           4
   1.19745384959945       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           4           5
   1.96112789765020       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           5           2
  0.959930544467842       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           5           3
   1.43258214875474       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           5           4
   1.96112789765020       0.000000000000000E+000
 !!! c6 symmetric dgemm-2           5           5           5
   1.32984150498105       0.000000000000000E+000

 

 

both 124 and 142 are there. I am very sorry for this issue.

0 Kudos
AlphaF20
New Contributor I
1,712 Views

Hi @Peter_C_Intel ,

 

 

  Seems I got a fix!

 

  do ld = 1, nd  
     Call dgemm( 'N', 'N', na, 1, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
                                            b(:,1:ld,ld) , Size( b , Dim = 1 ), &
                                            0.0_wp,  c6(:,1:ld,ld), Size( c6, Dim = 1 ) )      
  enddo  
     

the index between na and nb should *not* be 1. Replace it as ld seems to work

  do ld = 1, nd  
     Call dgemm( 'N', 'N', na, ld, nb, 1.0_wp, a , Size( a , Dim = 1 ), &
                                            b(:,1:ld,ld) , Size( b , Dim = 1 ), &
                                            0.0_wp,  c6(:,1:ld,ld), Size( c6, Dim = 1 ) )      
  enddo  
     

 

and got faster

 na, nb, nc, nd ?
100 100 100 100
 Time for straight dgemm  2.426400000000000E-002
 Time for symmetric dgemm-2  6.451000000000000E-003

Thank you so much! You made my day!

0 Kudos
Reply