!************************************************************************************************* program test use omp_lib implicit none include 'mkl.fi' include 'mpif.h' integer, parameter :: I8B = selected_int_kind(12) ! 8-byte integer integer, parameter :: I4B = selected_int_kind(9) ! 4-byte integer integer, parameter :: sg = kind(1.0) ! single precision integer, parameter :: db = kind(1.0D0) ! double precision integer, parameter :: pr = db ! working precision integer, parameter :: stdout = 6 integer :: numGroups,numDoF,numThreads, ierr, thread, i integer :: maxThreads, numProc, numOmpProc,rank, mkl_num_threads integer(kind=OMP_integer_kind) :: numT character(len=198) :: mversion logical :: master real(pr), allocatable :: a(:,:,:), b(:,:,:), c(:,:,:) real(pr) :: tstart, tdiff1,tdiff2 real(pr), parameter :: alpha = 1_pr real(pr), parameter :: beta = 0_pr call mpi_init(ierr) call mpi_comm_rank(MPI_COMM_WORLD,rank,ierr) call mpi_comm_size(MPI_COMM_WORLD,numProc,ierr) master = rank .eq. 0 numGroups = 200 numDoF = 1000 maxThreads = OMP_get_max_threads() numOmpProc= OMP_get_num_procs() mkl_num_threads = mkl_get_max_threads() write(stdout,'(A,I2,A,I2)') 'OMP max threads = ', maxThreads, ' threads on rank ', rank write(stdout,'(A,I2,A,I2)') 'OMP num Proc = ', numOmpProc, ' threads on rank ', rank write(stdout,'(A,I2,A,I2)') 'MKL max Threads = ', mkl_num_threads, ' threads on rank ', rank numThreads = min(maxThreads,numOmpProc) numT = numThreads call OMP_set_num_threads(numT) if(0 .eq. rank) then call mkl_get_version_string(mversion) write(stdout,*) mversion write(stdout,*) 'Number of Groups: ', numGroups write(stdout,*) 'Number of DoF per matrix: ', numDoF end if !write(stdout,'(A,I2,A,I2)') 'There are ', numThreads, ' threads on rank ', rank !write(stdout,'(A,I2,A,I2)') 'MKL is using ', mkl_num_threads, ' threads on rank ', rank flush(stdout) !call mkl_set_num_threads(numT) call mpi_barrier(MPI_COMM_WORLD,ierr) allocate(a(numDoF,numDoF,numThreads)); a = 1._pr allocate(b(numDoF,numDoF,numThreads)); b = 1._pr allocate(c(numDoF,numDoF,numThreads)); c = 0._pr !------------------------------------------------------------------------------------ call mpi_barrier(MPI_COMM_WORLD,ierr) if(master) write(stdout,*) '------------------------------------------------------' flush(stdout) call mpi_barrier(MPI_COMM_WORLD,ierr) if (master) write(stdout,*) 'Starting threaded.' write(stdout,'(A,I2,A,I2)') 'There are ', numThreads, ' threads on rank ', rank write(stdout,'(A,I2,A,I2)') 'MKL is using ', mkl_num_threads, ' threads on rank ', rank flush(stdout) call mpi_barrier(MPI_COMM_WORLD,ierr) tstart = dsecnd() !$omp parallel default(shared) private(i,thread) thread = omp_get_thread_num()+1 !$omp do schedule(static) do i = 1,numGroups call dgemm('N', 'N', NumDoF,NumDoF,NumDoF, alpha, a(:,:,thread), NumDoF, b(:,:,thread), NumDoF, beta, c(:,:,thread), NumDoF) end do !$omp end do !$omp end parallel !call mpi_barrier(MPI_COMM_WORLD,ierr) tdiff1 = dsecnd() - tstart write(stdout,*) 'TIME for Threaded: ', tdiff1, ' on rank ', rank flush(stdout) !------------------------------------------------------------------------------------ ! call mpi_barrier(MPI_COMM_WORLD,ierr) ! ! call OMP_set_num_threads(numT) ! write(stdout,'(A,I2,A,I2)') 'There are ', numThreads, ' threads on rank ', rank ! write(stdout,'(A,I2,A,I2)') 'MKL is using ', mkl_num_threads, ' threads on rank ', rank ! flush(stdout) ! ! if (master) write(stdout,*) 'Starting threaded.' ! tstart = dsecnd() ! !$omp parallel default(shared) private(i,thread) ! thread = omp_get_thread_num()+1 ! !$omp do schedule(static) ! do i = 1,numGroups ! call dgemm('N', 'N', NumDoF,NumDoF,NumDoF, alpha, a(:,:,thread), NumDoF, b(:,:,thread), NumDoF, beta, c(:,:,thread), NumDoF) ! end do ! !$omp end do ! !$omp end parallel ! !call mpi_barrier(MPI_COMM_WORLD,ierr) ! tdiff1 = dsecnd() - tstart ! write(stdout,*) 'TIME for Threaded: ', tdiff1, ' on rank ', rank ! flush(stdout) ! !------------------------------------------------------------------------------------ call mpi_barrier(MPI_COMM_WORLD,ierr) if(master) write(stdout,*) '------------------------------------------------------' flush(stdout) call mpi_barrier(MPI_COMM_WORLD,ierr) if(master) write(stdout,*) 'Starting non-threaded.' write(stdout,'(A,I2,A,I2)') 'There are ', numThreads, ' threads on rank ', rank write(stdout,'(A,I2,A,I2)') 'MKL is using ', mkl_num_threads, ' threads on rank ', rank flush(stdout) tstart = dsecnd() do i = 1,numGroups call dgemm('N', 'N', NumDoF,NumDoF,NumDoF, alpha, a(:,:,1), NumDoF, b(:,:,1), NumDoF, beta, c(:,:,1), NumDoF) end do !call mpi_barrier(MPI_COMM_WORLD,ierr) tdiff2 = dsecnd() - tstart write(stdout,*) 'TIME for NON-Threaded: ', tdiff2, ' on rank ', rank flush(stdout) !------------------------------------------------------------------------------------ call mpi_barrier(MPI_COMM_WORLD,ierr) if(master) write(stdout,*) '------------------------------------------------------' flush(stdout) call mpi_barrier(MPI_COMM_WORLD,ierr) if(master) write(stdout,*) 'Starting non-threaded.' if(master) write(stdout,*) 'Calling mkl_set_num_threads' call mkl_set_num_threads(numT) write(stdout,'(A,I2,A,I2)') 'There are ', numThreads, ' threads on rank ', rank write(stdout,'(A,I2,A,I2)') 'MKL is using ', mkl_num_threads, ' threads on rank ', rank flush(stdout) tstart = dsecnd() do i = 1,numGroups call dgemm('N', 'N', NumDoF,NumDoF,NumDoF, alpha, a(:,:,1), NumDoF, b(:,:,1), NumDoF, beta, c(:,:,1), NumDoF) end do !call mpi_barrier(MPI_COMM_WORLD,ierr) tdiff2 = dsecnd() - tstart write(stdout,*) 'TIME for NON-Threaded: ', tdiff2, ' on rank ', rank !write(stdout,*) 'RATIO: ', tdiff1/tdiff2 deallocate(a,b,c) call mpi_finalize(ierr) end program test