Community
cancel
Showing results for 
Search instead for 
Did you mean: 
Hamidreza_K_
Beginner
65 Views

Problem with free

Hi all,

I wrote the following application and ran it on a Xeon Phi. As you can see, the function containing the offloads is invoked 5 times. The first time, memory is allocated for arrays A, B, and C, and that memory is reused when the function is invoked again.

Please let me know why memory is not freed by offload_transfer.

//////////////////////////////////////////////

#include <stdio.h>
#include <string.h>

#include "offload.h"

#include <getopt.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <math.h>
#include <mkl.h>
#include <miclib.h>

/*
 * Shorthand for the alloc_if/free_if modifiers used in the offload clauses
 * below. alloc_if controls whether device memory is allocated for a pointer
 * variable when the offload construct is entered; free_if controls whether
 * it is released when the construct is left.
 */
#define    ALLOC        alloc_if(1)
#define FREE        free_if(1)
#define RETAIN    free_if(0)
#define REUSE   alloc_if(0)

void gemm_kernel_mic_single_fit(
          const int M, const int N, const int K,
          const double alpha,
          const double *A, const int lda,
          const double *B, const int ldb,
          const double beta,
          double *C, const int ldc)
{      
  // transfer matrices A, B and C to device memory
    #pragma offload_transfer target (mic) in(alpha, beta,    M, N, K : ALLOC RETAIN)      
  #pragma offload_transfer target (mic) in(B:length(K * N) ALLOC RETAIN)
  #pragma offload_transfer target (mic) in(A:length(M * K) ALLOC RETAIN)                                    
  #pragma offload_transfer target (mic) in(C:length(M * N) ALLOC RETAIN)

  #pragma offload target (mic)     in(M, N, K, alpha, beta : REUSE FREE)\
                                                                nocopy(A : length(M * K) REUSE FREE) \
                                                                nocopy(B : length(K * N) REUSE FREE) \
                                                                nocopy(C : length(M * N) REUSE RETAIN)
    {                                               
        cblas_dgemm(CblasRowMajor, CblasNoTrans,CblasNoTrans,
          M, N, K, alpha, A, K,
          B, N, beta, C, N);
    }
  
  #pragma offload_wait target (mic) stream(0)
   
  // recv results
  #pragma offload_transfer target (mic) out(C : length(M * N) REUSE FREE)
  #pragma offload_wait target (mic) stream(0)
}

/*
 * Test driver: builds three square matrices of order 11000, fills A and B
 * with pseudo-random values and C with zeros, then runs the offloaded GEMM
 * five times on the same buffers.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the host buffers cannot be
 * allocated.
 */
int main(void)
{
    const int m = 11000, n = 11000, k = 11000;

    /* Do the element-count arithmetic in size_t rather than int. */
    const size_t a_count = (size_t)m * k;
    const size_t b_count = (size_t)k * n;
    const size_t c_count = (size_t)m * n;

    /* Same modulus the fill has always used for both A and B. */
    const int modulus = m * k;

    double *a = malloc(a_count * sizeof *a);
    double *b = malloc(b_count * sizeof *b);
    double *c = malloc(c_count * sizeof *c);

    /* Each buffer is close to 1 GB, so allocation failure is a real case. */
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "Error: failed to allocate host matrices.\n");
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    /* Store into the elements; never overwrite the buffer pointers. */
    for (size_t i = 0; i < a_count; i++) {
        a[i] = rand() % modulus;
    }
    for (size_t i = 0; i < b_count; i++) {
        b[i] = rand() % modulus;
    }
    for (size_t i = 0; i < c_count; i++) {
        c[i] = 0;
    }

    for (int iter = 0; iter < 5; iter++) {
        gemm_kernel_mic_single_fit(m, n, k, 1, a, k, b, n, 1, c, n);
    }

    free(a);
    free(b);
    free(c);
    return EXIT_SUCCESS;
}

////////////////////////////////////////////

0 Kudos
6 Replies
Ravi_N_Intel
Employee
65 Views

How did you draw the conclusion that the buffer is not destroyed?

Hamidreza_K_
Beginner
65 Views

You can monitor the MIC using "micsmc", or call the following function at the beginning of each iteration to read the available memory. The buffers are destroyed, but the memory is not released.

//////////////////////////////////////////////

/*
 * Query the available memory of the first coprocessor (index 0) through
 * miclib and print it.
 *
 * Returns the value reported by mic_get_available_memory_size(), or 0 if any
 * step fails (an error message is printed in that case).
 * NOTE(review): the unit of the reported value is defined by miclib and is
 * not visible here - confirm against the miclib documentation.
 *
 * Every failure stops the query immediately, and each handle is released
 * exactly once: the device list right after the card number has been read,
 * the memory-utilization info and the device handle on the way out.
 */
uint32_t read_mic_mem_size(void)
{
    struct mic_devices_list *mdl = NULL;
    struct mic_device *mdh = NULL;
    struct mic_memory_util_info *minfo = NULL;
    uint32_t device_type = 0;
    uint32_t msize = 0;
    int card = 0;
    int ret;

    ret = mic_get_devices(&mdl);
    if (ret != E_MIC_SUCCESS) {
        if (ret == E_MIC_DRIVER_NOT_LOADED) {
            printf("Error: The driver is not loaded!\n");
        } else if (ret == E_MIC_ACCESS) {
            printf("Error: Access is denied to the driver!\n");
        } else {
            printf("Failed to get cards list.\n");
        }
        return 0;
    }

    /* The list is only needed to look up the card number; free it once. */
    ret = mic_get_device_at_index(mdl, 0, &card);
    (void)mic_free_devices(mdl);
    if (ret != E_MIC_SUCCESS) {
        printf("Error: Failed to get card!\n");
        return 0;
    }

    if (mic_open_device(&mdh, card) != E_MIC_SUCCESS) {
        printf("Error: Failed to open card.\n");
        return 0;
    }

    if (mic_get_device_type(mdh, &device_type) != E_MIC_SUCCESS) {
        printf("Error: Failed to get device type.\n");
        goto out_close;
    }

    if (device_type != KNC_ID) {
        printf("Error: Unknown device type.\n");
        goto out_close;
    }

    if (mic_get_memory_utilization_info(mdh, &minfo) != E_MIC_SUCCESS) {
        printf("Failed to get memory utilization info.\n");
        goto out_close;
    }

    if (mic_get_available_memory_size(minfo, &msize) != E_MIC_SUCCESS) {
        printf("Failed to get available memory size.\n");
        msize = 0;
    } else {
        /* uint32_t is not necessarily unsigned long; cast to match %lu. */
        printf("The memory size is %lu.\n", (unsigned long)msize);
    }

    (void)mic_free_memory_utilization_info(minfo);

out_close:
    (void)mic_close_device(mdh);
    return msize;
}

//////////////////////////////////////////////////////////////

Ravi_N_Intel
Employee
65 Views

The runtime library COI which handles all the buffers/memory keeps a pool of memory to use for buffer allocation. This memory is not released but re-used for performance reasons

Hamidreza_K_
Beginner
65 Views

Hi,

Could you tell me how the runtime can be forced to release the memory instead of keeping it?

jimdempseyatthecove
Black Belt
65 Views

Let me preface this by stating I have not compiled and tested your code. Ergo, my comments are unfounded (merely observations)

Your offload_transfer has: in(alpha, beta,    M, N, K : ALLOC RETAIN)

Whereas,

your offload has: in(M, N, K, alpha, beta : REUSE FREE)

IOW, it does not have nocopy, as do the A, B and C arguments.

Also note that the argument list of the offload is not in the same order as in the offload_transfer. As to whether this makes a difference for your memory "leak" (or unintended misuse), I cannot say.

Please test by making argument list in same order and by use of nocopy.

Second observation,

Your offload is neither asynchronous (no use of signal) nor using a stream (see this). Yet the code following the first offload appears to wait on a stream, even though nothing asynchronous is in flight.

Jim Dempsey

 

Ravi_N_Intel
Employee
65 Views

Currently we don't have a mechanism to free memory until the end of the program as we don't know when the last offload occurs.

Reply