Intel® oneAPI Math Kernel Library
Ask questions and share information with other developers who use Intel® Math Kernel Library.

Problem with robust estimation of a covariance matrix.

Anas_A_
Beginner
561 Views

Hi I have the matrix "x" and I want to compute the covariance matrix. The i column of the matrix stores the observations
of the i variable.

The matrix is    
    0.8147    0.9058
    0.1270    0.9134
    0.6324    0.0975
and the true covariance matrix is
    0.1269   -0.0450
   -0.0450    0.2198

I read the manual Summary Statistics Application Notes (page 32) that explains how to find  a Robust Estimation of a Variance- -
Covariance Matrix and I wrote the following code in C.

The problem is that the results of the estimation are wrong.
Please, I would like to ask if someone could help me to find the bug.
Thank you very much.

The  code is:

/// Recursive Covariance, Mean.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <math.h>               /* Math stuff      ISOC  */
#include <stdio.h>              /* I/O lib         ISOC  */
#include <stdlib.h>             /* Standard Lib    ISOC  */
#include <fstream>
#include <mkl.h>
#include <conio.h>
#include <iostream>
#include <string>
#include <omp.h>
#include "mkl_vsl.h"

using namespace std;

#define DIM 2 /* number of parameters */
#define N 3 /* number of observations */


int main()
{

 int i, j;

 //x[DIM*N]
 double x[6]={0.4218, 0.7922, 0.6557 ,0.9157, 0.9595, 0.0357};

 VSLSSTaskPtr task;
 
 double params[VSL_SS_TBS_PARAMS_N];
 double rcov[DIM*DIM], rmean[DIM];
 
 MKL_INT nparams, xstorage, rcovstorage;
 MKL_INT p, n;
 
 int status;
 
 double breakdown, alpha, sigma, max_iter;
 /* Parameters of the task and initialization */
 p = DIM;
 n = N;
 
 xstorage = VSL_SS_MATRIX_STORAGE_ROWS;
 rcovstorage = VSL_SS_MATRIX_STORAGE_FULL;
 nparams = VSL_SS_TBS_PARAMS_N; /* number of TBS parameters */
 
 /* Parameters of the TBS estimator */
 breakdown = 0.3;
 alpha = 0.01;
 sigma = 0.01;
 max_iter = 30;

 params[0] = breakdown;
 params[1] = alpha;
 params[2] = sigma;
 params[3] = max_iter;

 /* Create a task */
 status = vsldSSNewTask( &task, &p, &n, &xstorage, x, 0, 0 );
 
 /* Initialize the task parameters */
 status = vsldSSEditRobustCovariance( task, &rcovstorage, &nparams, params, rmean, rcov );

 /* Compute the robust variance-covariance matrix */
 status = vsldSSCompute( task, VSL_SS_ROBUST_COV, VSL_SS_METHOD_TBS );

 
 printf("-------------------------------------------------------\n"); 
 
 printf("Robust covariance estimate:\n"); 
 for ( i = 0; i < p; i++ ) 
 { 
         for( j = 0; j < p; j++ ) printf("%lf \t \t", rcov[i*N+j]); 
         printf("\n"); 
    }


 printf("-------------------------------------------------------\n");

    printf("\n"); 
    printf("Robust mean estimate:\n"); 
    for ( i = 0; i < p; i++ )   printf("%lf\n, ", rmean); 
    printf("\n\n");

 
 /* Deallocate the task resources */
 status = vslSSDeleteTask( &task );
return 0;
}

 

0 Kudos
1 Solution
Ying_H_Intel
Employee
561 Views

Hi Anas,

Just wonder you mentioned, The matrix is   
    0.8147    0.9058
    0.1270    0.9134
    0.6324    0.0975
and the true covariance matrix is
    0.1269   -0.0450
   -0.0450    0.2198

why in the code it is double x[6]={0.4218, 0.7922, 0.6557 ,0.9157, 0.9595, 0.0357};

There is a example code under MKL install directory/ vsldrobustcov.c  to show how to use the function to get robust estimation of a covariance matrix. You can run it first to see the result.

and a simple corariance matrix can be obtained, for example,

#include <stdio.h>
#include <stdlib.h>
#include <mkl.h>

#define DIM 2 /* number of parameters */
#define N 4 /* number of observations */

int main ()
{

 MKL_INT p, n;
   int status;
// initialize
double x[8]={1, 3, 4 ,5, 2, 6, 2, 2};
 p = DIM;
 n = N;
 MKL_INT xstorage = VSL_SS_MATRIX_STORAGE_ROWS;
 MKL_INT covstorage = VSL_SS_MATRIX_STORAGE_FULL;

double w[2], mean[2],cov[2*2];
w[0] = 0.0; w[1] = 0.0;
int i, j;

VSLSSTaskPtr task;
for ( i = 0; i < p; i++ ) mean = 0.0;
for ( i = 0; i < p*p; i++ ) cov = 0.0;

/* Create task */
status = vsldSSNewTask( &task, &p, &n, &xstorage, x, 0, 0 );
/* Initialize the task parameters */
status = vsldSSEditCovCor( task, mean, cov, &covstorage, 0, 0 );
/* Calculate covariance for the x1 data */
status = vsldSSCompute( task, VSL_SS_COV, VSL_SS_METHOD_FAST );

status = vslSSDeleteTask( &task );

 printf("-------------------------------------------------------\n");
 
 printf("Robust covariance estimate:\n");
 for ( i = 0; i < p; i++ )
 {
         for( j = 0; j < p; j++ ) printf("%lf \t \t", cov[i*DIM+j]);
         printf("\n");
 }

 return 0;

}

Best Regards,

Ying

View solution in original post

0 Kudos
6 Replies
Ying_H_Intel
Employee
562 Views

Hi Anas,

Just wonder you mentioned, The matrix is   
    0.8147    0.9058
    0.1270    0.9134
    0.6324    0.0975
and the true covariance matrix is
    0.1269   -0.0450
   -0.0450    0.2198

why in the code it is double x[6]={0.4218, 0.7922, 0.6557 ,0.9157, 0.9595, 0.0357};

There is a example code under MKL install directory/ vsldrobustcov.c  to show how to use the function to get robust estimation of a covariance matrix. You can run it first to see the result.

and a simple corariance matrix can be obtained, for example,

#include <stdio.h>
#include <stdlib.h>
#include <mkl.h>

#define DIM 2 /* number of parameters */
#define N 4 /* number of observations */

int main ()
{

 MKL_INT p, n;
   int status;
// initialize
double x[8]={1, 3, 4 ,5, 2, 6, 2, 2};
 p = DIM;
 n = N;
 MKL_INT xstorage = VSL_SS_MATRIX_STORAGE_ROWS;
 MKL_INT covstorage = VSL_SS_MATRIX_STORAGE_FULL;

double w[2], mean[2],cov[2*2];
w[0] = 0.0; w[1] = 0.0;
int i, j;

VSLSSTaskPtr task;
for ( i = 0; i < p; i++ ) mean = 0.0;
for ( i = 0; i < p*p; i++ ) cov = 0.0;

/* Create task */
status = vsldSSNewTask( &task, &p, &n, &xstorage, x, 0, 0 );
/* Initialize the task parameters */
status = vsldSSEditCovCor( task, mean, cov, &covstorage, 0, 0 );
/* Calculate covariance for the x1 data */
status = vsldSSCompute( task, VSL_SS_COV, VSL_SS_METHOD_FAST );

status = vslSSDeleteTask( &task );

 printf("-------------------------------------------------------\n");
 
 printf("Robust covariance estimate:\n");
 for ( i = 0; i < p; i++ )
 {
         for( j = 0; j < p; j++ ) printf("%lf \t \t", cov[i*DIM+j]);
         printf("\n");
 }

 return 0;

}

Best Regards,

Ying

0 Kudos
Anas_A_
Beginner
561 Views

Hi Ying!!!

Just wonder you mentioned, The matrix is   
    0.8147    0.9058
    0.1270    0.9134
    0.6324    0.0975
and the true covariance matrix is
    0.1269   -0.0450
   -0.0450    0.2198

why in the code it is double x[6]={0.4218, 0.7922, 0.6557 ,0.9157, 0.9595, 0.0357};

 

Well, you are right.... if you haven't mentioned probably I wouldn't have noticed... How kind you are to help me! Thanks a lot for your help, again!

0 Kudos
Andrey_N_Intel
Employee
561 Views

Hi Anas,

Intel(R) MKL version of robust covariance algorithm requires the number of observations n to be greater than doubled dimension 2p: n > 2p. The library returns VSL_SS_ERROR_BAD_OBSERV_N error code, if this requirement is not met.

Andrey

0 Kudos
Ying_H_Intel
Employee
561 Views

Hi Andrey,

Thanks much for the notes.  Right, if chang the parameter ( n>2p) like

#define DIM 2 /* number of parameters */

#define N 5 /* number of observations */

 

int main()

{

int i, j;

 

 double x[10]={0.8147,0.1270,0.6324,0.9058,0.9134, 0.0975,0.4218, 0.7922, 0.6557 ,0.9157};

...

/* Initialize the task parameters */
 status = vsldSSEditRobustCovariance( task, &rcovstorage, &nparams, params, rmean, rcov );
 printf ( "status %d\n", status);
 /* Compute the robust variance-covariance matrix */
 status = vsldSSCompute( task, VSL_SS_ROBUST_COV, VSL_SS_METHOD_TBS );
  printf ( "status %d\n", status);

the whole code will work.

Thanks
Ying

0 Kudos
Anas_A_
Beginner
561 Views

Thank you very much for your help

Well, I have this problem now. I have a 10-dimensional vector

theta = {   -0.4541
                -0.2327
               -0.1474
               -0.4424
               -0.2955
               -0.4420
               0.9612
               2.3070
              -0.7524
              -0.4429 }

where each row is an observation of the i variable, i=1,2,...,10.

and  a matrix trace of size DIM*NSAMPLES, where DIM=10. I want to store the vector theta to each column of the matrix trace. The problem is this:

I have noticed that if NSAMPLES<7 then all works. If NSAMPLES>=7, then the covariance matrix has some strange numbers like  11945302443632259000000000000000000000000000000000000000000.000000.

but the estimated mean is correct .

Please, I don't understand what I have done wrong.

 

The code is:

#include "stdafx.h"
#include <math.h>               /* Math stuff      ISOC  */
#include <stdio.h>              /* I/O lib         ISOC  */
#include <stdlib.h>             /* Standard Lib    ISOC  */
#include <fstream>
#include <mkl.h>
#include <conio.h>
#include <iostream>
#include <string>
#include <omp.h>
#include <time.h>
#include "mkl_vml.h"

using namespace std;
void showMtrx(double *mtrx, int nrow, int ncol);
void fReadMtrx(string f, double *mtrx, int nrow, int ncol);

#define T 1001
#define SEED time(NULL)

#define NPARS 10 /*numbers of parameters*/
#define MAXPQR 3

#define NSAMPLES 7 /*numbers of observations*/


int main()
{    
    double trace[NPARS][NSAMPLES];
    int i, j, t,status;
    double theta[NPARS];
    double meanold[NPARS], covold[NPARS*NPARS];
    MKL_INT p, n;

    fReadMtrx("theta.txt", theta, NPARS, 1);

    VSLSSTaskPtr task;        
    p= NPARS;
    n = NSAMPLES;
    MKL_INT xstorage = VSL_SS_MATRIX_STORAGE_ROWS;
    MKL_INT covstorage = VSL_SS_MATRIX_STORAGE_FULL;


    /* MAIN ITERATIVE LOOP. */
    for(t=1; t<= NSAMPLES; t++){    
        printf("%d \n", t);
        for( j = 0; j < NPARS; j++ ){
            trace[t-1] = theta;
        }
    }// End of "t" loop.

 

    /* Create task */
    status = vsldSSNewTask( &task, &p, &n, &xstorage, &trace[0][0], 0, 0 );
    /* Initialize the task parameters */
    status = vsldSSEditCovCor( task, meanold, covold, &covstorage, 0, 0 );
    /* Calculate covariance for the x1 data */
    status = vsldSSCompute( task, VSL_SS_COV, VSL_SS_METHOD_FAST );

    printf("Robust covariance estimate:\n");
    for ( i = 0; i < p; i++ ){
        for( j = 0; j < p; j++ ){
            printf("%lf \n",j, covold[i*NPARS+j]);
        }
        printf("\n");
    }

    printf("Robust mean estimate:\n");
    for ( i = 0; i < p; i++ ){ printf("%0.4f \t \t", meanold);
    printf("\n");
    }
        
status = vslSSDeleteTask( &task );
return 0;
}

/*  */

void fReadMtrx(string f, double *mtrx, int nrow, int ncol)
{
  ifstream data(f.c_str(), ios::in);
  if (!data) {
    cerr << "File " << f << " could not be opened." << endl;
    exit(1);
  }
  for(int i = 0; i < nrow; i++)
    for(int j = 0; j < ncol; j++)
      data >> mtrx[i*ncol+j];
} // end of function fReadMtrx
void showMtrx(double *mtrx, int nrow, int ncol)
{
  for (int i = 0; i < nrow; i++) {
    for (int j = 0; j < ncol; j++) {
      cout <<  mtrx[i*ncol+j] << "\t";
    }
    cout << endl;
  }
} //end of function showMtrx

 

0 Kudos
Anas_A_
Beginner
561 Views

I found it! I was too tired yesterday to understand it. Ying and Andrey thank you very much for your help!

0 Kudos
Reply