Hi! In the code below I need to store each calculated value into the array p inside the loop over index k, but accessing the array takes a long time. I measured the run time on my machine (VS2008, release mode, i7 (6 cores), Windows 7):
- empty loop (no calculation and no array access): 17 ms
- calculation part (1) only: 17 ms
- no calculation part, just "p = 1000" in the loop: 21 ms
- calculation part (1) + "p = 1000": 57 ms
- calculation part (1) + "p = val": 123 ms
I cannot understand why accessing the array "p" inside the for loop takes so long.
=======================================
#include "tbb/tbb.h"
#include "tbb/blocked_range.h"
#include "tbb/blocked_range2d.h"
#include "tbb/parallel_for.h"
#include "tbb/tick_count.h"
#include "tbb/scalable_allocator.h"
using namespace std;
using namespace tbb;
/////
// Body object that ParallelProcessing passes (as *this) to parallel_for.
// NOTE(review): the base class CPartLoop is not defined in this listing.
class Average : public CPartLoop
{
public:
// NOTE(review): further members were elided here in the original post.
...
// Limits stored by ParallelProcessing. operator() rejects offsets where
// ii+nr falls outside [0, hh-1] or jj+nc falls outside [0, ww-1], and
// uses ww as the row stride when computing the flat index.
int hh, ww;
// Processes one 2D sub-range (rows x cols) of the iteration space.
// NOTE(review): blocked_range2d is written without template arguments;
// they were presumably lost when the post was rendered (likely <int>,
// since the loop counters are int) - confirm.
void operator() (const blocked_range2d& range) const;
// Stores hh/ww into the members of the same name and launches parallel_for.
void ParallelProcessing(int hh, int ww);
};
// Processes one 2D sub-range: for every (row i, col j) in `range` it runs the
// inner k loop, computes `val` from A, B, C and kp, and stores it via `p`.
//
// NOTE(review): this listing is damaged - template argument lists are missing
// (blocked_range2d, scalable_allocator), the k loop header is cut off, and
// kp, A, B, C, r2d and d2r are not declared anywhere in the visible code.
// They are presumably members of Average/CPartLoop or globals - confirm.
void Average::operator() (const blocked_range2d& range) const
{
register int i,j,k;
// Scratch buffer of 100 elements, allocated once per call and released at the
// end of the function. NOTE(review): scalable_allocator presumably had a
// <float> argument that was stripped - confirm.
float *p = scalable_allocator().allocate(100);
for(i=range.rows().begin(); i != range.rows().end(); ++i)
{
// Split the row index i into quotient (ii) and remainder (jj) modulo 50.
int ii = (int)(i/50);
int jj = i-ii*50;
for(j=range.cols().begin(); j != range.cols().end(); ++j)
{
// Split the column index j into quotient (dd) and remainder (ss) modulo 36.
// ss is not used in the visible code.
int dd = (int)(j/36);
int ss = j-dd*36;
// NOTE(review): the loop condition and increment are missing; everything
// after "k" (presumably starting with '<') was lost. The upper bound and
// how kp is derived from k cannot be recovered from this listing.
for(k=0; k
{
//// (1) math calculation part ////
// Row/column offsets taken from kp, truncated to int.
int nr = (int)kp.r;
int nc = (int)kp.c;
// Skip offsets that would land outside [0, hh-1] x [0, ww-1].
if( ii+nr>hh-1 || ii+nr<0 || jj+nc > ww-1 || jj+nc<0) continue;
// Flat index of the start of row (ii+nr), with ww as the row stride.
int index1 = (ii+nr)*ww;
int tr = A[index1+(jj+nc)];
int tc = B[index1+(jj+nc)];
float te = C[index1+(jj+nc)];
// NOTE(review): r2d / d2r look like radian<->degree factors - confirm.
float aa = dd + (kp.d*r2d);
float bb = kp.e;
float val = (tr+tc)*cos(aa*d2r)*(bb * te);
////// end of (1) ///////
// NOTE(review): as written this assigns a float to a float*; the subscript
// on p was presumably lost in rendering (the post describes an array store).
// NOTE(review): val is consumed only by this store, so when the store is
// removed or replaced by a constant an optimizing compiler may discard the
// calculation above; timings of those variants may not measure it - verify.
p = val; // 1000 // (2) array access part
}
}
}
// Release the scratch buffer with the same element count used for allocate.
tbb::scalable_allocator().deallocate(p,100);
}
void Average::ParallelProcessing(int hh, int ww)
{
this->hh = hh;
this->ww = ww;
parallel_for(blocked_range2d(0,50*50,4,0,360,4), *this);
}