Intel® oneAPI Threading Building Blocks
Ask questions and share information about adding parallelism to your applications when using this threading library.
Announcements
This community is designed for sharing of public information. Please do not share Intel or third-party confidential information here.
2421 Discussions

tbb::concurrent_vector does not work with tbb::parallel_scan

jtayama
Beginner
130 Views
Hi. I wrote a function for cumulative summation, which is defined by $y_i = \sum_{j=0}^{i} x_j$. I wrote this function in two styles.
One is ParallelScanSum.hpp.
[cpp]//
// ParallelScanSum.hpp
//
#include 
#include "tbb/task_scheduler_init.h"
#include "tbb/blocked_range.h"
#include "tbb/parallel_scan.h"

/// Body object for tbb::parallel_scan that writes the inclusive running sum
/// of inputData_ into results_, i.e. results_[i] = inputData_[0] + ... + inputData_[i].
///
/// Both arrays are held as raw pointers; this class never allocates or frees them.
template <typename T>
class ParallelScanSumClass{
public:
  /// Starts with a running sum of zero over the given output and input arrays.
  ParallelScanSumClass( T* __restrict results_, const T* __restrict inputData_ ) : sum_(0), results_( results_ ), inputData_( inputData_ ){}
  /// Splitting constructor: points at the same arrays as obj, with the running sum reset to zero.
  ParallelScanSumClass( ParallelScanSumClass& obj, tbb::split ) : sum_(0), results_( obj.results_ ), inputData_( obj.inputData_ ){}
  /// Combines obj's running sum with this one, keeping obj's sum as the left operand.
  void reverse_join( ParallelScanSumClass& obj ) {
    sum_ = obj.sum_ + sum_;
  }
  /// Takes over obj's running sum.
  void assign( ParallelScanSumClass& obj ) {
    sum_ = obj.sum_;
  }
  /// Accumulates inputData_ over range on top of the current running sum.
  /// results_ is written only when Tag::is_final_scan() is true; otherwise
  /// only the running sum is updated.
  template< typename Tag > void operator()( const tbb::blocked_range< unsigned int >& range, Tag ) {
    T tmp = sum_;
    // The index type matches the range's value type, so no signed/unsigned comparison occurs.
    for ( unsigned int i = range.begin(); i != range.end(); ++i ) {
      tmp = tmp + inputData_[ i ];
      if ( Tag::is_final_scan() ) {
        results_[ i ] = tmp;
      }
    }
    sum_ = tmp;
  }

  T sum_;               // running sum carried between sub-ranges
  T* results_;          // output array
  const T* inputData_;  // input array
};
template void ParallelScanSum( unsigned int n, const T* __restrict inp, T* __restrict out){
  ParallelScanSumClass pss( out, inp );
  tbb::parallel_scan( tbb::blocked_range( 0, n ), pss, tbb::auto_partitioner() );
}
[/cpp]
Another is "concurrent_vector_ParallelScanSum.hpp".
[cpp]//
// concurrent_vector_ParallelScanSum.hpp
//
#include 
#include "tbb/task_scheduler_init.h"
#include "tbb/blocked_range.h"
#include "tbb/concurrent_vector.h"
#include "tbb/parallel_scan.h"

namespace concurrent_vector{
  template 
  class ParallelScanSumClass{
  public:

    ParallelScanSumClass( tbb::concurrent_vector &results_,
                                      const tbb::concurrent_vector &inputData_ ) : sum_(0), results_( results_ ), inputData_( inputData_ ){}
    ParallelScanSumClass( ParallelScanSumClass& obj, tbb::split ) : inputData_( obj.inputData_ ), results_( obj.results_ ),sum_(0){}
    void reverse_join( ParallelScanSumClass& obj ) {
      sum_ = obj.sum_ + sum_;
    }
    void assign( ParallelScanSumClass& obj ) {
      sum_ = obj.sum_;
    }
    template< typename Tag > void operator()( const tbb::blocked_range< unsigned int >& range, Tag ) {
      T tmp = sum_;
      for ( int i = range.begin(); i != range.end(); ++i ) {
        tmp = tmp + inputData_[ i ];
        if ( Tag::is_final_scan() ) {
          results_[ i ] = tmp;
        }
      }
      sum_ = tmp;
    }
    T sum_;
    tbb::concurrent_vector results_;
    const tbb::concurrent_vector inputData_;
  };
  template void ParallelScanSum( unsigned int n, const tbb::concurrent_vector& inp, tbb::concurrent_vector& out){
    ParallelScanSumClass pss( out, inp );
    tbb::parallel_scan( tbb::blocked_range( 0, n ), pss, tbb::auto_partitioner() );
  }
}
[/cpp]
And this is the main program.
[cpp]//
// main.cpp
//
#include  
#include "tbb/concurrent_vector.h"
#include "tbb/task_scheduler_init.h"
#include "parallelScanSum.hpp"
#include "concurrent_vector_parallelScanSum.hpp"


int main(){

  tbb::concurrent_vector inp;
  tbb::concurrent_vector out(10);

  unsigned int inp_array[10];
  unsigned int out_array[10];

  for(unsigned int i = 0; i < 10; ++i){
    inp.push_back( i );
    inp_array = i;
  }
  ParallelScanSum( 10, inp_array, out_array );
  for( unsigned int i = 0; i < 10; ++i ){
    std::cout << "out_array[" << i << "]:" << out_array << std::endl;
  }

  concurrent_vector::ParallelScanSum( 10, inp, out );
  for( unsigned int i = 0; i < 10; ++i ){
    std::cout << "out[" << i << "]:" << out << std::endl;
  }
  return 0;
}[/cpp]


The result is
out_array[0]:0
out_array[1]:1
out_array[2]:3
out_array[3]:6
out_array[4]:10
out_array[5]:15
out_array[6]:21
out_array[7]:28
out_array[8]:36
out_array[9]:45
out[0]:0
out[1]:0
out[2]:0
out[3]:0
out[4]:0
out[5]:0
out[6]:0
out[7]:0
out[8]:0
out[9]:0

As you can see, the conventional array version of this function (parallelScanSum.hpp) works correctly, but the tbb::concurrent_vector version (concurrent_vector_parallelScanSum.hpp) does not. Do you have any idea what causes this failure?

Thanks.
0 Kudos
1 Solution
RafSchietekat
Black Belt
130 Views
The concurrent_vector member variables in ParallelScanSumClass should be references.

View solution in original post

2 Replies
RafSchietekat
Black Belt
131 Views
The concurrent_vector member variables in ParallelScanSumClass should be references.
jtayama
Beginner
130 Views
Thank you for your reply.

I changed lines 35 and 36 of concurrent_vector_parallelScanSum.hpp to

tbb::concurrent_vector<T>& results_;
const tbb::concurrent_vector<T>& inputData_;

It works now. It was an easy mistake to make ...
Reply