Intel® oneAPI Threading Building Blocks
Ask questions and share information about adding parallelism to your applications when using this threading library.
2477 Discussions

tbb::concurrent_vector does not work with tbb::parallel_scan

jtayama
Beginner
744 Views
Hi. I wrote a function for cumulative summation (an inclusive prefix sum), which is defined by $y_i = \sum_{j=0}^{i} x_j$. I wrote this function in two styles.
One is ParallelScanSum.hpp.
[cpp]//
// ParallelScanSum.hpp
//
#include 
#include "tbb/task_scheduler_init.h"
#include "tbb/blocked_range.h"
#include "tbb/parallel_scan.h"

/// Body object passed to tbb::parallel_scan to compute an inclusive prefix sum
/// over raw arrays: results_[i] = inputData_[0] + ... + inputData_[i].
///
/// The arrays are borrowed through raw pointers, never owned or copied.
///
/// @tparam T element type; as used here it must be constructible from 0,
///           copy-assignable and support operator+.
template <typename T>
class ParallelScanSumClass{
public:
  /// Creates a body with a zero running sum that reads inputData_ and writes results_.
  ParallelScanSumClass( T* __restrict results_, const T* __restrict inputData_ ) : sum_(0), results_( results_ ), inputData_( inputData_ ){}
  /// Splitting constructor: shares obj's array pointers and restarts the running sum at zero.
  ParallelScanSumClass( ParallelScanSumClass& obj, tbb::split ) : sum_(0), results_( obj.results_ ), inputData_( obj.inputData_ ){}
  /// Prepends obj's running sum to this body's running sum.
  void reverse_join( ParallelScanSumClass& obj ) {
    sum_ = obj.sum_ + sum_;
  }
  /// Overwrites this body's running sum with obj's.
  void assign( ParallelScanSumClass& obj ) {
    sum_ = obj.sum_;
  }
  /// Accumulates inputData_ over range on top of the current running sum.
  /// results_ is written only when Tag::is_final_scan() is true; the running
  /// sum is updated in either case.
  template< typename Tag > void operator()( const tbb::blocked_range< unsigned int >& range, Tag ) {
    T tmp = sum_;
    // The index uses the range's own unsigned type, so the comparison with
    // range.end() is not a signed/unsigned mix.
    for ( unsigned int i = range.begin(); i != range.end(); ++i ) {
      tmp = tmp + inputData_[ i ];
      if ( Tag::is_final_scan() ) {
        results_[ i ] = tmp;
      }
    }
    sum_ = tmp;
  }

  T sum_;              // running sum carried between sub-ranges
  T* results_;         // output array (borrowed)
  const T* inputData_; // input array (borrowed)
};
template void ParallelScanSum( unsigned int n, const T* __restrict inp, T* __restrict out){
  ParallelScanSumClass pss( out, inp );
  tbb::parallel_scan( tbb::blocked_range( 0, n ), pss, tbb::auto_partitioner() );
}
[/cpp]
The other is "concurrent_vector_ParallelScanSum.hpp".
[cpp]//
// concurrent_vector_ParallelScanSum.hpp
//
#include 
#include "tbb/task_scheduler_init.h"
#include "tbb/blocked_range.h"
#include "tbb/concurrent_vector.h"
#include "tbb/parallel_scan.h"

namespace concurrent_vector{
  template 
  class ParallelScanSumClass{
  public:

    ParallelScanSumClass( tbb::concurrent_vector &results_,
                                      const tbb::concurrent_vector &inputData_ ) : sum_(0), results_( results_ ), inputData_( inputData_ ){}
    ParallelScanSumClass( ParallelScanSumClass& obj, tbb::split ) : inputData_( obj.inputData_ ), results_( obj.results_ ),sum_(0){}
    void reverse_join( ParallelScanSumClass& obj ) {
      sum_ = obj.sum_ + sum_;
    }
    void assign( ParallelScanSumClass& obj ) {
      sum_ = obj.sum_;
    }
    template< typename Tag > void operator()( const tbb::blocked_range< unsigned int >& range, Tag ) {
      T tmp = sum_;
      for ( int i = range.begin(); i != range.end(); ++i ) {
        tmp = tmp + inputData_[ i ];
        if ( Tag::is_final_scan() ) {
          results_[ i ] = tmp;
        }
      }
      sum_ = tmp;
    }
    T sum_;
    tbb::concurrent_vector results_;
    const tbb::concurrent_vector inputData_;
  };
  template void ParallelScanSum( unsigned int n, const tbb::concurrent_vector& inp, tbb::concurrent_vector& out){
    ParallelScanSumClass pss( out, inp );
    tbb::parallel_scan( tbb::blocked_range( 0, n ), pss, tbb::auto_partitioner() );
  }
}
[/cpp]
And this is the main program.
[cpp]//
// main.cpp
//
#include <iostream>
#include "tbb/concurrent_vector.h"
#include "tbb/task_scheduler_init.h"
#include "parallelScanSum.hpp"
#include "concurrent_vector_parallelScanSum.hpp"


int main(){

  tbb::concurrent_vector inp;
  tbb::concurrent_vector out(10);

  unsigned int inp_array[10];
  unsigned int out_array[10];

  for(unsigned int i = 0; i < 10; ++i){
    inp.push_back( i );
    inp_array = i;
  }
  ParallelScanSum( 10, inp_array, out_array );
  for( unsigned int i = 0; i < 10; ++i ){
    std::cout << "out_array[" << i << "]:" << out_array << std::endl;
  }

  concurrent_vector::ParallelScanSum( 10, inp, out );
  for( unsigned int i = 0; i < 10; ++i ){
    std::cout << "out[" << i << "]:" << out << std::endl;
  }
  return 0;
}[/cpp]


The result is
out_array[0]:0
out_array[1]:1
out_array[2]:3
out_array[3]:6
out_array[4]:10
out_array[5]:15
out_array[6]:21
out_array[7]:28
out_array[8]:36
out_array[9]:45
out[0]:0
out[1]:0
out[2]:0
out[3]:0
out[4]:0
out[5]:0
out[6]:0
out[7]:0
out[8]:0
out[9]:0

As you can see, the conventional array version of this function (parallelScanSum.hpp) works correctly, but the tbb::concurrent_vector version (concurrent_vector_parallelScanSum.hpp) does not. Do you have any idea what causes this failure?

Thanks.
0 Kudos
1 Solution
RafSchietekat
Valued Contributor III
744 Views
The concurrent_vector member variables in ParallelScanSumClass should be references.

View solution in original post

0 Kudos
2 Replies
RafSchietekat
Valued Contributor III
745 Views
The concurrent_vector member variables in ParallelScanSumClass should be references.
0 Kudos
jtayama
Beginner
744 Views
Thank you for your reply.

I changed lines 35 and 36 of concurrent_vector_parallelScanSum.hpp to

tbb::concurrent_vector<T>& results_;
const tbb::concurrent_vector<T>& inputData_;

It works now. It was an easy mistake to make...
0 Kudos
Reply