[cpp]// // ParallelScanSum.hpp // #includeAnother is "concurrent_vector_ParallelScanSum.hpp".#include "tbb/task_scheduler_init.h" #include "tbb/blocked_range.h" #include "tbb/parallel_scan.h" template class ParallelScanSumClass{ public: ParallelScanSumClass( T* __restrict results_, const T* __restrict inputData_ ) : sum_(0), inputData_( inputData_ ), results_( results_ ){} ParallelScanSumClass( ParallelScanSumClass& obj, tbb::split ) : sum_(0), inputData_( obj.inputData_ ), results_( obj.results_ ){} void reverse_join( ParallelScanSumClass& obj ) { sum_ = obj.sum_ + sum_; } void assign( ParallelScanSumClass& obj ) { sum_ = obj.sum_; } template< typename Tag > void operator()( const tbb::blocked_range< unsigned int >& range, Tag ) { T tmp = sum_; for ( int i = range.begin(); i != range.end(); ++i ) { tmp = tmp + inputData_[ i ]; if ( Tag::is_final_scan() ) { results_[ i ] = tmp; } } sum_ = tmp; } T sum_; T* results_; const T* inputData_; }; template void ParallelScanSum( unsigned int n, const T* __restrict inp, T* __restrict out){ ParallelScanSumClass pss( out, inp ); tbb::parallel_scan( tbb::blocked_range ( 0, n ), pss, tbb::auto_partitioner() ); } [/cpp]
[cpp]// // concurrent_vector_ParallelScanSum.hpp // #includeAnd this is the main program.#include "tbb/task_scheduler_init.h" #include "tbb/blocked_range.h" #include "tbb/concurrent_vector.h" #include "tbb/parallel_scan.h" namespace concurrent_vector{ template class ParallelScanSumClass{ public: ParallelScanSumClass( tbb::concurrent_vector &results_, const tbb::concurrent_vector &inputData_ ) : sum_(0), results_( results_ ), inputData_( inputData_ ){} ParallelScanSumClass( ParallelScanSumClass& obj, tbb::split ) : inputData_( obj.inputData_ ), results_( obj.results_ ),sum_(0){} void reverse_join( ParallelScanSumClass& obj ) { sum_ = obj.sum_ + sum_; } void assign( ParallelScanSumClass& obj ) { sum_ = obj.sum_; } template< typename Tag > void operator()( const tbb::blocked_range< unsigned int >& range, Tag ) { T tmp = sum_; for ( int i = range.begin(); i != range.end(); ++i ) { tmp = tmp + inputData_[ i ]; if ( Tag::is_final_scan() ) { results_[ i ] = tmp; } } sum_ = tmp; } T sum_; tbb::concurrent_vector results_; const tbb::concurrent_vector inputData_; }; template void ParallelScanSum( unsigned int n, const tbb::concurrent_vector & inp, tbb::concurrent_vector & out){ ParallelScanSumClass pss( out, inp ); tbb::parallel_scan( tbb::blocked_range ( 0, n ), pss, tbb::auto_partitioner() ); } } [/cpp]
[cpp]// // main.cpp // #include#include "tbb/concurrent_vector.h" #include "tbb/task_scheduler_init.h" #include "parallelScanSum.hpp" #include "concurrent_vector_parallelScanSum.hpp" int main(){ tbb::concurrent_vector inp; tbb::concurrent_vector out(10); unsigned int inp_array[10]; unsigned int out_array[10]; for(unsigned int i = 0; i < 10; ++i){ inp.push_back( i ); inp_array = i; } ParallelScanSum( 10, inp_array, out_array ); for( unsigned int i = 0; i < 10; ++i ){ std::cout << "out_array[" << i << "]:" << out_array << std::endl; } concurrent_vector::ParallelScanSum( 10, inp, out ); for( unsigned int i = 0; i < 10; ++i ){ std::cout << "out[" << i << "]:" << out << std::endl; } return 0; }[/cpp]
Link Copied
For more complete information about compiler optimizations, see our Optimization Notice.