- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Hi. I wrote a function for cumulative summation, which is defined by $y_i = \sum_{j=0}^{i} x_j$. I wrote this function in two styles.
One is ParallelScanSum.hpp.
The result is
out_array[0]:0
out_array[1]:1
out_array[2]:3
out_array[3]:6
out_array[4]:10
out_array[5]:15
out_array[6]:21
out_array[7]:28
out_array[8]:36
out_array[9]:45
out[0]:0
out[1]:0
out[2]:0
out[3]:0
out[4]:0
out[5]:0
out[6]:0
out[7]:0
out[8]:0
out[9]:0
As you can see, the conventional array version of this function (parallelScanSum.hpp) works correctly, but the tbb::concurrent_vector version (concurrent_vector_parallelScanSum.hpp) does not: every element of its output is 0. Do you have any idea what causes this failure?
Thanks.
One is ParallelScanSum.hpp.
[cpp]// // ParallelScanSum.hpp // #includeAnother is "concurrent_vector_ParallelScanSum.hpp".#include "tbb/task_scheduler_init.h" #include "tbb/blocked_range.h" #include "tbb/parallel_scan.h" template class ParallelScanSumClass{ public: ParallelScanSumClass( T* __restrict results_, const T* __restrict inputData_ ) : sum_(0), inputData_( inputData_ ), results_( results_ ){} ParallelScanSumClass( ParallelScanSumClass& obj, tbb::split ) : sum_(0), inputData_( obj.inputData_ ), results_( obj.results_ ){} void reverse_join( ParallelScanSumClass& obj ) { sum_ = obj.sum_ + sum_; } void assign( ParallelScanSumClass& obj ) { sum_ = obj.sum_; } template< typename Tag > void operator()( const tbb::blocked_range< unsigned int >& range, Tag ) { T tmp = sum_; for ( int i = range.begin(); i != range.end(); ++i ) { tmp = tmp + inputData_[ i ]; if ( Tag::is_final_scan() ) { results_[ i ] = tmp; } } sum_ = tmp; } T sum_; T* results_; const T* inputData_; }; template void ParallelScanSum( unsigned int n, const T* __restrict inp, T* __restrict out){ ParallelScanSumClass pss( out, inp ); tbb::parallel_scan( tbb::blocked_range ( 0, n ), pss, tbb::auto_partitioner() ); } [/cpp]
[cpp]// // concurrent_vector_ParallelScanSum.hpp // #includeAnd this is the main program.#include "tbb/task_scheduler_init.h" #include "tbb/blocked_range.h" #include "tbb/concurrent_vector.h" #include "tbb/parallel_scan.h" namespace concurrent_vector{ template class ParallelScanSumClass{ public: ParallelScanSumClass( tbb::concurrent_vector &results_, const tbb::concurrent_vector &inputData_ ) : sum_(0), results_( results_ ), inputData_( inputData_ ){} ParallelScanSumClass( ParallelScanSumClass& obj, tbb::split ) : inputData_( obj.inputData_ ), results_( obj.results_ ),sum_(0){} void reverse_join( ParallelScanSumClass& obj ) { sum_ = obj.sum_ + sum_; } void assign( ParallelScanSumClass& obj ) { sum_ = obj.sum_; } template< typename Tag > void operator()( const tbb::blocked_range< unsigned int >& range, Tag ) { T tmp = sum_; for ( int i = range.begin(); i != range.end(); ++i ) { tmp = tmp + inputData_[ i ]; if ( Tag::is_final_scan() ) { results_[ i ] = tmp; } } sum_ = tmp; } T sum_; tbb::concurrent_vector results_; const tbb::concurrent_vector inputData_; }; template void ParallelScanSum( unsigned int n, const tbb::concurrent_vector & inp, tbb::concurrent_vector & out){ ParallelScanSumClass pss( out, inp ); tbb::parallel_scan( tbb::blocked_range ( 0, n ), pss, tbb::auto_partitioner() ); } } [/cpp]
[cpp]// // main.cpp // #include#include "tbb/concurrent_vector.h" #include "tbb/task_scheduler_init.h" #include "parallelScanSum.hpp" #include "concurrent_vector_parallelScanSum.hpp" int main(){ tbb::concurrent_vector inp; tbb::concurrent_vector out(10); unsigned int inp_array[10]; unsigned int out_array[10]; for(unsigned int i = 0; i < 10; ++i){ inp.push_back( i ); inp_array = i; } ParallelScanSum( 10, inp_array, out_array ); for( unsigned int i = 0; i < 10; ++i ){ std::cout << "out_array[" << i << "]:" << out_array << std::endl; } concurrent_vector::ParallelScanSum( 10, inp, out ); for( unsigned int i = 0; i < 10; ++i ){ std::cout << "out[" << i << "]:" << out << std::endl; } return 0; }[/cpp]
The result is
out_array[0]:0
out_array[1]:1
out_array[2]:3
out_array[3]:6
out_array[4]:10
out_array[5]:15
out_array[6]:21
out_array[7]:28
out_array[8]:36
out_array[9]:45
out[0]:0
out[1]:0
out[2]:0
out[3]:0
out[4]:0
out[5]:0
out[6]:0
out[7]:0
out[8]:0
out[9]:0
As you can see, the conventional array version of this function (parallelScanSum.hpp) works correctly, but the tbb::concurrent_vector version (concurrent_vector_parallelScanSum.hpp) does not: every element of its output is 0. Do you have any idea what causes this failure?
Thanks.
1 Solution
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
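The concurrent_vector member variables in ParallelScanSumClass should be references. As written they are copies of the vectors passed to the constructor, so the results are stored in the copy and the caller's output vector is never modified.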
Link Copied
2 Replies
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
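The concurrent_vector member variables in ParallelScanSumClass should be references. As written they are copies of the vectors passed to the constructor, so the results are stored in the copy and the caller's output vector is never modified.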
- Mark as New
- Bookmark
- Subscribe
- Mute
- Subscribe to RSS Feed
- Permalink
- Report Inappropriate Content
Thank you for your reply.
I changed lines 35 and 36 of concurrent_vector_parallelScanSum.hpp to
tbb::concurrent_vector<T>& results_;
const tbb::concurrent_vector<T>& inputData_;
It works now. It was an easy mistake to make ...
I changed lines 35 and 36 of concurrent_vector_parallelScanSum.hpp to
tbb::concurrent_vector<T>& results_;
const tbb::concurrent_vector<T>& inputData_;
It works now. It was an easy mistake to make ...
Reply
Topic Options
- Subscribe to RSS Feed
- Mark Topic as New
- Mark Topic as Read
- Float this Topic for Current User
- Bookmark
- Subscribe
- Printer Friendly Page