/// /// implements recycle-as-parent continuation-passing /// /// \note @c derived_task must satisfy the following concept: /// \code /// struct derived_task /// : recycle_as_parent_continuation_task /// { /// unsigned work_load(); /// void serial_execution(); /// bool may_split(); /// void generate_children(child_task_list&); /// void continuation(); /// }; /// \endcode /// /// @a work_load() returns a proxy for the work load of the current task. /// @a work_load() is called twice (which must return the same value) for /// each child before spawning, and is used to balance further splitting. /// If all children return zero, we assume their work load is perfectly /// balanced. Otherwise, a child returning zero is liable to @a /// serial_execution(). /// /// @a serial_execution() performs the current task serially. It is called /// when further recursive parallelism is deemed unnecessary. /// /// @a may_split() indicates that the algorithm should consider, but not /// necessarily use, recursive parallelism via @a generate_children() for /// the current task. It should return false only if @a serial_execution() /// is guaranteed to be preferrable over recursive parallelism: in doubt /// return true. /// /// @a generate_children(child_task_list&) tries to split the current /// task. The child tasks have to be entered into the @c child_task_list /// via @a recycle_as_parent_continuation_task::add_child(). On returning, /// the parent task must be prepared for @a continuation(), even if no /// child was generated. /// /// @a continuation() continues (and finishes) the parent task after all /// children have been finished themselves, either by recursive parallelism /// or by @a serial_execution(). template class recycle_as_parent_continuation_task : tbb::task { typedef recycle_as_parent_continuation_task base_task; /// number of tasks we need splitting into, continuation if negative. float split_count; /// shall we split this task? or use serial execution? bool must_split_this() { return ! static_cast(this)->may_split() ? false : 1 < ( is_stolen_task() ? split_count = std::max(float(tbb::task_scheduler_init::default_num_threads()),split_count) : split_count ); } /// split count suitable for root static float split_count_root() noexcept { auto n_thread = tbb::task_scheduler_init::default_num_threads(); return n_thread>1? float(16*n_thread) : 0.f; } /// is this a continuation task? /// \note we must allow for the root task, when split_count < -2 bool is_continuation() noexcept { return 0 > (split_count < -2 ? split_count = split_count_root() : split_count); } public: /// constructor: set split_count to unique non-sensical value recycle_as_parent_continuation_task() noexcept : split_count(-3.f) {} /// object for collecting sub-tasks class child_task_list : public tbb::task_list { friend class recycle_as_parent_continuation_task; unsigned count,work; child_task_list() : count(0), work(0) {} derived_task*pop_front() { return &static_cast(tbb::task_list::pop_front()); } #ifdef TBB_TASK_LIST_HAS_FIRST derived_task*first() const noexcept { return static_cast(tbb::task_list::first()); } #endif #if __cplusplus < 201103L public: #endif /// add a new child task /// \note to be used by derived_task::generate_children() /// \note the child task must be allocated with @a new(allocate_child()) void push_back(derived_task&child) { tbb::task_list::push_back(child); count++ ; work += child.work_load(); } };// struct recycle_as_parent_continuation_task::child_task_list #if __cplusplus >= 201103L /// add a new child task to list /// \note to be used by derived_task::generate_children() /// \note reference arguments must be passed explicitly as reference via /// @a std::ref(arg) template void add_child(child_task_list&list, Args&&... args) { list.push_back(*new(allocate_child()) derived_task(std::forward(args)...)); } #endif /// execute task tbb::task*execute() { derived_task*child = nullptr; // 1 continutation if(is_continuation()) static_cast(this)->continuation(); // 2 try to split recursively else if(must_split_this()) { child_task_list children; static_cast(this)->generate_children(children); // 2.1 no children: continue immediately if(children.empty()) static_cast(this)->continuation(); // 2.2 at least one child: use recursive parallelism else { #ifdef TBB_TASK_LIST_HAS_FIRST // this would be the code if tbb::task_list had minimum iteration support // 2.2.1 set children's split_count if(children.work > 0) { auto tmp = split_count / float(children.work); for(child = children.first_task(); child; child = static_cast(child.next_task())) static_cast(child)->split_count = tmp*float(child->work_load()); } else { auto tmp = split_count / float(children.count); for(child = children.first_task(); child; child = static_cast(child.next_task())) static_cast(child)->split_count = tmp; } child = children.pop_front(); // 2.2.2 morph to continuation and spawn children split_count = -1; recycle_as_continuation(); set_ref_count(int(children.count)); spawn(children); #else // NOTE we must visit each child before spawning it to set its // split_count. Since tbb::task_list does not allow // iteration over its tasks, this means we must pop all // children form their list and push them into a new list. // 2.2.1 set children's split_count tbb::task_list children_to_spawn; // this could be avoided if(children.work > 0) { auto tmp = split_count / float(children.work); child = children.pop_front(); static_cast(child)->split_count = tmp*float(child->work_load()); while(!children.empty()) { children_to_spawn.push_back(*child); child = children.pop_front(); static_cast(child)->split_count = tmp*float(child->work_load()); } } else { auto tmp = split_count / float(children.count); child = children.pop_front(); static_cast(child)->split_count = tmp; while(!children.empty()) { children_to_spawn.push_back(*child); child = children.pop_front(); static_cast(child)->split_count = tmp; } } // 2.2.2 morph to continuation and spawn children split_count = -1; recycle_as_continuation(); set_ref_count(int(children.count)); spawn(children_to_spawn); #endif } } // 3 serial execution else static_cast(this)->serial_execution(); return child; } #if __cplusplus >= 201103L /// run the root task /// \note reference arguments must be passed explicitly as reference via /// @a std::ref(arg) template static void parallel_execute(Args&&... args) { tbb::task::spawn_root_and_wait (*new(allocate_root()) derived_task(std::forward(args)...)); } #endif };// struct recycle_as_parent_continuation_task<> /// usage example class my_task : public recycle_as_parent_continuation_task { some_type my_data; unsigned my_work; static unsigned work_of(my_data const&data) { return /* a proxy for the work load associated with this data or zero if work load between siblings is balanced */; } public: my_task(some_type const&data) : my_data(data) , my_work(work_of(my_data)) {} unsigned work_load() const { return my_work; } bool may_split() const { return /* if task is non-trivial and generate_children() should be considered */; } void serial_execution() { /* perform task as if in serial code */ } void generate_children(typename recycle_as_parent_continuation_task::child_task_list &list) { for( /* loop sub-tasks */ ) { some_type sub = /* set data for sub-task */; add_child(list,sub); } } void continuation() { /* finish task after all children have finished, may do nothing */ } }; my_task::parallel_execute(some_data);