Community
cancel
Showing results for 
Search instead for 
Did you mean: 
Highlighted
Beginner
17 Views

TBB scheduler is stuck and unable to run enqueued tasks

Hi,

 

We are running into a TBB scheduler issue where tasks are being enqueued but never executed. Could you please give me some pointers on what might be going wrong?

Here are a few details:

TBB version: TBB 2018 Update 5. We hit the same issue with the initial release of TBB 4.3 as well.

One master thread instantiates the scheduler with a thread count of 8. The priority is the same for all tasks. We use the enqueue() method to enqueue tasks.

 

tbb::internal::market::theMarket

 

{<tbb::internal::no_copy> = {<tbb::internal::no_assign> = {<No data fields>}, <No data fields>}, <tbb::internal::rml::tbb_client> = {<rml::client> = {<rml::versioned_object> = {_vptr.versioned_object = 0x2b035fa253d0 <vtable for tbb::internal::market+16>}, <No data fields>}, <No data fields>}, static theMarket = 0x3169500,

  static theMarketMutex = {<tbb::internal::mutex_copy_deprecated_and_disabled> = {<tbb::internal::no_copy> = {<tbb::internal::no_assign> = {<No data fields>}, <No data fields>}, <No data fields>}, flag = 0 '\000', static is_rw_mutex = false, static is_recursive_mutex = false, static is_fair_mutex = false},

  my_arenas_list_mutex = {<tbb::internal::mutex_copy_deprecated_and_disabled> = {<tbb::internal::no_copy> = {<tbb::internal::no_assign> = {<No data fields>}, <No data fields>}, <No data fields>}, static is_rw_mutex = true, static is_recursive_mutex = false, static is_fair_mutex = false, static WRITER = 1, static WRITER_PENDING = 2,

    static READERS = -4, static ONE_READER = 4, static BUSY = -3, state = 0}, my_server = 0x3168e80, my_num_workers_hard_limit = 256, my_num_workers_soft_limit = 8,

  my_num_workers_requested = 8,

  my_first_unused_worker_idx = {<tbb::internal::atomic_impl_with_arithmetic<unsigned int, unsigned int, char>> = {<tbb::internal::atomic_impl<unsigned int>> = {

        my_storage = {my_value = 16}}, <No data fields>}, <No data fields>}, my_total_demand = 8, my_mandatory_num_requested = 0, my_global_top_priority = 1,

  my_global_bottom_priority = 1, my_global_reload_epoch = 353591608, my_priority_levels = {{

      arenas = {<tbb::internal::intrusive_list_base<tbb::internal::intrusive_list<tbb::internal::arena>, tbb::internal::arena>> = {my_head = {my_prev_node = 0x3169548,

            my_next_node = 0x3169548}, my_size = 0}, <No data fields>}, next_arena = 0x0, workers_requested = 0, workers_available = 0}, {

      arenas = {<tbb::internal::intrusive_list_base<tbb::internal::intrusive_list<tbb::internal::arena>, tbb::internal::arena>> = {my_head = {my_prev_node = 0x3175500,

            my_next_node = 0x3175500}, my_size = 1}, <No data fields>}, next_arena = 0x3175500, workers_requested = 8, workers_available = 8}, {

      arenas = {<tbb::internal::intrusive_list_base<tbb::internal::intrusive_list<tbb::internal::arena>, tbb::internal::arena>> = {my_head = {my_prev_node = 0x3169598,

            my_next_node = 0x3169598}, my_size = 0}, <No data fields>}, next_arena = 0x0, workers_requested = 0, workers_available = 0}}, my_arenas_aba_epoch = 0,

  my_ref_count = 2, my_public_ref_count = 1, my_stack_size = 4194304, my_join_workers = false, static skip_soft_limit_warning = 4294967295,

  my_workers_soft_limit_to_report = 8, my_task_node_count = {<tbb::internal::atomic_impl_with_arithmetic<long, long, char>> = {<tbb::internal::atomic_impl<long>> = {

        my_storage = {my_value = 0}}, <No data fields>}, <No data fields>},

  my_masters = {<tbb::internal::intrusive_list_base<tbb::internal::intrusive_list<tbb::internal::generic_scheduler>, tbb::internal::generic_scheduler>> = {my_head = {

        my_prev_node = 0x317d790, my_next_node = 0x317d790}, my_size = 1}, <No data fields>}, my_workers = {0x2b0370000900}}

 

Arena:

 

{<tbb::internal::padded<tbb::internal::arena_base, 128ul>> = {<tbb::internal::padded_base<tbb::internal::arena_base, 128ul, 0ul>> = {<tbb::internal::arena_base> = {<tbb::internal::padded<tbb::internal::intrusive_list_node, 128ul>> = {<tbb::internal::padded_base<tbb::internal::intrusive_list_node, 128ul, 16ul>> = {<tbb::internal::intrusive_list_node> = {my_prev_node = 0x3169570, my_next_node = 0x3169570}, pad = '\000' <repeats 111 times>}, <No data fields>}, my_num_workers_allotted = 8,

        my_references = {<tbb::internal::atomic_impl_with_arithmetic<unsigned int, unsigned int, char>> = {<tbb::internal::atomic_impl<unsigned int>> = {my_storage = {

                my_value = 4097}}, <No data fields>}, <No data fields>}, my_top_priority = 1,

        my_limit = {<tbb::internal::atomic_impl_with_arithmetic<unsigned int, unsigned int, char>> = {<tbb::internal::atomic_impl<unsigned int>> = {my_storage = {

                my_value = 9}}, <No data fields>}, <No data fields>},

        my_task_stream = {<tbb::internal::no_copy> = {<tbb::internal::no_assign> = {<No data fields>}, <No data fields>}, population = {0, 26300, 0}, lanes = {0x3169e68,

            0x3177b38, 0x317a948}, N = 16}, my_max_num_workers = 8, my_num_workers_requested = 8,

        my_pool_state = {<tbb::internal::atomic_impl_with_arithmetic<unsigned long, unsigned long, char>> = {<tbb::internal::atomic_impl<unsigned long>> = {my_storage = {

                my_value = 18446744073709551615}}, <No data fields>}, <No data fields>}, my_observers = {my_head = 0x0, my_tail = 0x0, my_mutex = {array = {{member = 0}}},

          my_arena = 0x3175500}, my_bottom_priority = 1, my_reload_epoch = 0, my_orphaned_tasks = 0x0,

        my_abandonment_epoch = {<tbb::internal::atomic_impl_with_arithmetic<unsigned long, unsigned long, char>> = {<tbb::internal::atomic_impl<unsigned long>> = {

              my_storage = {my_value = 0}}, <No data fields>}, <No data fields>},

        my_skipped_fifo_priority = {<tbb::internal::atomic_impl_with_arithmetic<long, long, char>> = {<tbb::internal::atomic_impl<long>> = {my_storage = {

                my_value = 0}}, <No data fields>}, <No data fields>}, my_market = 0x3169500, my_aba_epoch = 0, my_default_ctx = 0x317db80, my_num_slots = 9,

        my_num_reserved_slots = 1, my_concurrency_mode = tbb::internal::arena_base::cm_normal,

        my_exit_monitors = {<tbb::internal::no_copy> = {<tbb::internal::no_assign> = {<No data fields>}, <No data fields>},

          mutex_ec = {<tbb::internal::mutex_copy_deprecated_and_disabled> = {<tbb::internal::no_copy> = {<tbb::internal::no_assign> = {<No data fields>}, <No data fields>}, <No data fields>}, flag = 0 '\000', static is_rw_mutex = false, static is_recursive_mutex = false, static is_fair_mutex = false},

          waitset_ec = {<tbb::internal::no_copy> = {<tbb::internal::no_assign> = {<No data fields>}, <No data fields>}, count = 0, head = {next = 0x3175660,

              prev = 0x3175660}}, epoch = 0}, my_guard = 0}, <No data fields>}, <No data fields>}, static SNAPSHOT_EMPTY = 0, static SNAPSHOT_FULL = 18446744073709551615,

  static ref_external_bits = 12, static ref_external = 1, static ref_worker = 4096, static out_of_arena = 18446744073709551615, my_slots = {

    {<tbb::internal::padded<tbb::internal::arena_slot_line1, 128ul>> = {<tbb::internal::padded_base<tbb::internal::arena_slot_line1, 128ul, 24ul>> = {<tbb::internal::arena_slot_line1> = {my_scheduler = 0x317d780, task_pool = 0x0, head = 0},

          pad = '\000' <repeats 103 times>}, <No data fields>}, <tbb::internal::padded<tbb::internal::arena_slot_line2, 128ul>> = {<tbb::internal::padded_base<tbb::internal::arena_slot_line2, 128ul, 32ul>> = {<tbb::internal::arena_slot_line2> = {hint_for_pop = 0, tail = 0, my_task_pool_size = 0, task_pool_ptr = 0x0},

          pad = '\000' <repeats 95 times>}, <No data fields>}, <No data fields>}}}

 

Thanks in advance,

Sangarshan

0 Kudos
2 Replies
Highlighted
Beginner
17 Views

Hi , 

The number of active threads in the arena is 1; that thread is used for I/O and is expected to run forever. According to the TBB state, new work is available and my_max_workers_requested is 8, but the server's my_slack value is set to -1 and all worker threads are in the commit wait state, waiting for a wake-up signal. Is this expected when work is available in the arena?

Thanks in advance,

Sangarshan

 

0 Kudos
Highlighted
Beginner
17 Views

We see a loop in the asleep_list of the private server object.
A total of 256 workers were created, and it looks like the linked list is broken. We see the loop shown below (listed by worker index):
241->250->245->249->246->251->244->251
Worker 244 points back to worker 251 here.

Regards,

Sangarshan

 

0 Kudos