Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
arena.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef _TBB_arena_H
18 #define _TBB_arena_H
19 
20 #include "tbb/tbb_stddef.h"
21 #include "tbb/atomic.h"
22 
23 #include "tbb/tbb_machine.h"
24 
25 #include "scheduler_common.h"
26 #include "intrusive_list.h"
27 #if __TBB_PREVIEW_CRITICAL_TASKS && __TBB_CPF_BUILD
28 #include "task_stream_extended.h"
29 #else
30 #include "task_stream.h"
31 #endif
32 #include "../rml/include/rml_tbb.h"
33 #include "mailbox.h"
34 #include "observer_proxy.h"
35 #include "market.h"
36 #include "governor.h"
37 #include "concurrent_monitor.h"
38 
39 namespace tbb {
40 
41 class task_group_context;
42 class allocate_root_with_context_proxy;
43 
44 namespace internal {
45 
47 
49 struct arena_base : padded<intrusive_list_node> {
51  unsigned my_num_workers_allotted; // heavy use in stealing loop
52 
54 
57  atomic<unsigned> my_references; // heavy use in stealing loop
58 
59 #if __TBB_TASK_PRIORITY
60  volatile intptr_t my_top_priority; // heavy use in stealing loop
62 #endif /* __TBB_TASK_PRIORITY */
63 
65  atomic<unsigned> my_limit; // heavy use in stealing loop
66 
68 
73 #if __TBB_PREVIEW_CRITICAL_TASKS && __TBB_CPF_BUILD
75 #else
76  task_stream<num_priority_levels> my_task_stream; // heavy use in stealing loop
77 #endif
78 
79 #if __TBB_PREVIEW_CRITICAL_TASKS
80 
84  // used on the hot path of the task dispatch loop
85  task_stream<1, back_nonnull_accessor> my_critical_task_stream;
86 #endif
87 
90 
93 
95 
100 
101 #if __TBB_ARENA_OBSERVER
102  observer_list my_observers;
104 #endif
105 
106 #if __TBB_TASK_PRIORITY
107  intptr_t my_bottom_priority;
109 
111 
113  uintptr_t my_reload_epoch;
114 
116  task* my_orphaned_tasks;
117 
119  tbb::atomic<uintptr_t> my_abandonment_epoch;
120 
122 
125  tbb::atomic<intptr_t> my_skipped_fifo_priority;
126 #endif /* __TBB_TASK_PRIORITY */
127 
128  // Below are rarely modified members
129 
132 
134  uintptr_t my_aba_epoch;
135 
136 #if !__TBB_FP_CONTEXT
139 #endif
140 
141 #if __TBB_TASK_GROUP_CONTEXT
142 
145  task_group_context* my_default_ctx;
146 #endif /* __TBB_TASK_GROUP_CONTEXT */
147 
149  unsigned my_num_slots;
150 
153 
154 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
155  enum concurrency_mode {
157  cm_normal = 0, // arena is served by workers as usual
158  cm_enforced_local, // arena needs an extra worker despite the arena limit
159  cm_enforced_global // arena needs an extra worker despite a global limit
160  };
161 
163  concurrency_mode my_concurrency_mode;
164 #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
165 
168 
169 #if TBB_USE_ASSERT
170  uintptr_t my_guard;
172 #endif /* TBB_USE_ASSERT */
173 }; // struct arena_base
174 
175 class arena: public padded<arena_base>
176 {
179 public:
181 
187  };
188 
190  arena ( market&, unsigned max_num_workers, unsigned num_reserved_slots );
191 
193  static arena& allocate_arena( market&, unsigned num_slots, unsigned num_reserved_slots );
194 
195  static int unsigned num_arena_slots ( unsigned num_slots ) {
196  return max(2u, num_slots);
197  }
198 
199  static int allocation_size ( unsigned num_slots ) {
200  return sizeof(base_type) + num_slots * (sizeof(mail_outbox) + sizeof(arena_slot));
201  }
202 
205  __TBB_ASSERT( 0<id, "affinity id must be positive integer" );
206  __TBB_ASSERT( id <= my_num_slots, "affinity id out of bounds" );
207 
208  return ((mail_outbox*)this)[-(int)id];
209  }
210 
212  void free_arena ();
213 
214  typedef uintptr_t pool_state_t;
215 
217  static const pool_state_t SNAPSHOT_EMPTY = 0;
218 
221 
223  static const unsigned ref_external_bits = 12; // up to 4095 external and 1M workers
224 
226  static const unsigned ref_external = 1;
227  static const unsigned ref_worker = 1<<ref_external_bits;
228 
230  static bool is_busy_or_empty( pool_state_t s ) { return s < SNAPSHOT_FULL; }
231 
233  unsigned num_workers_active( ) {
235  }
236 
238  template<arena::new_work_type work_type> void advertise_new_work();
239 
241 
242  bool is_out_of_work();
243 
245  void enqueue_task( task&, intptr_t, FastRandom & );
246 
248  void process( generic_scheduler& );
249 
251  template<unsigned ref_param>
252  inline void on_thread_leaving ( );
253 
254 #if __TBB_STATISTICS
255  void dump_arena_statistics ();
257 #endif /* __TBB_STATISTICS */
258 
259 #if __TBB_TASK_PRIORITY
260 
262  inline bool may_have_tasks ( generic_scheduler*, bool& tasks_present, bool& dequeuing_possible );
263 
265  void orphan_offloaded_tasks ( generic_scheduler& s );
266 #endif /* __TBB_TASK_PRIORITY */
267 
268 #if __TBB_COUNT_TASK_NODES
269  intptr_t workers_task_node_count();
271 #endif
272 
274  bool has_enqueued_tasks();
275 
276 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
277  bool recall_by_mandatory_request() const {
279  return my_market->my_mandatory_num_requested && my_concurrency_mode==cm_normal;
280  }
281 
283  bool must_have_concurrency() const {
284  return my_num_workers_requested &&
285  ( my_concurrency_mode==cm_enforced_local || my_concurrency_mode==cm_enforced_global );
286  }
287 #endif
288  static const size_t out_of_arena = ~size_t(0);
290  template <bool as_worker>
293  size_t occupy_free_slot_in_range( generic_scheduler& s, size_t lower, size_t upper );
294 
297 }; // class arena
298 
299 template<unsigned ref_param>
300 inline void arena::on_thread_leaving ( ) {
301  // Called when a worker or master leaves the arena: releases ref_param references and, if none remain, asks the market to try destroying the arena.
302  // The implementation of the arena destruction synchronization logic contained various
303  // bugs/flaws at different stages of its evolution, so below is a detailed
304  // description of the issues taken into consideration in the framework of the
305  // current design.
306  //
307  // In case of using fire-and-forget tasks (scheduled via task::enqueue())
308  // master thread is allowed to leave its arena before all its work is executed,
309  // and market may temporarily revoke all workers from this arena. Since revoked
310  // workers never attempt to reset arena state to EMPTY and cancel its request
311  // to RML for threads, the arena object is destroyed only when both the last
312  // thread is leaving it and arena's state is EMPTY (that is its master thread
313  // left and it does not contain any work).
314  // Thus resetting arena to EMPTY state (as earlier TBB versions did) should not
315  // be done here (or anywhere else in the master thread for that matter); doing so
316  // can result either in arena's premature destruction (at least without
317  // additional costly checks in workers) or in unnecessary arena state changes
318  // (and ensuing workers migration).
319  //
320  // A worker that checks for work presence and transitions arena to the EMPTY
321  // state (in snapshot taking procedure arena::is_out_of_work()) updates
322  // arena::my_pool_state first and only then arena::my_num_workers_requested.
323  // So the check for work absence must be done against the latter field.
324  //
325  // There is a time window between decrementing the active threads count and
326  // checking if there is an outstanding request for workers. In it a new worker
327  // thread may arrive, finish the remaining work, set the arena state to empty,
328  // and leave, decrementing its refcount and destroying the arena. Then the current
329  // thread will destroy the arena a second time. To preclude this, a local copy of
330  // the outstanding request value can be stored before decrementing active threads count.
331  //
332  // But this technique may cause two other problems. When the stored request is
333  // zero, it is possible that arena still has threads and they can generate new
334  // tasks and thus re-establish non-zero requests. Then all the threads can be
335  // revoked (as described above) leaving this thread the last one, and causing
336  // it to destroy non-empty arena.
337  //
338  // The other problem takes place when the stored request is non-zero. Another
339  // thread may complete the work, set arena state to empty, and leave without
340  // arena destruction before this thread decrements the refcount. This thread
341  // cannot destroy the arena either. Thus the arena may be "orphaned".
342  //
343  // In both cases we cannot dereference arena pointer after the refcount is
344  // decremented, as our arena may already be destroyed.
345  //
346  // If this is the master thread, the market is protected by refcount to it.
347  // In case of workers market's liveness is ensured by the RML connection
348  // rundown protocol, according to which the client (i.e. the market) lives
349  // until RML server notifies it about connection termination, and this
350  // notification is fired only after all workers return into RML.
351  //
352  // Thus if we decremented refcount to zero we ask the market to check arena
353  // state (including whether it is still alive) under the lock.
354  //
355  uintptr_t aba_epoch = my_aba_epoch; // read before the refcount drop; the arena must not be dereferenced afterwards
356  market* m = my_market;
357  __TBB_ASSERT(my_references >= ref_param, "broken arena reference counter");
358 #if __TBB_STATISTICS_EARLY_DUMP
359  // While still holding a reference to the arena, compute how many external references are left.
360  // If just one, dump statistics.
361  if ( modulo_power_of_two(my_references,ref_worker)==ref_param ) // may only be true with ref_external
362  GATHER_STATISTIC( dump_arena_statistics() );
363 #endif
364 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
365  // When there are no workers someone must free the arena, as
366  // without workers, no one calls is_out_of_work().
367  // Skip workerless arenas because they have no demand for workers.
368  // TODO: consider more strict conditions for the cleanup,
369  // because it can create the demand of workers,
370  // but the arena can be already empty (and so ready for destroying)
371  if( ref_param==ref_external && my_num_slots != my_num_reserved_slots
372  && 0 == m->my_num_workers_soft_limit && my_concurrency_mode==cm_normal ) {
373  bool is_out = false;
374  for (int i=0; i<num_priority_levels; i++) {
375  is_out = is_out_of_work();
376  if (is_out)
377  break;
378  }
379  // We expect that in the worst case it is enough to have num_priority_levels-1
380  // calls to restore priorities and yet another is_out_of_work() to confirm
381  // that no work was found. But as market::set_active_num_workers() can be called
382  // concurrently, we can't guarantee that the last is_out_of_work() returns true.
383  }
384 #endif
385  if ( (my_references -= ref_param ) == 0 )
386  m->try_destroy_arena( this, aba_epoch );
387 }
388 
389 template<arena::new_work_type work_type> void arena::advertise_new_work() {
390  if( work_type == work_enqueued ) {
391 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
393  if( my_concurrency_mode!=cm_enforced_global ) {
394  if( my_market->mandatory_concurrency_enable( this ) ) {
396  return;
397  }
398  }
399  } else if( my_max_num_workers==0 && my_num_reserved_slots==1 ) {
400  my_max_num_workers = 1;
401  __TBB_ASSERT(my_concurrency_mode==cm_normal, NULL);
402  my_concurrency_mode = cm_enforced_local;
404  my_market->adjust_demand( *this, 1 );
405  return;
406  }
407 #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
408  // Local memory fence here and below is required to avoid missed wakeups; see the comment below.
409  // Starvation resistant tasks require concurrency, so missed wakeups are unacceptable.
410  atomic_fence();
411  }
412  else if( work_type == wakeup ) {
413  __TBB_ASSERT(my_max_num_workers!=0, "Unexpected worker wakeup request");
414  atomic_fence();
415  }
416  // Double-check idiom that, in case of spawning, is deliberately sloppy about memory fences.
417  // Technically, to avoid missed wakeups, there should be a full memory fence between the point we
418  // released the task pool (i.e. spawned task) and read the arena's state. However, adding such a
419  // fence might hurt overall performance more than it helps, because the fence would be executed
420  // on every task pool release, even when stealing does not occur. Since TBB allows parallelism,
421  // but never promises parallelism, the missed wakeup is not a correctness problem.
422  pool_state_t snapshot = my_pool_state;
423  if( is_busy_or_empty(snapshot) ) {
424  // Attempt to mark as full. The compare_and_swap below is a little unusual because the
425  // result is compared to a value that can be different than the comparand argument.
427  if( snapshot!=SNAPSHOT_EMPTY ) {
428  // This thread read "busy" into snapshot, and then another thread transitioned
429  // my_pool_state to "empty" in the meantime, which caused the compare_and_swap above
430  // to fail. Attempt to transition my_pool_state from "empty" to "full".
432  // Some other thread transitioned my_pool_state from "empty", and hence became
433  // responsible for waking up workers.
434  return;
435  }
436  }
437  // This thread transitioned pool from empty to full state, and thus is responsible for
438  // telling the market that there is work to do.
439 #if __TBB_ENQUEUE_ENFORCED_CONCURRENCY
440  if( work_type == work_spawned ) {
441  if( my_concurrency_mode!=cm_normal ) {
442  switch( my_concurrency_mode ) {
443  case cm_enforced_local:
445  __TBB_ASSERT(!governor::local_scheduler()->is_worker(), "");
446  // There was deliberate oversubscription on 1 core for sake of starvation-resistant tasks.
447  // Now a single active thread (must be the master) supposedly starts a new parallel region
448  // with relaxed sequential semantics, and oversubscription should be avoided.
449  // Demand for workers has been decreased to 0 during SNAPSHOT_EMPTY, so just keep it.
450  my_max_num_workers = 0;
451  my_concurrency_mode = cm_normal;
452  break;
453  case cm_enforced_global:
454  my_market->mandatory_concurrency_disable( this );
456  break;
457  default:
458  break;
459  }
460  return;
461  }
462  }
463 #endif /* __TBB_ENQUEUE_ENFORCED_CONCURRENCY */
464  // TODO: investigate adjusting of arena's demand by a single worker.
466  }
467  }
468 }
469 
470 } // namespace internal
471 } // namespace tbb
472 
473 #endif /* _TBB_arena_H */
argument_integer_type modulo_power_of_two(argument_integer_type arg, divisor_integer_type divisor)
A function to compute arg modulo divisor where divisor is a power of 2.
Definition: tbb_stddef.h:361
void try_destroy_arena(arena *, uintptr_t aba_epoch)
Removes the arena from the market's list.
Definition: market.cpp:318
bool has_enqueued_tasks()
Check for the presence of enqueued tasks at all priority levels.
Definition: arena.cpp:375
static const intptr_t num_priority_levels
mail_outbox & mailbox(affinity_id id)
Get reference to mailbox corresponding to given affinity_id.
Definition: arena.h:204
static bool is_busy_or_empty(pool_state_t s)
No tasks to steal or snapshot is being taken.
Definition: arena.h:230
void const char const char int ITT_FORMAT __itt_group_sync s
concurrent_monitor my_exit_monitors
Waiting object for master threads that cannot join the arena.
Definition: arena.h:167
new_work_type
Types of work advertised by advertise_new_work()
Definition: arena.h:183
Used to form groups of tasks.
Definition: task.h:332
A fast random number generator.
Definition: tbb_misc.h:128
static arena & allocate_arena(market &, unsigned num_slots, unsigned num_reserved_slots)
Allocate an instance of arena.
Definition: arena.cpp:238
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
The structure of an arena, except the array of slots.
Definition: arena.h:49
unsigned my_num_reserved_slots
The number of reserved slots (can be occupied only by masters).
Definition: arena.h:152
unsigned my_max_num_workers
The number of workers requested by the master thread owning the arena.
Definition: arena.h:89
Base class for user-defined tasks.
Definition: task.h:589
Work stealing task scheduler.
Definition: scheduler.h:120
static generic_scheduler * local_scheduler()
Obtain the thread-local instance of the TBB scheduler.
Definition: governor.h:122
The graph class.
unsigned my_num_slots
The number of slots in the arena.
Definition: arena.h:149
cpu_ctl_env my_cpu_ctl_env
FPU control settings of arena's master thread captured at the moment of arena instantiation.
Definition: arena.h:138
uintptr_t pool_state_t
Definition: arena.h:214
Class representing where mail is put.
Definition: mailbox.h:96
static int unsigned num_arena_slots(unsigned num_slots)
Definition: arena.h:195
tbb::atomic< uintptr_t > my_pool_state
Current task pool state and estimate of available tasks amount.
Definition: arena.h:99
static const pool_state_t SNAPSHOT_FULL
At least one task has been offered for stealing since the last snapshot started.
Definition: arena.h:220
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain __itt_id ITT_FORMAT p const __itt_domain __itt_id __itt_timestamp __itt_timestamp ITT_FORMAT lu const __itt_domain __itt_id __itt_id __itt_string_handle ITT_FORMAT p const __itt_domain ITT_FORMAT p const __itt_domain __itt_string_handle unsigned long long ITT_FORMAT lu const __itt_domain __itt_id __itt_string_handle __itt_metadata_type size_t void ITT_FORMAT p const __itt_domain __itt_id __itt_string_handle const wchar_t size_t ITT_FORMAT lu const __itt_domain __itt_id __itt_relation __itt_id ITT_FORMAT p const wchar_t int ITT_FORMAT __itt_group_mark d int
void on_thread_leaving()
Notification that worker or master leaves its arena.
Definition: arena.h:300
int my_num_workers_requested
The number of workers that are currently requested from the resource manager.
Definition: arena.h:92
unsigned num_workers_active()
The number of workers active in the arena.
Definition: arena.h:233
#define GATHER_STATISTIC(x)
void restore_priority_if_need()
If enqueued tasks found, restore arena priority and task presence status.
Definition: arena.cpp:383
atomic< unsigned > my_references
Reference counter for the arena.
Definition: arena.h:57
static const unsigned ref_worker
Definition: arena.h:227
static const unsigned ref_external_bits
The number of least significant bits for external references.
Definition: arena.h:223
uintptr_t my_aba_epoch
ABA prevention marker.
Definition: arena.h:134
The container for "fairness-oriented" aka "enqueued" tasks.
Definition: task_stream.h:69
void process(generic_scheduler &)
Registers the worker with the arena and enters TBB scheduler dispatch loop.
Definition: arena.cpp:102
void adjust_demand(arena &, int delta)
Request that the arena's demand for workers be adjusted.
Definition: market.cpp:586
unsigned short affinity_id
An id as used for specifying affinity.
Definition: task.h:120
static const size_t out_of_arena
Definition: arena.h:288
T max(const T &val1, const T &val2)
Utility template function returning greater of the two values.
Definition: tbb_misc.h:112
size_t occupy_free_slot(generic_scheduler &s)
Tries to occupy a slot in the arena. On success, returns the slot index; if no slot is available,...
Definition: arena.cpp:86
static int allocation_size(unsigned num_slots)
Definition: arena.h:199
arena(market &, unsigned max_num_workers, unsigned num_reserved_slots)
Constructor.
Definition: arena.cpp:186
void advertise_new_work()
If necessary, raise a flag that there is a new job in the arena.
Definition: arena.h:389
static const unsigned ref_external
Reference increment values for externals and workers.
Definition: arena.h:226
static const pool_state_t SNAPSHOT_EMPTY
No tasks to steal since last snapshot was taken.
Definition: arena.h:217
void atomic_fence()
Sequentially consistent full memory fence.
Definition: tbb_machine.h:339
unsigned my_num_workers_allotted
The number of workers that have been marked out by the resource manager to service the arena.
Definition: arena.h:51
void free_arena()
Completes arena shutdown, destructs and deallocates it.
Definition: arena.cpp:249
void enqueue_task(task &, intptr_t, FastRandom &)
Enqueue a task into the starvation-resistant queue.
Definition: arena.cpp:554
unsigned my_num_workers_soft_limit
Current application-imposed limit on the number of workers (see set_active_num_workers())
Definition: market.h:78
size_t occupy_free_slot_in_range(generic_scheduler &s, size_t lower, size_t upper)
Tries to occupy a slot in the specified range.
Definition: arena.cpp:71
arena_slot my_slots[1]
Definition: arena.h:296
market * my_market
The market that owns this arena.
Definition: arena.h:131
bool is_out_of_work()
Check if there is a job anywhere in the arena.
Definition: arena.cpp:403
padded< arena_base > base_type
Definition: arena.h:180
task_stream< num_priority_levels > my_task_stream
Task pool for the tasks scheduled via task::enqueue() method.
Definition: arena.h:76
value_type compare_and_swap(value_type value, value_type comparand)
Definition: atomic.h:285
atomic< unsigned > my_limit
The maximal number of currently busy slots.
Definition: arena.h:65
Pads type T to fill out to a multiple of cache line size.
Definition: tbb_stddef.h:261

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.