Intel(R) Threading Building Blocks Doxygen Documentation  version 4.2.3
parallel_scan.h
Go to the documentation of this file.
1 /*
2  Copyright (c) 2005-2019 Intel Corporation
3 
4  Licensed under the Apache License, Version 2.0 (the "License");
5  you may not use this file except in compliance with the License.
6  You may obtain a copy of the License at
7 
8  http://www.apache.org/licenses/LICENSE-2.0
9 
10  Unless required by applicable law or agreed to in writing, software
11  distributed under the License is distributed on an "AS IS" BASIS,
12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  See the License for the specific language governing permissions and
14  limitations under the License.
15 */
16 
17 #ifndef __TBB_parallel_scan_H
18 #define __TBB_parallel_scan_H
19 
20 #include "task.h"
21 #include "aligned_space.h"
22 #include <new>
23 #include "partitioner.h"
24 
25 namespace tbb {
26 
28 
29 struct pre_scan_tag {
30  static bool is_final_scan() {return false;}
31  operator bool() {return is_final_scan();}
32 };
33 
35 
37  static bool is_final_scan() {return true;}
38  operator bool() {return is_final_scan();}
39 };
40 
42 namespace internal {
43 
45 
46  template<typename Range, typename Body>
47  class final_sum: public task {
48  public:
49  Body my_body;
50  private:
54  public:
55  final_sum( Body& body_ ) :
56  my_body(body_,split())
57  {
59  }
61  my_range.begin()->~Range();
62  }
63  void finish_construction( const Range& range_, Body* stuff_last_ ) {
64  new( my_range.begin() ) Range(range_);
65  my_stuff_last = stuff_last_;
66  }
67  private:
70  if( my_stuff_last )
71  my_stuff_last->assign(my_body);
72  return NULL;
73  }
74  };
75 
77 
78  template<typename Range, typename Body>
79  class sum_node: public task {
81  public:
85  private:
90  Range my_range;
91  sum_node( const Range range_, bool left_is_final_ ) :
92  my_stuff_last(NULL),
93  my_left_sum(NULL),
94  my_left(NULL),
95  my_right(NULL),
96  my_left_is_final(left_is_final_),
97  my_range(range_)
98  {
99  // Poison fields that will be set by second pass.
102  }
103  task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
104  if( !n ) {
105  f.recycle_as_child_of( *this );
106  f.finish_construction( range_, stuff_last_ );
107  return &f;
108  } else {
109  n->my_body = &f;
110  n->my_incoming = incoming_;
111  n->my_stuff_last = stuff_last_;
112  return n;
113  }
114  }
116  if( my_body ) {
117  if( my_incoming )
118  my_left_sum->my_body.reverse_join( my_incoming->my_body );
120  sum_node& c = *this;
123  set_ref_count( (a!=NULL)+(b!=NULL) );
124  my_body = NULL;
125  if( a ) spawn(*b);
126  else a = b;
127  return a;
128  } else {
129  return NULL;
130  }
131  }
132  template<typename Range_,typename Body_,typename Partitioner_>
133  friend class start_scan;
134 
135  template<typename Range_,typename Body_>
136  friend class finish_scan;
137  };
138 
140 
141  template<typename Range, typename Body>
142  class finish_scan: public task {
147  public:
150 
152  __TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
153  if( my_result.my_left )
154  my_result.my_left_is_final = false;
155  if( my_right_zombie && my_sum )
156  ((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
157  __TBB_ASSERT( !my_return_slot, NULL );
160  } else {
161  destroy( my_result );
162  }
163  if( my_right_zombie && !my_sum && !my_result.my_right ) {
164  destroy(*my_right_zombie);
165  my_right_zombie = NULL;
166  }
167  return NULL;
168  }
169 
170  finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
171  my_sum(sum_),
172  my_return_slot(return_slot_),
173  my_right_zombie(NULL),
174  my_result(result_)
175  {
176  __TBB_ASSERT( !my_return_slot, NULL );
177  }
178  };
179 
181 
182  template<typename Range, typename Body, typename Partitioner=simple_partitioner>
183  class start_scan: public task {
194  Range my_range;
195  typename Partitioner::partition_type my_partition;
197  public:
198  start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
199  my_body(parent_.my_body),
200  my_sum(parent_.my_sum),
201  my_return_slot(&return_slot_),
202  my_parent_sum(parent_sum_),
203  my_is_final(parent_.my_is_final),
204  my_is_right_child(false),
205  my_range(parent_.my_range,split()),
206  my_partition(parent_.my_partition,split())
207  {
208  __TBB_ASSERT( !*my_return_slot, NULL );
209  }
210 
211  start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
212  my_body(&body_),
213  my_sum(NULL),
214  my_return_slot(&return_slot_),
215  my_parent_sum(NULL),
216  my_is_final(true),
217  my_is_right_child(false),
218  my_range(range_),
219  my_partition(partitioner_)
220  {
221  __TBB_ASSERT( !*my_return_slot, NULL );
222  }
223 
224  static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
225  if( !range_.empty() ) {
226  typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
227  internal::sum_node<Range,Body>* root = NULL;
228  final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
229  start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
230  /*my_return_slot=*/root,
231  range_,
232  *temp_body,
233  partitioner_ );
234  temp_body->my_body.reverse_join(body_);
235  task::spawn_root_and_wait( pass1 );
236  if( root ) {
237  root->my_body = temp_body;
238  root->my_incoming = NULL;
239  root->my_stuff_last = &body_;
240  task::spawn_root_and_wait( *root );
241  } else {
242  body_.assign(temp_body->my_body);
243  temp_body->finish_construction( range_, NULL );
244  temp_body->destroy(*temp_body);
245  }
246  }
247  }
248  };
249 
250  template<typename Range, typename Body, typename Partitioner>
252  typedef internal::finish_scan<Range,Body> finish_pass1_type;
253  finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
254  // Inspecting p->result.left_sum would ordinarily be a race condition.
255  // But we inspect it only if we are not a stolen task, in which case we
256  // know that task assigning to p->result.left_sum has completed.
257  bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
258  if( treat_as_stolen ) {
259  // Invocation is for right child that has been really stolen or needs to be virtually stolen
260  p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
261  my_is_final = false;
262  }
263  task* next_task = NULL;
264  if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
265  if( my_is_final )
266  (my_body->my_body)( my_range, final_scan_tag() );
267  else if( my_sum )
268  (my_body->my_body)( my_range, pre_scan_tag() );
269  if( my_sum )
270  *my_sum = my_body;
271  __TBB_ASSERT( !*my_return_slot, NULL );
272  } else {
273  sum_node_type* result;
274  if( my_parent_sum )
275  result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
276  else
277  result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
278  finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
279  // Split off right child
280  start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
281  b.my_is_right_child = true;
282  // Left child is recycling of *this. Must recycle this before spawning b,
283  // otherwise b might complete and decrement c.ref_count() to zero, which
284  // would cause c.execute() to run prematurely.
285  recycle_as_child_of(c);
286  c.set_ref_count(2);
287  c.spawn(b);
288  my_sum = &result->my_left_sum;
289  my_return_slot = &result->my_left;
290  my_is_right_child = false;
291  next_task = this;
292  my_parent_sum = result;
293  __TBB_ASSERT( !*my_return_slot, NULL );
294  }
295  return next_task;
296  }
297 
298  template<typename Range, typename Value, typename Scan, typename ReverseJoin>
300  Value my_sum;
301  const Value& identity_element;
302  const Scan& my_scan;
303  const ReverseJoin& my_reverse_join;
304  public:
305  lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
306  : my_sum(identity)
307  , identity_element(identity)
308  , my_scan(scan)
309  , my_reverse_join(rev_join) {}
310 
314  , my_scan(b.my_scan)
316 
317  template<typename Tag>
318  void operator()( const Range& r, Tag tag ) {
319  my_sum = my_scan(r, my_sum, tag);
320  }
321 
324  }
325 
327  my_sum = b.my_sum;
328  }
329 
330  Value result() const {
331  return my_sum;
332  }
333  };
334 } // namespace internal
336 
337 // Requirements on Range concept are documented in blocked_range.h
338 
356 
358 
359 template<typename Range, typename Body>
360 void parallel_scan( const Range& range, Body& body ) {
362 }
363 
365 
366 template<typename Range, typename Body>
367 void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
369 }
370 
372 
373 template<typename Range, typename Body>
374 void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
376 }
377 
379 
380 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
381 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
382  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
384  return body.result();
385 }
386 
388 
389 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
390 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
391  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
392  tbb::parallel_scan(range,body,partitioner);
393  return body.result();
394 }
395 
397 
398 template<typename Range, typename Value, typename Scan, typename ReverseJoin>
399 Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
400  internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
401  tbb::parallel_scan(range,body,partitioner);
402  return body.result();
403 }
404 
406 
407 } // namespace tbb
408 
409 #endif /* __TBB_parallel_scan_H */
410 
void finish_construction(const Range &range_, Body *stuff_last_)
Definition: parallel_scan.h:63
void const char const char int ITT_FORMAT __itt_group_sync x void const char ITT_FORMAT __itt_group_sync s void ITT_FORMAT __itt_group_sync p void ITT_FORMAT p void ITT_FORMAT p no args __itt_suppress_mode_t unsigned int void size_t ITT_FORMAT d void ITT_FORMAT p void ITT_FORMAT p __itt_model_site __itt_model_site_instance ITT_FORMAT p __itt_model_task __itt_model_task_instance ITT_FORMAT p void ITT_FORMAT p void ITT_FORMAT p void size_t ITT_FORMAT d void ITT_FORMAT p const wchar_t ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s const char ITT_FORMAT s no args void ITT_FORMAT p size_t ITT_FORMAT d no args const wchar_t const wchar_t ITT_FORMAT s __itt_heap_function void size_t int ITT_FORMAT d __itt_heap_function void ITT_FORMAT p __itt_heap_function void void size_t int ITT_FORMAT d no args no args unsigned int ITT_FORMAT u const __itt_domain __itt_id ITT_FORMAT lu const __itt_domain __itt_id __itt_id parent
task * create_child(const Range &range_, final_sum_type &f, sum_node *n, final_sum_type *incoming_, Body *stuff_last_)
Used to indicate that the final scan is being performed.
Definition: parallel_scan.h:36
Dummy type that distinguishes splitting constructor from copy constructor.
Definition: tbb_stddef.h:395
lambda_scan_body(const Value &identity, const Scan &scan, const ReverseJoin &rev_join)
void recycle_as_continuation()
Change this to be a continuation of its former self.
Definition: task.h:681
sum_node_type *& my_return_slot
finish_scan(sum_node_type *&return_slot_, final_sum_type **sum_, sum_node_type &result_)
void parallel_scan(const Range &range, Body &body)
Parallel prefix with default partitioner.
sum_node< Range, Body > sum_node_type
final_sum_type * my_right_zombie
task * execute() __TBB_override
Should be overridden by derived classes.
Definition: parallel_scan.h:68
#define __TBB_ASSERT(predicate, comment)
No-op version of __TBB_ASSERT.
Definition: tbb_stddef.h:165
final_sum_type * my_body
#define __TBB_override
Definition: tbb_stddef.h:240
Base class for user-defined tasks.
Definition: task.h:589
final_sum< Range, Body > final_sum_type
static void run(const Range &range_, Body &body_, const Partitioner &partitioner_)
static bool is_final_scan()
Definition: parallel_scan.h:30
The graph class.
Split work to be done in the scan.
Definition: parallel_scan.h:79
void reverse_join(lambda_scan_body &a)
Used to indicate that the initial scan is being performed.
Definition: parallel_scan.h:29
static bool is_final_scan()
Definition: parallel_scan.h:37
Base class for types that should not be assigned.
Definition: tbb_stddef.h:320
final_sum_type **const my_sum
Body * my_stuff_last
Where to put result of last subrange, or NULL if not last subrange.
Definition: parallel_scan.h:53
Partitioner::partition_type my_partition
Initial task to split the work.
start_scan(sum_node_type *&return_slot_, const Range &range_, final_sum_type &body_, const Partitioner &partitioner_)
A simple partitioner.
Definition: partitioner.h:583
Performs final scan for a leaf.
Definition: parallel_scan.h:47
#define __TBB_DEFAULT_PARTITIONER
Definition: tbb_config.h:586
final_sum_type * my_body
Definition: parallel_scan.h:83
final_sum_type * my_incoming
Definition: parallel_scan.h:82
lambda_scan_body(lambda_scan_body &b, split)
An auto partitioner.
Definition: partitioner.h:610
Combine partial results.
task * execute() __TBB_override
Should be overridden by derived classes.
task * execute() __TBB_override
Should be overridden by derived classes.
final_sum< Range, Body > final_sum_type
int ref_count() const
The internal reference count.
Definition: task.h:867
T * begin() const
Pointer to beginning of array.
Definition: aligned_space.h:35
void operator()(const Range &r, Tag tag)
static void spawn_root_and_wait(task &root)
Spawn task allocated by allocate_root, wait for it to complete, and deallocate it.
Definition: task.h:778
sum_node< Range, Body > sum_node_type
sum_node_type ** my_return_slot
const ReverseJoin & my_reverse_join
task * execute() __TBB_override
Should be overridden by derived classes.
void assign(lambda_scan_body &b)
void const char const char int ITT_FORMAT __itt_group_sync p
void recycle_as_child_of(task &new_parent)
Change this to be a child of new_parent.
Definition: task.h:695
final_sum_type * my_left_sum
Definition: parallel_scan.h:86
aligned_space< Range > my_range
Definition: parallel_scan.h:51
void poison_pointer(T *__TBB_atomic &)
Definition: tbb_stddef.h:305
sum_node_type * my_parent_sum
final_sum_type ** my_sum
void set_ref_count(int count)
Set reference count.
Definition: task.h:731
sum_node(const Range range_, bool left_is_final_)
Definition: parallel_scan.h:91
static internal::allocate_root_proxy allocate_root()
Returns proxy for overloaded new that allocates a root task.
Definition: task.h:633
final_sum< Range, Body > final_sum_type
Definition: parallel_scan.h:80

Copyright © 2005-2019 Intel Corporation. All Rights Reserved.

Intel, Pentium, Intel Xeon, Itanium, Intel XScale and VTune are registered trademarks or trademarks of Intel Corporation or its subsidiaries in the United States and other countries.

* Other names and brands may be claimed as the property of others.