libstdc++
balanced_quicksort.h
Go to the documentation of this file.
1 // -*- C++ -*-
2 
3 // Copyright (C) 2007-2019 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the terms
7 // of the GNU General Public License as published by the Free Software
8 // Foundation; either version 3, or (at your option) any later
9 // version.
10 
11 // This library is distributed in the hope that it will be useful, but
12 // WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 // General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file parallel/balanced_quicksort.h
26  * @brief Implementation of a dynamically load-balanced parallel quicksort.
27  *
28  * It works in-place and needs only logarithmic extra memory.
29  * The algorithm is similar to the one proposed in
30  *
31  * P. Tsigas and Y. Zhang.
32  * A simple, fast parallel implementation of quicksort and
33  * its performance evaluation on SUN enterprise 10000.
34  * In 11th Euromicro Conference on Parallel, Distributed and
35  * Network-Based Processing, page 372, 2003.
36  *
37  * This file is a GNU parallel extension to the Standard C++ Library.
38  */
39 
40 // Written by Johannes Singler.
41 
42 #ifndef _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H
43 #define _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H 1
44 
46 #include <bits/stl_algo.h>
47 #include <bits/stl_function.h>
48 
49 #include <parallel/settings.h>
50 #include <parallel/partition.h>
51 #include <parallel/random_number.h>
52 #include <parallel/queue.h>
53 
54 #if _GLIBCXX_PARALLEL_ASSERTIONS
55 #include <parallel/checkers.h>
56 #ifdef _GLIBCXX_HAVE_UNISTD_H
57 #include <unistd.h>
58 #endif
59 #endif
60 
61 namespace __gnu_parallel
62 {
63  /** @brief Information local to one thread in the parallel quicksort run. */
64  template<typename _RAIter>
66  {
67  typedef std::iterator_traits<_RAIter> _TraitsType;
68  typedef typename _TraitsType::difference_type _DifferenceType;
69 
70  /** @brief Continuous part of the sequence, described by an
71  iterator pair. */
73 
74  /** @brief Initial piece to work on. */
76 
77  /** @brief Work-stealing queue. */
79 
80  /** @brief Number of threads involved in this algorithm. */
82 
83  /** @brief Pointer to a counter of elements left over to sort. */
84  volatile _DifferenceType* _M_elements_leftover;
85 
86  /** @brief The complete sequence to sort. */
88 
89  /** @brief Constructor.
90  * @param __queue_size size of the work-stealing queue. */
91  _QSBThreadLocal(int __queue_size) : _M_leftover_parts(__queue_size) { }
92  };
93 
94  /** @brief Balanced quicksort divide step.
95  * @param __begin Begin iterator of subsequence.
96  * @param __end End iterator of subsequence.
97  * @param __comp Comparator.
98  * @param __num_threads Number of threads that are allowed to work on
99  * this part.
100  * @pre @c (__end-__begin)>=1 */
101  template<typename _RAIter, typename _Compare>
102  typename std::iterator_traits<_RAIter>::difference_type
103  __qsb_divide(_RAIter __begin, _RAIter __end,
104  _Compare __comp, _ThreadIndex __num_threads)
105  {
106  _GLIBCXX_PARALLEL_ASSERT(__num_threads > 0);
107 
108  typedef std::iterator_traits<_RAIter> _TraitsType;
109  typedef typename _TraitsType::value_type _ValueType;
110  typedef typename _TraitsType::difference_type _DifferenceType;
111 
112  _RAIter __pivot_pos =
113  __median_of_three_iterators(__begin, __begin + (__end - __begin) / 2,
114  __end - 1, __comp);
115 
116 #if defined(_GLIBCXX_PARALLEL_ASSERTIONS)
117  // Must be in between somewhere.
118  _DifferenceType __n = __end - __begin;
119 
120  _GLIBCXX_PARALLEL_ASSERT((!__comp(*__pivot_pos, *__begin)
121  && !__comp(*(__begin + __n / 2),
122  *__pivot_pos))
123  || (!__comp(*__pivot_pos, *__begin)
124  && !__comp(*(__end - 1), *__pivot_pos))
125  || (!__comp(*__pivot_pos, *(__begin + __n / 2))
126  && !__comp(*__begin, *__pivot_pos))
127  || (!__comp(*__pivot_pos, *(__begin + __n / 2))
128  && !__comp(*(__end - 1), *__pivot_pos))
129  || (!__comp(*__pivot_pos, *(__end - 1))
130  && !__comp(*__begin, *__pivot_pos))
131  || (!__comp(*__pivot_pos, *(__end - 1))
132  && !__comp(*(__begin + __n / 2),
133  *__pivot_pos)));
134 #endif
135 
136  // Swap pivot value to end.
137  if (__pivot_pos != (__end - 1))
138  std::iter_swap(__pivot_pos, __end - 1);
139  __pivot_pos = __end - 1;
140 
142  __pred(__comp, *__pivot_pos);
143 
144  // Divide, returning __end - __begin - 1 in the worst case.
145  _DifferenceType __split_pos = __parallel_partition(__begin, __end - 1,
146  __pred,
147  __num_threads);
148 
149  // Swap back pivot to middle.
150  std::iter_swap(__begin + __split_pos, __pivot_pos);
151  __pivot_pos = __begin + __split_pos;
152 
153 #if _GLIBCXX_PARALLEL_ASSERTIONS
154  _RAIter __r;
155  for (__r = __begin; __r != __pivot_pos; ++__r)
156  _GLIBCXX_PARALLEL_ASSERT(__comp(*__r, *__pivot_pos));
157  for (; __r != __end; ++__r)
158  _GLIBCXX_PARALLEL_ASSERT(!__comp(*__r, *__pivot_pos));
159 #endif
160 
161  return __split_pos;
162  }
163 
164  /** @brief Quicksort conquer step.
165  * @param __tls Array of thread-local storages.
166  * @param __begin Begin iterator of subsequence.
167  * @param __end End iterator of subsequence.
168  * @param __comp Comparator.
169  * @param __iam Number of the thread processing this function.
170  * @param __num_threads
171  * Number of threads that are allowed to work on this part. */
172  template<typename _RAIter, typename _Compare>
173  void
175  _RAIter __begin, _RAIter __end,
176  _Compare __comp,
177  _ThreadIndex __iam, _ThreadIndex __num_threads,
178  bool __parent_wait)
179  {
180  typedef std::iterator_traits<_RAIter> _TraitsType;
181  typedef typename _TraitsType::value_type _ValueType;
182  typedef typename _TraitsType::difference_type _DifferenceType;
183 
184  _DifferenceType __n = __end - __begin;
185 
186  if (__num_threads <= 1 || __n <= 1)
187  {
188  __tls[__iam]->_M_initial.first = __begin;
189  __tls[__iam]->_M_initial.second = __end;
190 
191  __qsb_local_sort_with_helping(__tls, __comp, __iam, __parent_wait);
192 
193  return;
194  }
195 
196  // Divide step.
197  _DifferenceType __split_pos =
198  __qsb_divide(__begin, __end, __comp, __num_threads);
199 
200 #if _GLIBCXX_PARALLEL_ASSERTIONS
201  _GLIBCXX_PARALLEL_ASSERT(0 <= __split_pos &&
202  __split_pos < (__end - __begin));
203 #endif
204 
206  __num_threads_leftside = std::max<_ThreadIndex>
207  (1, std::min<_ThreadIndex>(__num_threads - 1, __split_pos
208  * __num_threads / __n));
209 
210 # pragma omp atomic
211  *__tls[__iam]->_M_elements_leftover -= (_DifferenceType)1;
212 
213  // Conquer step.
214 # pragma omp parallel num_threads(2)
215  {
216  bool __wait;
217  if(omp_get_num_threads() < 2)
218  __wait = false;
219  else
220  __wait = __parent_wait;
221 
222 # pragma omp sections
223  {
224 # pragma omp section
225  {
226  __qsb_conquer(__tls, __begin, __begin + __split_pos, __comp,
227  __iam, __num_threads_leftside, __wait);
228  __wait = __parent_wait;
229  }
230  // The pivot_pos is left in place, to ensure termination.
231 # pragma omp section
232  {
233  __qsb_conquer(__tls, __begin + __split_pos + 1, __end, __comp,
234  __iam + __num_threads_leftside,
235  __num_threads - __num_threads_leftside, __wait);
236  __wait = __parent_wait;
237  }
238  }
239  }
240  }
241 
242  /**
243  * @brief Quicksort step doing load-balanced local sort.
244  * @param __tls Array of thread-local storages.
245  * @param __comp Comparator.
246  * @param __iam Number of the thread processing this function.
247  */
248  template<typename _RAIter, typename _Compare>
249  void
251  _Compare& __comp, _ThreadIndex __iam,
252  bool __wait)
253  {
254  typedef std::iterator_traits<_RAIter> _TraitsType;
255  typedef typename _TraitsType::value_type _ValueType;
256  typedef typename _TraitsType::difference_type _DifferenceType;
258 
259  _QSBThreadLocal<_RAIter>& __tl = *__tls[__iam];
260 
261  _DifferenceType
263  if (__base_case_n < 2)
264  __base_case_n = 2;
265  _ThreadIndex __num_threads = __tl._M_num_threads;
266 
267  // Every thread has its own random number generator.
268  _RandomNumber __rng(__iam + 1);
269 
270  _Piece __current = __tl._M_initial;
271 
272  _DifferenceType __elements_done = 0;
273 #if _GLIBCXX_PARALLEL_ASSERTIONS
274  _DifferenceType __total_elements_done = 0;
275 #endif
276 
277  for (;;)
278  {
279  // Invariant: __current must be a valid (maybe empty) range.
280  _RAIter __begin = __current.first, __end = __current.second;
281  _DifferenceType __n = __end - __begin;
282 
283  if (__n > __base_case_n)
284  {
285  // Divide.
286  _RAIter __pivot_pos = __begin + __rng(__n);
287 
288  // Swap __pivot_pos value to end.
289  if (__pivot_pos != (__end - 1))
290  std::iter_swap(__pivot_pos, __end - 1);
291  __pivot_pos = __end - 1;
292 
294  <_Compare, _ValueType, _ValueType, bool>
295  __pred(__comp, *__pivot_pos);
296 
297  // Divide, leave pivot unchanged in last place.
298  _RAIter __split_pos1, __split_pos2;
299  __split_pos1 = __gnu_sequential::partition(__begin, __end - 1,
300  __pred);
301 
302  // Left side: < __pivot_pos; __right side: >= __pivot_pos.
303 #if _GLIBCXX_PARALLEL_ASSERTIONS
304  _GLIBCXX_PARALLEL_ASSERT(__begin <= __split_pos1
305  && __split_pos1 < __end);
306 #endif
307  // Swap pivot back to middle.
308  if (__split_pos1 != __pivot_pos)
309  std::iter_swap(__split_pos1, __pivot_pos);
310  __pivot_pos = __split_pos1;
311 
312  // In case all elements are equal, __split_pos1 == 0.
313  if ((__split_pos1 + 1 - __begin) < (__n >> 7)
314  || (__end - __split_pos1) < (__n >> 7))
315  {
316  // Very unequal split, one part smaller than one 128th
317  // elements not strictly larger than the pivot.
319  <_Compare, _ValueType, _ValueType, bool>, _ValueType>
321  <_Compare, _ValueType, _ValueType, bool>
322  (__comp, *__pivot_pos));
323 
324  // Find other end of pivot-equal range.
325  __split_pos2 = __gnu_sequential::partition(__split_pos1 + 1,
326  __end, __pred);
327  }
328  else
329  // Only skip the pivot.
330  __split_pos2 = __split_pos1 + 1;
331 
332  // Elements equal to pivot are done.
333  __elements_done += (__split_pos2 - __split_pos1);
334 #if _GLIBCXX_PARALLEL_ASSERTIONS
335  __total_elements_done += (__split_pos2 - __split_pos1);
336 #endif
337  // Always push larger part onto stack.
338  if (((__split_pos1 + 1) - __begin) < (__end - (__split_pos2)))
339  {
340  // Right side larger.
341  if ((__split_pos2) != __end)
342  __tl._M_leftover_parts.push_front
343  (std::make_pair(__split_pos2, __end));
344 
345  //__current.first = __begin; //already set anyway
346  __current.second = __split_pos1;
347  continue;
348  }
349  else
350  {
351  // Left side larger.
352  if (__begin != __split_pos1)
353  __tl._M_leftover_parts.push_front(std::make_pair
354  (__begin, __split_pos1));
355 
356  __current.first = __split_pos2;
357  //__current.second = __end; //already set anyway
358  continue;
359  }
360  }
361  else
362  {
363  __gnu_sequential::sort(__begin, __end, __comp);
364  __elements_done += __n;
365 #if _GLIBCXX_PARALLEL_ASSERTIONS
366  __total_elements_done += __n;
367 #endif
368 
369  // Prefer own stack, small pieces.
370  if (__tl._M_leftover_parts.pop_front(__current))
371  continue;
372 
373 # pragma omp atomic
374  *__tl._M_elements_leftover -= __elements_done;
375 
376  __elements_done = 0;
377 
378 #if _GLIBCXX_PARALLEL_ASSERTIONS
379  double __search_start = omp_get_wtime();
380 #endif
381 
382  // Look for new work.
383  bool __successfully_stolen = false;
384  while (__wait && *__tl._M_elements_leftover > 0
385  && !__successfully_stolen
387  // Possible dead-lock.
388  && (omp_get_wtime() < (__search_start + 1.0))
389 #endif
390  )
391  {
392  _ThreadIndex __victim;
393  __victim = __rng(__num_threads);
394 
395  // Large pieces.
396  __successfully_stolen = (__victim != __iam)
397  && __tls[__victim]->_M_leftover_parts.pop_back(__current);
398  if (!__successfully_stolen)
399  __yield();
400 #if !defined(__ICC) && !defined(__ECC)
401 # pragma omp flush
402 #endif
403  }
404 
405 #if _GLIBCXX_PARALLEL_ASSERTIONS
406  if (omp_get_wtime() >= (__search_start + 1.0))
407  {
408  sleep(1);
409  _GLIBCXX_PARALLEL_ASSERT(omp_get_wtime()
410  < (__search_start + 1.0));
411  }
412 #endif
413  if (!__successfully_stolen)
414  {
415 #if _GLIBCXX_PARALLEL_ASSERTIONS
416  _GLIBCXX_PARALLEL_ASSERT(*__tl._M_elements_leftover == 0);
417 #endif
418  return;
419  }
420  }
421  }
422  }
423 
424  /** @brief Top-level quicksort routine.
425  * @param __begin Begin iterator of sequence.
426  * @param __end End iterator of sequence.
427  * @param __comp Comparator.
428  * @param __num_threads Number of threads that are allowed to work on
429  * this part.
430  */
431  template<typename _RAIter, typename _Compare>
432  void
433  __parallel_sort_qsb(_RAIter __begin, _RAIter __end,
434  _Compare __comp, _ThreadIndex __num_threads)
435  {
436  _GLIBCXX_CALL(__end - __begin)
437 
438  typedef std::iterator_traits<_RAIter> _TraitsType;
439  typedef typename _TraitsType::value_type _ValueType;
440  typedef typename _TraitsType::difference_type _DifferenceType;
442 
443  typedef _QSBThreadLocal<_RAIter> _TLSType;
444 
445  _DifferenceType __n = __end - __begin;
446 
447  if (__n <= 1)
448  return;
449 
450  // At least one element per processor.
451  if (__num_threads > __n)
452  __num_threads = static_cast<_ThreadIndex>(__n);
453 
454  // Initialize thread local storage
455  _TLSType** __tls = new _TLSType*[__num_threads];
456  _DifferenceType __queue_size = (__num_threads
457  * (_ThreadIndex)(__rd_log2(__n) + 1));
458  for (_ThreadIndex __t = 0; __t < __num_threads; ++__t)
459  __tls[__t] = new _QSBThreadLocal<_RAIter>(__queue_size);
460 
461  // There can never be more than ceil(__rd_log2(__n)) ranges on the
462  // stack, because
463  // 1. Only one processor pushes onto the stack
464  // 2. The largest range has at most length __n
465  // 3. Each range is larger than half of the range remaining
466  volatile _DifferenceType __elements_leftover = __n;
467  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
468  {
469  __tls[__i]->_M_elements_leftover = &__elements_leftover;
470  __tls[__i]->_M_num_threads = __num_threads;
471  __tls[__i]->_M_global = std::make_pair(__begin, __end);
472 
473  // Just in case nothing is left to assign.
474  __tls[__i]->_M_initial = std::make_pair(__end, __end);
475  }
476 
477  // Main recursion call.
478  __qsb_conquer(__tls, __begin, __begin + __n, __comp, 0,
479  __num_threads, true);
480 
481 #if _GLIBCXX_PARALLEL_ASSERTIONS
482  // All stack must be empty.
483  _Piece __dummy;
484  for (_ThreadIndex __i = 1; __i < __num_threads; ++__i)
485  _GLIBCXX_PARALLEL_ASSERT(
486  !__tls[__i]->_M_leftover_parts.pop_back(__dummy));
487 #endif
488 
489  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
490  delete __tls[__i];
491  delete[] __tls;
492  }
493 } // namespace __gnu_parallel
494 
495 #endif /* _GLIBCXX_PARALLEL_BALANCED_QUICKSORT_H */
static const _Settings & get()
Get the global settings.
Subsequence description.
_RAIter __median_of_three_iterators(_RAIter __a, _RAIter __b, _RAIter __c, _Compare __comp)
Compute the median of three referenced elements, according to __comp.
_Piece _M_initial
Initial piece to work on.
_Piece _M_global
The complete sequence to sort.
Similar to std::unary_negate, but giving the argument types explicitly.
Parallel implementation of std::partition(), std::nth_element(), and std::partial_sort()....
volatile _DifferenceType * _M_elements_leftover
Pointer to a counter of elements left over to sort.
void __qsb_local_sort_with_helping(_QSBThreadLocal< _RAIter > **__tls, _Compare &__comp, _ThreadIndex __iam, bool __wait)
Quicksort step doing load-balanced local sort.
#define _GLIBCXX_CALL(__n)
Macro to produce log message when entering a function.
Lock-free double-ended queue. This file is a GNU parallel extension to the Standard C++ Library.
Similar to std::binder2nd, but giving the argument types explicitly.
std::pair< _RAIter, _RAIter > _Piece
Continuous part of the sequence, described by an iterator pair.
Random number generator, based on the Mersenne twister.
Definition: random_number.h:42
Similar to std::binder1st, but giving the argument types explicitly.
_ThreadIndex _M_num_threads
Number of threads involved in this algorithm.
void __yield()
Yield control to another thread, without waiting for the end of the time slice.
_Size __rd_log2(_Size __n)
Calculates the rounded-down logarithm of __n for base 2.
constexpr pair< typename __decay_and_strip< _T1 >::__type, typename __decay_and_strip< _T2 >::__type > make_pair(_T1 &&__x, _T2 &&__y)
A convenience wrapper for creating a pair from two objects.
Definition: stl_pair.h:524
std::iterator_traits< _RAIter >::difference_type __qsb_divide(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads)
Balanced quicksort divide step.
#define _GLIBCXX_PARALLEL_ASSERTIONS
Switch on many _GLIBCXX_PARALLEL_ASSERTions in parallel code. Should be switched on only locally.
uint16_t _ThreadIndex
Unsigned integer to index a thread number. The maximum thread number (for each processor) must fit in...
Definition: types.h:123
Routines for checking the correctness of algorithm results. This file is a GNU parallel extension to ...
_RestrictedBoundedConcurrentQueue< _Piece > _M_leftover_parts
Work-stealing queue.
std::iterator_traits< _RAIter >::difference_type __parallel_partition(_RAIter __begin, _RAIter __end, _Predicate __pred, _ThreadIndex __num_threads)
Parallel implementation of std::partition.
Definition: partition.h:56
Includes the original header files concerned with iterators except for stream iterators....
void __parallel_sort_qsb(_RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __num_threads)
Top-level quicksort routine.
_QSBThreadLocal(int __queue_size)
Constructor.
GNU parallel code for public use.
_SequenceIndex sort_qsb_base_case_maximal_n
Maximal subsequence __length to switch to unbalanced __base case. Applies to std::sort with dynamical...
Definition: settings.h:241
Information local to one thread in the parallel quicksort run.
Runtime settings and tuning parameters, heuristics to decide whether to use parallelized algorithms....
Random number generator based on the Mersenne twister. This file is a GNU parallel extension to the S...
Double-ended queue of bounded size, allowing lock-free atomic access. push_front() and pop_front() mu...
Definition: queue.h:52
void __qsb_conquer(_QSBThreadLocal< _RAIter > **__tls, _RAIter __begin, _RAIter __end, _Compare __comp, _ThreadIndex __iam, _ThreadIndex __num_threads, bool __parent_wait)
Quicksort conquer step.