Point Cloud Library (PCL) 1.12.0
bitonic_sort.hpp
1/*
2* Software License Agreement (BSD License)
3*
4* Copyright (c) 2011, Willow Garage, Inc.
5* All rights reserved.
6*
7* Redistribution and use in source and binary forms, with or without
8* modification, are permitted provided that the following conditions
9* are met:
10*
11* * Redistributions of source code must retain the above copyright
12* notice, this list of conditions and the following disclaimer.
13* * Redistributions in binary form must reproduce the above
14* copyright notice, this list of conditions and the following
15* disclaimer in the documentation and/or other materials provided
16* with the distribution.
17* * Neither the name of Willow Garage, Inc. nor the names of its
18* contributors may be used to endorse or promote products derived
19* from this software without specific prior written permission.
20*
21* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32* POSSIBILITY OF SUCH DAMAGE.
33*
34* Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35*/
36
37#ifndef PCL_GPU_BITONIC_SORT_WARP_HPP
38#define PCL_GPU_BITONIC_SORT_WARP_HPP
39
40namespace pcl
41{
42 namespace device
43 {
44 template<typename T>
45 __device__ __forceinline__ void swap(T& a, T& b) { T t = a; a = b; b = t; }
46
47 template<typename V, typename K>
48 __device__ __forceinline__ void bitonicSortWarp(volatile K* keys, volatile V* vals, unsigned int dir = 1)
49 {
50 const unsigned int arrayLength = 64;
51 unsigned int lane = threadIdx.x & 31;
52
53 for(unsigned int size = 2; size < arrayLength; size <<= 1)
54 {
55 //Bitonic merge
56 unsigned int ddd = dir ^ ( (lane & (size / 2)) != 0 );
57
58 for(unsigned int stride = size / 2; stride > 0; stride >>= 1)
59 {
60 unsigned int pos = 2 * lane - (lane & (stride - 1));
61
62 if ( (keys[pos] > keys[pos + stride]) == ddd )
63 {
64 swap(keys[pos], keys[pos + stride]);
65 swap(vals[pos], vals[pos + stride]);
66 }
67 }
68 }
69
70 //ddd == dir for the last bitonic merge step
71 for(unsigned int stride = arrayLength / 2; stride > 0; stride >>= 1)
72 {
73 unsigned int pos = 2 * lane - (lane & (stride - 1));
74
75 if ( (keys[pos] > keys[pos + stride]) == dir )
76 {
77 swap(keys[pos], keys[pos + stride]);
78 swap(vals[pos], vals[pos + stride]);
79 }
80 }
81 }
82
83 }
84}
85
86#endif /* PCL_GPU_BITONIC_SORT_WARP_HPP */
@ K
Definition: norms.h:54
__device__ __forceinline__ void bitonicSortWarp(volatile K *keys, volatile V *vals, unsigned int dir=1)
__device__ __host__ __forceinline__ void swap(T &a, T &b)
Definition: utils.hpp:53