Point Cloud Library (PCL)  1.12.0
NCVAlg.hpp
1 /*
2  * Software License Agreement (BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (C) 2009-2010, NVIDIA Corporation, all rights reserved.
6  * Third party copyrights are property of their respective owners.
7  *
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  *
14  * * Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * * Redistributions in binary form must reproduce the above
17  * copyright notice, this list of conditions and the following
18  * disclaimer in the documentation and/or other materials provided
19  * with the distribution.
20  * * Neither the name of Willow Garage, Inc. nor the names of its
21  * contributors may be used to endorse or promote products derived
22  * from this software without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
30  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
34  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  *
37  * $Id: $
38  * Ported to PCL by Koen Buys : Attention Work in progress!
39  */
40 
41 #ifndef _ncv_alg_hpp_
42 #define _ncv_alg_hpp_
43 
44 #include "NCV.hpp"
45 
46 
/**
 * Exchanges the contents of two objects of the same type.
 *
 * The first value is copied aside, the second is assigned over the first,
 * and the saved copy is then assigned to the second. T therefore has to be
 * copy-constructible and copy-assignable.
 *
 * \param p1 first object; holds the former value of p2 on return
 * \param p2 second object; holds the former value of p1 on return
 */
template <class T>
static void swap(T &p1, T &p2)
{
    T saved = p1;   // keep the first value alive while p1 is overwritten
    p1 = p2;
    p2 = saved;
}
54 
55 
/**
 * Divides a by b and rounds the result up to the next whole number.
 *
 * Typical use is computing how many chunks of size b are needed to cover a
 * elements. T must be an integral type (the remainder operator is used) and
 * b must be non-zero. For negative operands the result follows C++
 * truncating division and is not guaranteed to be a mathematical ceiling.
 *
 * \param a dividend
 * \param b divisor
 * \return a / b, plus one if the division leaves a remainder
 */
template<typename T>
static T divUp(T a, T b)
{
    // Quotient plus one when there is a remainder. Unlike (a + b - 1) / b,
    // this cannot wrap around when a is close to the maximum value of T.
    return a / b + (a % b != 0 ? 1 : 0);
}
61 
62 
/**
 * Reduction functor that combines two values with operator+=.
 *
 * Provides the assign()/reduce() pair that subReduce() calls on its Tfunc
 * template argument.
 *
 * NOTE(review): the `struct` declaration line was missing from this listing;
 * the name is restored to match the upstream NCV header - confirm.
 */
template<typename T>
struct functorAddValues
{
    // Copies the value pointed to by src into the location pointed to by dst,
    // going through volatile-qualified accesses on both sides.
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }

    // Accumulates in2 into in1out in place.
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out += in2;
    }
};
76 
77 
/**
 * Reduction functor that keeps the smaller of two values.
 *
 * Provides the assign()/reduce() pair that subReduce() calls on its Tfunc
 * template argument.
 *
 * NOTE(review): the `struct` declaration line was missing from this listing;
 * the name is restored to match the upstream NCV header - confirm.
 */
template<typename T>
struct functorMinValues
{
    // Copies the value pointed to by src into the location pointed to by dst,
    // going through volatile-qualified accesses on both sides.
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }

    // Replaces in1out with in2 when in1out compares greater than in2;
    // otherwise in1out is rewritten with its own value.
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out = in1out > in2 ? in2 : in1out;
    }
};
91 
92 
/**
 * Reduction functor that keeps the larger of two values.
 *
 * Provides the assign()/reduce() pair that subReduce() calls on its Tfunc
 * template argument.
 *
 * NOTE(review): the `struct` declaration line was missing from this listing;
 * the name is restored to match the upstream NCV header - confirm.
 */
template<typename T>
struct functorMaxValues
{
    // Copies the value pointed to by src into the location pointed to by dst,
    // going through volatile-qualified accesses on both sides.
    static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
    {
        //Works only for integral types. If you see compiler error here, then you have to specify how to copy your object as a set of integral fields.
        *dst = *src;
    }

    // Keeps in1out when it compares greater than in2; otherwise stores in2.
    static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
    {
        in1out = in1out > in2 ? in1out : in2;
    }
};
106 
107 
/**
 * Block-wide reduction of one value per thread through a shared-memory
 * scratch array.
 *
 * Each thread stores its element in slot threadIdx.x, the slots are folded
 * pairwise with strides 128, 64, 32, 16, 8, 4, 2, 1, and every thread returns
 * the value left in slot 0.
 *
 * Requirements and limits visible in the code:
 *  - The function contains __syncthreads() barriers, so every thread of the
 *    block has to call it, outside of divergent control flow.
 *  - Only threadIdx.x is used as the slot index, and the scratch array holds
 *    nThreads elements, so threadIdx.x must stay below nThreads.
 *    NOTE(review): presumably blockDim.x == nThreads for a 1D block - confirm
 *    against the callers.
 *  - Stages are enabled by the compile-time tests nThreads >= 256 / 128 / 64
 *    / 32. The widest stage folds 128 slots, so at most 256 elements are
 *    combined; with nThreads < 32 no folding takes place and slot 0 is
 *    returned as stored by thread 0.
 *
 * Every folding step is followed by a block barrier instead of relying on
 * implicit warp-synchronous execution, which is not guaranteed on GPUs with
 * independent thread scheduling. Each step is also limited to the threads
 * whose result is still needed, so no slot is read and written by different
 * threads within the same step.
 *
 * \tparam Tdata    element type stored in shared memory
 * \tparam Tfunc    functor providing assign(volatile Tdata*, volatile Tdata*)
 *                  and reduce(volatile Tdata&, const volatile Tdata&)
 * \tparam nThreads number of slots in the shared scratch array
 * \param threadElem the calling thread's contribution
 * \return the reduced value read from slot 0
 */
template<typename Tdata, class Tfunc, Ncv32u nThreads>
static __device__ Tdata subReduce(Tdata threadElem)
{
    Tfunc functor;

    __shared__ Tdata _reduceArr[nThreads];
    // All accesses go through a volatile view so that shared-memory reads and
    // writes are not cached in registers between steps.
    volatile Tdata *reduceArr = _reduceArr;
    functor.assign(reduceArr + threadIdx.x, &threadElem);
    __syncthreads();

    if (nThreads >= 256 && threadIdx.x < 128)
    {
        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 128]);
    }
    __syncthreads();

    if (nThreads >= 128 && threadIdx.x < 64)
    {
        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 64]);
    }
    __syncthreads();

    if (nThreads >= 64 && threadIdx.x < 32)
    {
        functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + 32]);
    }
    __syncthreads();

    // nThreads is a compile-time constant, so this branch is uniform across
    // the block and the barriers inside it are reached by every thread.
    if (nThreads >= 32)
    {
        for (Ncv32u stride = 16; stride > 0; stride >>= 1)
        {
            if (threadIdx.x < stride)
            {
                functor.reduce(reduceArr[threadIdx.x], reduceArr[threadIdx.x + stride]);
            }
            // Make this step's writes visible before the next step reads them.
            __syncthreads();
        }
    }

    Tdata reduceRes;
    functor.assign(&reduceRes, reduceArr);

    // The scratch array is static and shared by consecutive calls: wait until
    // every thread has read slot 0 before any thread may overwrite it again.
    __syncthreads();
    return reduceRes;
}
151 
152 
153 #endif //_ncv_alg_hpp_
__device__ __host__ __forceinline__ void swap(T &a, T &b)
Definition: utils.hpp:53
static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
Definition: NCVAlg.hpp:71
static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
Definition: NCVAlg.hpp:66
static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
Definition: NCVAlg.hpp:96
static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
Definition: NCVAlg.hpp:101
static __device__ __inline__ void reduce(volatile T &in1out, const volatile T &in2)
Definition: NCVAlg.hpp:86
static __device__ __inline__ void assign(volatile T *dst, volatile T *src)
Definition: NCVAlg.hpp:81