Point Cloud Library (PCL) 1.12.0
warp.hpp
1/*
2* Software License Agreement (BSD License)
3*
4* Copyright (c) 2011, Willow Garage, Inc.
5* All rights reserved.
6*
7* Redistribution and use in source and binary forms, with or without
8* modification, are permitted provided that the following conditions
9* are met:
10*
11* * Redistributions of source code must retain the above copyright
12* notice, this list of conditions and the following disclaimer.
13* * Redistributions in binary form must reproduce the above
14* copyright notice, this list of conditions and the following
15* disclaimer in the documentation and/or other materials provided
16* with the distribution.
17* * Neither the name of Willow Garage, Inc. nor the names of its
18* contributors may be used to endorse or promote products derived
19* from this software without specific prior written permission.
20*
21* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32* POSSIBILITY OF SUCH DAMAGE.
33*
34* Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35*/
36
37#ifndef PCL_DEVICE_UTILS_WARP_HPP_
38#define PCL_DEVICE_UTILS_WARP_HPP_
39
40namespace pcl
41{
42 namespace device
43 {
44 struct Warp
45 {
46 enum
47 {
48 LOG_WARP_SIZE = 5,
51 };
52
53 /** \brief Returns the warp lane ID of the calling thread. */
54 static __device__ __forceinline__ unsigned int laneId()
55 {
56 unsigned int ret;
57 asm("mov.u32 %0, %laneid;" : "=r"(ret) );
58 return ret;
59 }
60
61 static __device__ __forceinline__ int laneMaskLe()
62 {
63 unsigned int ret;
64 asm("mov.u32 %0, %lanemask_le;" : "=r"(ret) );
65 return ret;
66 }
67
68 static __device__ __forceinline__ int laneMaskLt()
69 {
70 unsigned int ret;
71 asm("mov.u32 %0, %lanemask_lt;" : "=r"(ret) );
72 return ret;
73 }
74 static __device__ __forceinline__ unsigned int id()
75 {
76 int tid = threadIdx.z * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x;
77 return tid >> LOG_WARP_SIZE;
78 }
79
80 static __device__ __forceinline__ int binaryInclScan(int ballot_mask)
81 {
82 return __popc(Warp::laneMaskLe() & ballot_mask);
83 }
84
85 static __device__ __forceinline__ int binaryExclScan(int ballot_mask)
86 {
87 return __popc(Warp::laneMaskLt() & ballot_mask);
88 }
89
90 template<typename It, typename T>
91 static __device__ __forceinline__ void fill(It beg, It end, const T& value)
92 {
93 for(It t = beg + laneId(); t < end; t += STRIDE)
94 *t = value;
95 }
96
97 template<typename InIt, typename OutIt>
98 static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
99 {
100 unsigned int lane = laneId();
101 InIt t = beg + lane;
102 OutIt o = out + lane;
103
104 for(; t < end; t += STRIDE, o += STRIDE)
105 *o = *t;
106 return o;
107 }
108
109 template<typename InIt, typename OutIt, class UnOp>
110 static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
111 {
112 unsigned int lane = laneId();
113 InIt t = beg + lane;
114 OutIt o = out + lane;
115
116 for(InIt t = beg + laneId(); t < end; t += STRIDE, o += STRIDE)
117 *o = op(*t);
118 return o;
119 }
120
121 template<typename InIt1, typename InIt2, typename OutIt, class BinOp>
122 static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
123 {
124 unsigned int lane = laneId();
125 InIt1 t1 = beg1 + lane;
126 InIt2 t2 = beg2 + lane;
127 OutIt o = out + lane;
128
129 for(; t1 < end1; t1 += STRIDE, t2 += STRIDE, o += STRIDE)
130 *o = op(*t1, *t2);
131 return o;
132 }
133
134 template<typename OutIt, typename T>
135 static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
136 {
137 unsigned int lane = laneId();
138 value += lane;
139
140 for(OutIt t = beg + lane; t < end; t += STRIDE, value += STRIDE)
141 *t = value;
142 }
143
144 template<typename T, class BinOp>
145 static __device__ __forceinline__ void reduce(volatile T* buffer, BinOp op)
146 {
147 unsigned int lane = laneId();
148 T val = buffer[lane];
149
150 if (lane < 16)
151 {
152 buffer[lane] = val = op(val, buffer[lane + 16]);
153 buffer[lane] = val = op(val, buffer[lane + 8]);
154 buffer[lane] = val = op(val, buffer[lane + 4]);
155 buffer[lane] = val = op(val, buffer[lane + 2]);
156 buffer[lane] = val = op(val, buffer[lane + 1]);
157 }
158 }
159
160 template<typename T, class BinOp>
161 static __device__ __forceinline__ T reduce(volatile T* buffer, T init, BinOp op)
162 {
163 unsigned int lane = laneId();
164 T val = buffer[lane] = init;
165
166 if (lane < 16)
167 {
168 buffer[lane] = val = op(val, buffer[lane + 16]);
169 buffer[lane] = val = op(val, buffer[lane + 8]);
170 buffer[lane] = val = op(val, buffer[lane + 4]);
171 buffer[lane] = val = op(val, buffer[lane + 2]);
172 buffer[lane] = val = op(val, buffer[lane + 1]);
173 }
174 return buffer[0];
175 }
176 };
177 }
178}
179
180#endif /* PCL_DEVICE_UTILS_WARP_HPP_ */
static __device__ __forceinline__ int laneMaskLe()
Definition: warp.hpp:61
static __device__ __forceinline__ int binaryInclScan(int ballot_mask)
Definition: warp.hpp:80
static __device__ __forceinline__ void reduce(volatile T *buffer, BinOp op)
Definition: warp.hpp:145
static __device__ __forceinline__ unsigned int laneId()
Returns the warp lane ID of the calling thread.
Definition: warp.hpp:54
static __device__ __forceinline__ OutIt transform(InIt1 beg1, InIt1 end1, InIt2 beg2, OutIt out, BinOp op)
Definition: warp.hpp:122
static __device__ __forceinline__ int binaryExclScan(int ballot_mask)
Definition: warp.hpp:85
static __device__ __forceinline__ int laneMaskLt()
Definition: warp.hpp:68
static __device__ __forceinline__ OutIt copy(InIt beg, InIt end, OutIt out)
Definition: warp.hpp:98
static __device__ __forceinline__ void fill(It beg, It end, const T &value)
Definition: warp.hpp:91
static __device__ __forceinline__ T reduce(volatile T *buffer, T init, BinOp op)
Definition: warp.hpp:161
static __device__ __forceinline__ unsigned int id()
Definition: warp.hpp:74
static __device__ __forceinline__ void yota(OutIt beg, OutIt end, T value)
Definition: warp.hpp:135
static __device__ __forceinline__ OutIt transform(InIt beg, InIt end, OutIt out, UnOp op)
Definition: warp.hpp:110