Point Cloud Library (PCL) 1.13.0
warp_reduce.hpp
1/*
2* Software License Agreement (BSD License)
3*
4* Copyright (c) 2011, Willow Garage, Inc.
5* All rights reserved.
6*
7* Redistribution and use in source and binary forms, with or without
8* modification, are permitted provided that the following conditions
9* are met:
10*
11* * Redistributions of source code must retain the above copyright
12* notice, this list of conditions and the following disclaimer.
13* * Redistributions in binary form must reproduce the above
14* copyright notice, this list of conditions and the following
15* disclaimer in the documentation and/or other materials provided
16* with the distribution.
17* * Neither the name of Willow Garage, Inc. nor the names of its
18* contributors may be used to endorse or promote products derived
19* from this software without specific prior written permission.
20*
21* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32* POSSIBILITY OF SUCH DAMAGE.
33*
34* Author: Anatoly Baskeheev, Itseez Ltd, (myname.mysurname@mycompany.com)
35*/
36
37#ifndef PCL_GPU_WARP_REDUCE
38#define PCL_GPU_WARP_REDUCE
39
40
41namespace pcl
42{
43 namespace device
44 {
45 template <class T>
46 __device__ __forceinline__ T warp_reduce ( volatile T *ptr , const unsigned int tid = threadIdx.x )
47 {
48 const unsigned int lane = tid & 31; // index of thread in warp (0..31)
49
50 if (lane < 16)
51 {
52 T partial = ptr[tid];
53
54 ptr[tid] = partial = partial + ptr[tid + 16];
55 ptr[tid] = partial = partial + ptr[tid + 8];
56 ptr[tid] = partial = partial + ptr[tid + 4];
57 ptr[tid] = partial = partial + ptr[tid + 2];
58 ptr[tid] = partial = partial + ptr[tid + 1];
59 }
60 return ptr[tid - lane];
61
62 }
63 }
64}
65
66#endif
__device__ __forceinline__ T warp_reduce(volatile T *ptr, const unsigned int tid=threadIdx.x)
Definition: warp_reduce.hpp:46