OpenJPH
Open-source implementation of JPEG2000 Part-15
ojph_arch.h
Go to the documentation of this file.
1 //***************************************************************************/
2 // This software is released under the 2-Clause BSD license, included
3 // below.
4 //
5 // Copyright (c) 2019, Aous Naman
6 // Copyright (c) 2019, Kakadu Software Pty Ltd, Australia
7 // Copyright (c) 2019, The University of New South Wales, Australia
8 //
9 // Redistribution and use in source and binary forms, with or without
10 // modification, are permitted provided that the following conditions are
11 // met:
12 //
13 // 1. Redistributions of source code must retain the above copyright
14 // notice, this list of conditions and the following disclaimer.
15 //
16 // 2. Redistributions in binary form must reproduce the above copyright
17 // notice, this list of conditions and the following disclaimer in the
18 // documentation and/or other materials provided with the distribution.
19 //
20 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
21 // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 // TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
23 // PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 // HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
26 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 //***************************************************************************/
32 // This file is part of the OpenJPH software implementation.
33 // File: ojph_arch.h
34 // Author: Aous Naman
35 // Date: 28 August 2019
36 //***************************************************************************/
37 
38 
39 #ifndef OJPH_ARCH_H
40 #define OJPH_ARCH_H
41 
42 #include <cstdio>
43 #include <cstdint>
44 #include <cmath>
45 
46 #include "ojph_defs.h"
47 
48 
50 // preprocessor directives for compiler
52 #ifdef _MSC_VER
53 #define OJPH_COMPILER_MSVC
54 #elif (defined __GNUC__)
55 #define OJPH_COMPILER_GNUC
56 #endif
57 
58 #ifdef __EMSCRIPTEN__
59 #define OJPH_EMSCRIPTEN
60 #endif
61 
62 #ifdef OJPH_COMPILER_MSVC
63 #include <intrin.h>
64 #endif
65 
66 namespace ojph {
67 
69  // cpu features
71  int get_cpu_ext_level();
72 
73  enum : int {
86  };
87 
89  static inline ui32 population_count(ui32 val)
90  {
91  #ifdef OJPH_COMPILER_MSVC
92  return (ui32)__popcnt(val);
93  #elif (defined OJPH_COMPILER_GNUC)
94  return (ui32)__builtin_popcount(val);
95  #else
96  val -= ((val >> 1) & 0x55555555);
97  val = (((val >> 2) & 0x33333333) + (val & 0x33333333));
98  val = (((val >> 4) + val) & 0x0f0f0f0f);
99  val += (val >> 8);
100  val += (val >> 16);
101  return (int)(val & 0x0000003f);
102  #endif
103  }
104 
106 #ifdef OJPH_COMPILER_MSVC
107  #pragma intrinsic(_BitScanReverse)
108 #endif
109  static inline ui32 count_leading_zeros(ui32 val)
110  {
111  #ifdef OJPH_COMPILER_MSVC
112  unsigned long result = 0;
113  _BitScanReverse(&result, val);
114  return 31 ^ (ui32)result;
115  #elif (defined OJPH_COMPILER_GNUC)
116  return (ui32)__builtin_clz(val);
117  #else
118  val |= (val >> 1);
119  val |= (val >> 2);
120  val |= (val >> 4);
121  val |= (val >> 8);
122  val |= (val >> 16);
123  return 32 - population_count(val);
124  #endif
125  }
126 
128 #ifdef OJPH_COMPILER_MSVC
129  #pragma intrinsic(_BitScanForward)
130 #endif
131  static inline ui32 count_trailing_zeros(ui32 val)
132  {
133  #ifdef OJPH_COMPILER_MSVC
134  unsigned long result = 0;
135  _BitScanForward(&result, val);
136  return (ui32)result;
137  #elif (defined OJPH_COMPILER_GNUC)
138  return (ui32)__builtin_ctz(val);
139  #else
140  val |= (val << 1);
141  val |= (val << 2);
142  val |= (val << 4);
143  val |= (val << 8);
144  val |= (val << 16);
145  return 32 - population_count(val);
146  #endif
147  }
148 
150  static inline si32 ojph_round(float val)
151  {
152  #ifdef OJPH_COMPILER_MSVC
153  return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
154  #elif (defined OJPH_COMPILER_GNUC)
155  return (si32)(val + (val >= 0.0f ? 0.5f : -0.5f));
156  #else
157  return (si32)round(val);
158  #endif
159  }
160 
162  static inline si32 ojph_trunc(float val)
163  {
164  #ifdef OJPH_COMPILER_MSVC
165  return (si32)(val);
166  #elif (defined OJPH_COMPILER_GNUC)
167  return (si32)(val);
168  #else
169  return (si32)trunc(val);
170  #endif
171  }
172 
174  // constants
176  const ui32 byte_alignment = 32; //32 bytes == 256 bits
179 
181  // templates for alignment
183 
185  // finds the size such that it is a multiple of byte_alignment
186  template <typename T, int N>
187  size_t calc_aligned_size(size_t size) {
188  size = size * sizeof(T) + N - 1;
189  size &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
190  size >>= (31 - count_leading_zeros(sizeof(T)));
191  return size;
192  }
193 
195  // moves the pointer to first address that is a multiple of byte_alignment
196  template <typename T, int N>
197  inline T *align_ptr(T *ptr) {
198  intptr_t p = reinterpret_cast<intptr_t>(ptr);
199  p += N - 1;
200  p &= ~((1ULL << (31 - count_leading_zeros(N))) - 1);
201  return reinterpret_cast<T *>(p);
202  }
203 
205  // OS detection definitions
207 #if (defined WIN32) || (defined _WIN32) || (defined _WIN64)
208  #define OJPH_OS_WINDOWS
209 #elif (defined __APPLE__)
210  #define OJPH_OS_APPLE
211 #elif (defined __linux)
212  #define OJPH_OS_LINUX
213 #endif
214 
216  // defines for dll
218 #if defined(OJPH_OS_WINDOWS) && defined(OJPH_BUILD_SHARED_LIBRARY)
219  #define OJPH_EXPORT __declspec(dllexport)
220 #else
221  #define OJPH_EXPORT
222 #endif
223 
224 }
225 
226 #endif // !OJPH_ARCH_H
const ui32 object_alignment
Definition: ojph_arch.h:178
const ui32 byte_alignment
Definition: ojph_arch.h:176
static si32 ojph_round(float val)
Definition: ojph_arch.h:150
size_t calc_aligned_size(size_t size)
Definition: ojph_arch.h:187
@ X86_CPU_EXT_LEVEL_AVX2
Definition: ojph_arch.h:83
@ X86_CPU_EXT_LEVEL_AVX
Definition: ojph_arch.h:82
@ X86_CPU_EXT_LEVEL_AVX512
Definition: ojph_arch.h:85
@ X86_CPU_EXT_LEVEL_GENERIC
Definition: ojph_arch.h:74
@ X86_CPU_EXT_LEVEL_SSE2
Definition: ojph_arch.h:77
@ X86_CPU_EXT_LEVEL_SSE41
Definition: ojph_arch.h:80
@ X86_CPU_EXT_LEVEL_SSE
Definition: ojph_arch.h:76
@ X86_CPU_EXT_LEVEL_MMX
Definition: ojph_arch.h:75
@ X86_CPU_EXT_LEVEL_SSE42
Definition: ojph_arch.h:81
@ X86_CPU_EXT_LEVEL_SSSE3
Definition: ojph_arch.h:79
@ X86_CPU_EXT_LEVEL_SSE3
Definition: ojph_arch.h:78
@ X86_CPU_EXT_LEVEL_AVX2FMA
Definition: ojph_arch.h:84
static ui32 population_count(ui32 val)
Definition: ojph_arch.h:89
int get_cpu_ext_level()
Definition: ojph_arch.cpp:182
static si32 ojph_trunc(float val)
Definition: ojph_arch.h:162
T * align_ptr(T *ptr)
Definition: ojph_arch.h:197
static ui32 count_trailing_zeros(ui32 val)
Definition: ojph_arch.h:131
static ui32 count_leading_zeros(ui32 val)
Definition: ojph_arch.h:109
int32_t si32
Definition: ojph_defs.h:55
const ui32 log_byte_alignment
Definition: ojph_arch.h:177
uint32_t ui32
Definition: ojph_defs.h:54