Grok 10.0.5
test_util-inl.h
Go to the documentation of this file.
1// Copyright 2019 Google LLC
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16// Target-specific helper functions for use by *_test.cc.
17
18#include <stdint.h>
19
20#include "hwy/base.h"
21#include "hwy/tests/hwy_gtest.h"
22#include "hwy/tests/test_util.h"
23
24// After test_util (also includes highway.h)
25#include "hwy/print-inl.h"
26
27// Per-target include guard
28#if defined(HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_) == \
29 defined(HWY_TARGET_TOGGLE)
30#ifdef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
31#undef HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
32#else
33#define HIGHWAY_HWY_TESTS_TEST_UTIL_INL_H_
34#endif
35
37namespace hwy {
38namespace HWY_NAMESPACE {
39
40// Compare expected vector to vector.
41// HWY_INLINE works around a Clang SVE compiler bug where all but the first
42// 128 bits (the NEON register) of actual are zero.
43template <class D, typename T = TFromD<D>, class V = Vec<D>>
44HWY_INLINE void AssertVecEqual(D d, const T* expected, VecArg<V> actual,
45 const char* filename, const int line) {
46 const size_t N = Lanes(d);
47 auto actual_lanes = AllocateAligned<T>(N);
48 Store(actual, d, actual_lanes.get());
49
50 const auto info = hwy::detail::MakeTypeInfo<T>();
51 const char* target_name = hwy::TargetName(HWY_TARGET);
52 hwy::detail::AssertArrayEqual(info, expected, actual_lanes.get(), N,
53 target_name, filename, line);
54}
55
56// Compare expected lanes to vector.
57// HWY_INLINE works around a Clang SVE compiler bug where all but the first
58// 128 bits (the NEON register) of actual are zero.
59template <class D, typename T = TFromD<D>, class V = Vec<D>>
61 const char* filename, int line) {
62 auto expected_lanes = AllocateAligned<T>(Lanes(d));
63 Store(expected, d, expected_lanes.get());
64 AssertVecEqual(d, expected_lanes.get(), actual, filename, line);
65}
66
67// Only checks the valid mask elements (those whose index < Lanes(d)).
68template <class D>
70 const char* filename, int line) {
71 // lvalues prevented MSAN failure in farm_sve.
72 const Vec<D> va = VecFromMask(d, a);
73 const Vec<D> vb = VecFromMask(d, b);
74 AssertVecEqual(d, va, vb, filename, line);
75
76 const char* target_name = hwy::TargetName(HWY_TARGET);
77 AssertEqual(CountTrue(d, a), CountTrue(d, b), target_name, filename, line);
78 AssertEqual(AllTrue(d, a), AllTrue(d, b), target_name, filename, line);
79 AssertEqual(AllFalse(d, a), AllFalse(d, b), target_name, filename, line);
80
81 const size_t N = Lanes(d);
82#if HWY_TARGET == HWY_SCALAR
83 const Rebind<uint8_t, D> d8;
84#else
86#endif
87 const size_t N8 = Lanes(d8);
88 auto bits_a = AllocateAligned<uint8_t>(HWY_MAX(size_t{8}, N8));
89 auto bits_b = AllocateAligned<uint8_t>(size_t{HWY_MAX(8, N8)});
90 memset(bits_a.get(), 0, N8);
91 memset(bits_b.get(), 0, N8);
92 const size_t num_bytes_a = StoreMaskBits(d, a, bits_a.get());
93 const size_t num_bytes_b = StoreMaskBits(d, b, bits_b.get());
94 AssertEqual(num_bytes_a, num_bytes_b, target_name, filename, line);
95 size_t i = 0;
96 // First check whole bytes (if that many elements are still valid)
97 for (; i < N / 8; ++i) {
98 if (bits_a[i] != bits_b[i]) {
99 fprintf(stderr, "Mismatch in byte %d: %d != %d\n", static_cast<int>(i),
100 bits_a[i], bits_b[i]);
101 Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
102 Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
103 hwy::Abort(filename, line, "Masks not equal");
104 }
105 }
106 // Then the valid bit(s) in the last byte.
107 const size_t remainder = N % 8;
108 if (remainder != 0) {
109 const int mask = (1 << remainder) - 1;
110 const int valid_a = bits_a[i] & mask;
111 const int valid_b = bits_b[i] & mask;
112 if (valid_a != valid_b) {
113 fprintf(stderr, "Mismatch in last byte %d: %d != %d\n",
114 static_cast<int>(i), valid_a, valid_b);
115 Print(d8, "expect", Load(d8, bits_a.get()), 0, N8);
116 Print(d8, "actual", Load(d8, bits_b.get()), 0, N8);
117 hwy::Abort(filename, line, "Masks not equal");
118 }
119 }
120}
121
122// Only sets valid elements (those whose index < Lanes(d)). This helps catch
123// tests that are not masking off the (undefined) upper mask elements.
124//
125// TODO(janwas): with HWY_NOINLINE GCC zeros the upper half of AVX2 masks.
126template <class D>
128 return FirstN(d, Lanes(d));
129}
130
131template <class D>
133 const auto zero = Zero(RebindToSigned<D>());
134 return RebindMask(d, Lt(zero, zero));
135}
136
// Assertion macros for tests. They are defined only once (guarded by
// HWY_ASSERT_EQ) and capture the call site via __FILE__ and __LINE__.
#ifndef HWY_ASSERT_EQ

// Forwards two values to hwy::AssertEqual together with the name of the
// current target.
#define HWY_ASSERT_EQ(expected, actual)                                     \
  hwy::AssertEqual(expected, actual, hwy::TargetName(HWY_TARGET), __FILE__, \
                   __LINE__)

// Forwards two arrays and their element count to hwy::AssertArrayEqual.
#define HWY_ASSERT_ARRAY_EQ(expected, actual, count)                          \
  hwy::AssertArrayEqual(expected, actual, count, hwy::TargetName(HWY_TARGET), \
                        __FILE__, __LINE__)

// Forwards two strings to hwy::AssertStringEqual.
#define HWY_ASSERT_STRING_EQ(expected, actual)                          \
  hwy::AssertStringEqual(expected, actual, hwy::TargetName(HWY_TARGET), \
                         __FILE__, __LINE__)

// Calls AssertVecEqual (unqualified, so it resolves in the caller's
// namespace) for the tag `d`.
#define HWY_ASSERT_VEC_EQ(d, expected, actual) \
  AssertVecEqual(d, expected, actual, __FILE__, __LINE__)

// Calls AssertMaskEqual (unqualified) for the tag `d`.
#define HWY_ASSERT_MASK_EQ(d, expected, actual) \
  AssertMaskEqual(d, expected, actual, __FILE__, __LINE__)

#endif  // HWY_ASSERT_EQ
158
159namespace detail {
160
161// Helpers for instantiating tests with combinations of lane types / counts.
162
163// Calls Test for each CappedTag<T, N> where N is in [kMinLanes, kMul * kMinArg]
164// and the resulting Lanes() is in [min_lanes, max_lanes]. The upper bound
165// is required to ensure capped vectors remain extendable. Implemented by
166// recursively halving kMul until it is zero.
167template <typename T, size_t kMul, size_t kMinArg, class Test>
169 static void Do(size_t min_lanes, size_t max_lanes) {
171
172 // If we already don't have enough lanes, stop.
173 const size_t lanes = Lanes(d);
174 if (lanes < min_lanes) return;
175
176 if (lanes <= max_lanes) {
177 Test()(T(), d);
178 }
179 ForeachCappedR<T, kMul / 2, kMinArg, Test>::Do(min_lanes, max_lanes);
180 }
181};
182
183// Base case to stop the recursion.
184template <typename T, size_t kMinArg, class Test>
185struct ForeachCappedR<T, 0, kMinArg, Test> {
186 static void Do(size_t, size_t) {}
187};
188
189#if HWY_HAVE_SCALABLE
190
191template <typename T>
192constexpr int MinPow2() {
193 // Highway follows RVV LMUL in that the smallest fraction is 1/8th (encoded
194 // as kPow2 == -3). The fraction also must not result in zero lanes for the
195 // smallest possible vector size, which is 128 bits even on RISC-V (with the
196 // application processor profile).
197 return HWY_MAX(-3, -static_cast<int>(CeilLog2(16 / sizeof(T))));
198}
199
200// Iterates kPow2 upward through +3.
201template <typename T, int kPow2, int kAddPow2, class Test>
202struct ForeachShiftR {
203 static void Do(size_t min_lanes) {
204 const ScalableTag<T, kPow2 + kAddPow2> d;
205
206 // Precondition: [kPow2, 3] + kAddPow2 is a valid fraction of the minimum
207 // vector size, so we always have enough lanes, except ForGEVectors.
208 if (Lanes(d) >= min_lanes) {
209 Test()(T(), d);
210 } else {
211 fprintf(stderr, "%d lanes < %d: T=%d pow=%d\n",
212 static_cast<int>(Lanes(d)), static_cast<int>(min_lanes),
213 static_cast<int>(sizeof(T)), kPow2 + kAddPow2);
214 HWY_ASSERT(min_lanes != 1);
215 }
216
217 ForeachShiftR<T, kPow2 + 1, kAddPow2, Test>::Do(min_lanes);
218 }
219};
220
221// Base case to stop the recursion.
222template <typename T, int kAddPow2, class Test>
223struct ForeachShiftR<T, 4, kAddPow2, Test> {
224 static void Do(size_t) {}
225};
226#else
227// ForeachCappedR already handled all possible sizes.
228#endif // HWY_HAVE_SCALABLE
229
230} // namespace detail
231
232// These 'adapters' call a test for all possible N or kPow2 subject to
233// constraints such as "vectors must be extendable" or "vectors >= 128 bits".
234// They may be called directly, or via For*Types. Note that for an adapter C,
235// `C<Test>(T())` does not call the test - the correct invocation is
236// `C<Test>()(T())`, or preferably `ForAllTypes(C<Test>())`. We check at runtime
237// that operator() is called to prevent such bugs. Note that this is not
238// thread-safe, but that is fine because C are typically local variables.
239
240// Calls Test for all power of two N in [1, Lanes(d) >> kPow2]. This is for
241// ops that widen their input, e.g. Combine (not supported by HWY_SCALAR).
242template <class Test, int kPow2 = 1>
244 mutable bool called_ = false;
245
246 public:
248 if (!called_) {
249 HWY_ABORT("Test is incorrect, ensure operator() is called");
250 }
251 }
252
253 template <typename T>
254 void operator()(T /*unused*/) const {
255 called_ = true;
256 constexpr size_t kMaxCapped = HWY_LANES(T);
257 // Skip CappedTag that are already full vectors.
258 const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
259 (void)kMaxCapped;
260 (void)max_lanes;
261#if HWY_TARGET == HWY_SCALAR
262 // not supported
263#else
264 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), 1, Test>::Do(1, max_lanes);
265#if HWY_TARGET == HWY_RVV
266 // For each [MinPow2, 3 - kPow2]; counter is [MinPow2 + kPow2, 3].
267 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, -kPow2, Test>::Do(1);
268#elif HWY_HAVE_SCALABLE
269 // For each [MinPow2, 0 - kPow2]; counter is [MinPow2 + kPow2 + 3, 3].
270 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -kPow2 - 3,
271 Test>::Do(1);
272#endif
273#endif // HWY_SCALAR
274 }
275};
276
277// Calls Test for all power of two N in [1 << kPow2, Lanes(d)]. This is for ops
278// that narrow their input, e.g. UpperHalf.
279template <class Test, int kPow2 = 1>
281 mutable bool called_ = false;
282
283 public:
285 if (!called_) {
286 HWY_ABORT("Test is incorrect, ensure operator() is called");
287 }
288 }
289
290 template <typename T>
291 void operator()(T /*unused*/) const {
292 called_ = true;
293 constexpr size_t kMinLanes = size_t{1} << kPow2;
294 constexpr size_t kMaxCapped = HWY_LANES(T);
295 // For shrinking, an upper limit is unnecessary.
296 constexpr size_t max_lanes = kMaxCapped;
297
298 (void)kMinLanes;
299 (void)max_lanes;
300 (void)max_lanes;
301#if HWY_TARGET == HWY_SCALAR
302 // not supported
303#else
304 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
305 kMinLanes, max_lanes);
306#if HWY_TARGET == HWY_RVV
307 // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
308 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
309 kMinLanes);
310#elif HWY_HAVE_SCALABLE
311 // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
312 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
313 kMinLanes);
314#endif
315#endif // HWY_TARGET == HWY_SCALAR
316 }
317};
318
319// Calls Test for all supported power of two vectors of at least kMinBits.
320// Examples: AES or 64x64 require 128 bits, casts may require 64 bits.
321template <size_t kMinBits, class Test>
323 mutable bool called_ = false;
324
325 public:
327 if (!called_) {
328 HWY_ABORT("Test is incorrect, ensure operator() is called");
329 }
330 }
331
332 template <typename T>
333 void operator()(T /*unused*/) const {
334 called_ = true;
335 constexpr size_t kMaxCapped = HWY_LANES(T);
336 constexpr size_t kMinLanes = kMinBits / 8 / sizeof(T);
337 // An upper limit is unnecessary.
338 constexpr size_t max_lanes = kMaxCapped;
339 (void)max_lanes;
340#if HWY_TARGET == HWY_SCALAR
341 (void)kMinLanes; // not supported
342#else
343 detail::ForeachCappedR<T, HWY_LANES(T) / kMinLanes, kMinLanes, Test>::Do(
344 kMinLanes, max_lanes);
345#if HWY_TARGET == HWY_RVV
346 // Can be 0 (handled below) if kMinBits > 64.
347 constexpr size_t kRatio = 128 / kMinBits;
348 constexpr int kMinPow2 =
349 kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
350 // For each [kMinPow2, 3]; counter is [kMinPow2, 3].
351 detail::ForeachShiftR<T, kMinPow2, 0, Test>::Do(kMinLanes);
352#elif HWY_HAVE_SCALABLE
353 // Can be 0 (handled below) if kMinBits > 128.
354 constexpr size_t kRatio = 128 / kMinBits;
355 constexpr int kMinPow2 =
356 kRatio == 0 ? 0 : -static_cast<int>(CeilLog2(kRatio));
357 // For each [kMinPow2, 0]; counter is [kMinPow2 + 3, 3].
358 detail::ForeachShiftR<T, kMinPow2 + 3, -3, Test>::Do(kMinLanes);
359#endif
360#endif // HWY_TARGET == HWY_SCALAR
361 }
362};
363
364template <class Test>
366
367// Calls Test for all N that can be promoted (not the same as Extendable because
368// HWY_SCALAR has one lane). Also used for ZipLower, but not ZipUpper.
369template <class Test, int kPow2 = 1>
371 mutable bool called_ = false;
372
373 public:
375 if (!called_) {
376 HWY_ABORT("Test is incorrect, ensure operator() is called");
377 }
378 }
379
380 template <typename T>
381 void operator()(T /*unused*/) const {
382 called_ = true;
383 constexpr size_t kFactor = size_t{1} << kPow2;
384 static_assert(kFactor >= 2 && kFactor * sizeof(T) <= sizeof(uint64_t), "");
385 constexpr size_t kMaxCapped = HWY_LANES(T);
386 constexpr size_t kMinLanes = kFactor;
387 // Skip CappedTag that are already full vectors.
388 const size_t max_lanes = Lanes(ScalableTag<T>()) >> kPow2;
389 (void)kMaxCapped;
390 (void)kMinLanes;
391 (void)max_lanes;
392#if HWY_TARGET == HWY_SCALAR
394#else
395 // TODO(janwas): call Extendable if kMinLanes check not required?
396 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), 1, Test>::Do(kMinLanes,
397 max_lanes);
398#if HWY_TARGET == HWY_RVV
399 // For each [MinPow2, 3 - kPow2]; counter is [MinPow2 + kPow2, 3].
400 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, -kPow2, Test>::Do(
401 kMinLanes);
402#elif HWY_HAVE_SCALABLE
403 // For each [MinPow2, 0 - kPow2]; counter is [MinPow2 + kPow2 + 3, 3].
404 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -kPow2 - 3,
405 Test>::Do(kMinLanes);
406#endif
407#endif // HWY_SCALAR
408 }
409};
410
411// Calls Test for all N than can be demoted (not the same as Shrinkable because
412// HWY_SCALAR has one lane).
413template <class Test, int kPow2 = 1>
415 mutable bool called_ = false;
416
417 public:
419 if (!called_) {
420 HWY_ABORT("Test is incorrect, ensure operator() is called");
421 }
422 }
423
424 template <typename T>
425 void operator()(T /*unused*/) const {
426 called_ = true;
427 constexpr size_t kMinLanes = size_t{1} << kPow2;
428 constexpr size_t kMaxCapped = HWY_LANES(T);
429 // For shrinking, an upper limit is unnecessary.
430 constexpr size_t max_lanes = kMaxCapped;
431
432 (void)kMinLanes;
433 (void)max_lanes;
434 (void)max_lanes;
435#if HWY_TARGET == HWY_SCALAR
437#else
438 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
439 kMinLanes, max_lanes);
440
441// TODO(janwas): call Extendable if kMinLanes check not required?
442#if HWY_TARGET == HWY_RVV
443 // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
444 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
445 kMinLanes);
446#elif HWY_HAVE_SCALABLE
447 // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
448 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
449 kMinLanes);
450#endif
451#endif // HWY_TARGET == HWY_SCALAR
452 }
453};
454
455// For LowerHalf/Quarter.
456template <class Test, int kPow2 = 1>
458 mutable bool called_ = false;
459
460 public:
462 if (!called_) {
463 HWY_ABORT("Test is incorrect, ensure operator() is called");
464 }
465 }
466
467 template <typename T>
468 void operator()(T /*unused*/) const {
469 called_ = true;
470#if HWY_TARGET == HWY_SCALAR
472#else
473 constexpr size_t kMinLanes = size_t{1} << kPow2;
474 // For shrinking, an upper limit is unnecessary.
475 constexpr size_t kMaxCapped = HWY_LANES(T);
476 detail::ForeachCappedR<T, (kMaxCapped >> kPow2), kMinLanes, Test>::Do(
477 kMinLanes, kMaxCapped);
478
479// TODO(janwas): call Extendable if kMinLanes check not required?
480#if HWY_TARGET == HWY_RVV
481 // For each [MinPow2 + kPow2, 3]; counter is [MinPow2 + kPow2, 3].
482 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2, 0, Test>::Do(
483 kMinLanes);
484#elif HWY_HAVE_SCALABLE
485 // For each [MinPow2 + kPow2, 0]; counter is [MinPow2 + kPow2 + 3, 3].
486 detail::ForeachShiftR<T, detail::MinPow2<T>() + kPow2 + 3, -3, Test>::Do(
487 kMinLanes);
488#endif
489#endif // HWY_TARGET == HWY_SCALAR
490 }
491};
492
493// Calls Test for all power of two N in [1, Lanes(d)]. This is the default
494// for ops that do not narrow nor widen their input, nor require 128 bits.
495template <class Test>
497 mutable bool called_ = false;
498
499 public:
501 if (!called_) {
502 HWY_ABORT("Test is incorrect, ensure operator() is called");
503 }
504 }
505
506 template <typename T>
507 void operator()(T t) const {
508 called_ = true;
509#if HWY_TARGET == HWY_SCALAR
510 (void)t;
512#else
514#endif
515 }
516};
517
518// Type lists to shorten call sites:
519
// Invokes func with a value-initialized instance of each signed integer type
// (the 64-bit type only if HWY_HAVE_INTEGER64).
template <class Func>
void ForSignedTypes(const Func& func) {
  func(int8_t{});
  func(int16_t{});
  func(int32_t{});
#if HWY_HAVE_INTEGER64
  func(int64_t{});
#endif
}

// Invokes func with a value-initialized instance of each unsigned integer
// type (the 64-bit type only if HWY_HAVE_INTEGER64).
template <class Func>
void ForUnsignedTypes(const Func& func) {
  func(uint8_t{});
  func(uint16_t{});
  func(uint32_t{});
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
#endif
}

// Signed types first, then unsigned.
template <class Func>
void ForIntegerTypes(const Func& func) {
  ForSignedTypes(func);
  ForUnsignedTypes(func);
}

// Invokes func with float, and with double if HWY_HAVE_FLOAT64.
template <class Func>
void ForFloatTypes(const Func& func) {
  func(float{});
#if HWY_HAVE_FLOAT64
  func(double{});
#endif
}

// Integer types first, then floating-point types.
template <class Func>
void ForAllTypes(const Func& func) {
  ForIntegerTypes(func);
  ForFloatTypes(func);
}
559
// 8-bit integers: unsigned, then signed.
template <class Func>
void ForUI8(const Func& func) {
  func(uint8_t{});
  func(int8_t{});
}

// 16-bit integers: unsigned, then signed.
template <class Func>
void ForUI16(const Func& func) {
  func(uint16_t{});
  func(int16_t{});
}

// 16-bit integers, plus float16_t if HWY_HAVE_FLOAT16.
template <class Func>
void ForUIF16(const Func& func) {
  ForUI16(func);
#if HWY_HAVE_FLOAT16
  func(float16_t{});
#endif
}

// 32-bit integers: unsigned, then signed.
template <class Func>
void ForUI32(const Func& func) {
  func(uint32_t{});
  func(int32_t{});
}

// 32-bit integers, then float.
template <class Func>
void ForUIF32(const Func& func) {
  ForUI32(func);
  func(float{});
}

// 64-bit integers (unsigned, then signed); does nothing unless
// HWY_HAVE_INTEGER64.
template <class Func>
void ForUI64(const Func& func) {
#if HWY_HAVE_INTEGER64
  func(uint64_t{});
  func(int64_t{});
#endif
}

// 64-bit integers, plus double if HWY_HAVE_FLOAT64.
template <class Func>
void ForUIF64(const Func& func) {
  ForUI64(func);
#if HWY_HAVE_FLOAT64
  func(double{});
#endif
}

// 32-bit, then 64-bit integers.
template <class Func>
void ForUI3264(const Func& func) {
  ForUI32(func);
  ForUI64(func);
}

// 32-bit, then 64-bit integers and floating-point types.
template <class Func>
void ForUIF3264(const Func& func) {
  ForUIF32(func);
  ForUIF64(func);
}

// 16-, 32- and 64-bit integers.
template <class Func>
void ForUI163264(const Func& func) {
  ForUI16(func);
  ForUI3264(func);
}

// 16-, 32- and 64-bit integers and floating-point types.
template <class Func>
void ForUIF163264(const Func& func) {
  ForUIF16(func);
  ForUIF3264(func);
}
631
632// For tests that involve loops, adjust the trip count so that emulated tests
633// finish quickly (but always at least 2 iterations to ensure some diversity).
634constexpr size_t AdjustedReps(size_t max_reps) {
635#if HWY_ARCH_RVV
636 return HWY_MAX(max_reps / 32, 2);
637#elif HWY_IS_DEBUG_BUILD
638 return HWY_MAX(max_reps / 8, 2);
639#elif HWY_ARCH_ARM
640 return HWY_MAX(max_reps / 4, 2);
641#else
642 return HWY_MAX(max_reps, 2);
643#endif
644}
645
// Like AdjustedReps, but for loops whose trip count is 1 << max_pow2: the
// exponent is reduced by 4 for RVV and by 1 for debug or Arm builds.
constexpr size_t AdjustedLog2Reps(size_t max_pow2) {
  // The subtraction wraps around to a huge value when max_pow2 is too small
  // ("negative"); HWY_MIN then selects the original max_pow2.
#if HWY_ARCH_RVV
  return HWY_MIN(max_pow2, max_pow2 - 4);
#elif HWY_IS_DEBUG_BUILD || HWY_ARCH_ARM
  return HWY_MIN(max_pow2, max_pow2 - 1);
#else
  return max_pow2;
#endif
}
659
660// NOLINTNEXTLINE(google-readability-namespace-comments)
661} // namespace HWY_NAMESPACE
662} // namespace hwy
664
665#endif // per-target include guard
#define HWY_MAX(a, b)
Definition: base.h:135
#define HWY_NOINLINE
Definition: base.h:72
#define HWY_MIN(a, b)
Definition: base.h:134
#define HWY_ABORT(format,...)
Definition: base.h:188
#define HWY_INLINE
Definition: base.h:70
#define HWY_ASSERT(condition)
Definition: base.h:192
Definition: test_util-inl.h:414
~ForDemoteVectors()
Definition: test_util-inl.h:418
void operator()(T) const
Definition: test_util-inl.h:425
bool called_
Definition: test_util-inl.h:415
Definition: test_util-inl.h:243
void operator()(T) const
Definition: test_util-inl.h:254
bool called_
Definition: test_util-inl.h:244
~ForExtendableVectors()
Definition: test_util-inl.h:247
Definition: test_util-inl.h:322
bool called_
Definition: test_util-inl.h:323
~ForGEVectors()
Definition: test_util-inl.h:326
void operator()(T) const
Definition: test_util-inl.h:333
Definition: test_util-inl.h:457
~ForHalfVectors()
Definition: test_util-inl.h:461
bool called_
Definition: test_util-inl.h:458
void operator()(T) const
Definition: test_util-inl.h:468
Definition: test_util-inl.h:496
bool called_
Definition: test_util-inl.h:497
void operator()(T t) const
Definition: test_util-inl.h:507
~ForPartialVectors()
Definition: test_util-inl.h:500
Definition: test_util-inl.h:370
~ForPromoteVectors()
Definition: test_util-inl.h:374
bool called_
Definition: test_util-inl.h:371
void operator()(T) const
Definition: test_util-inl.h:381
Definition: test_util-inl.h:280
void operator()(T) const
Definition: test_util-inl.h:291
bool called_
Definition: test_util-inl.h:281
~ForShrinkableVectors()
Definition: test_util-inl.h:284
#define HWY_TARGET
Definition: detect_targets.h:380
d
Definition: rvv-inl.h:1998
V VecArg
Definition: ops/shared-inl.h:324
HWY_API Mask128< TTo, N > RebindMask(Simd< TTo, N, 0 > dto, Mask128< TFrom, N > m)
Definition: arm_neon-inl.h:2230
constexpr size_t AdjustedReps(size_t max_reps)
Definition: test_util-inl.h:634
HWY_API bool AllTrue(const Full128< T > d, const Mask128< T > m)
Definition: arm_neon-inl.h:5716
void ForUIF32(const Func &func)
Definition: test_util-inl.h:587
void ForUI163264(const Func &func)
Definition: test_util-inl.h:621
HWY_API Mask128< T, N > FirstN(const Simd< T, N, 0 > d, size_t num)
Definition: arm_neon-inl.h:2456
HWY_API size_t StoreMaskBits(Simd< T, N, 0 >, const Mask128< T, N > mask, uint8_t *bits)
Definition: arm_neon-inl.h:5701
void ForUIF3264(const Func &func)
Definition: test_util-inl.h:615
void ForUIF163264(const Func &func)
Definition: test_util-inl.h:627
constexpr size_t AdjustedLog2Reps(size_t max_pow2)
Definition: test_util-inl.h:647
typename detail::CappedTagChecker< T, kLimit >::type CappedTag
Definition: ops/shared-inl.h:184
void ForUI32(const Func &func)
Definition: test_util-inl.h:581
void ForAllTypes(const Func &func)
Definition: test_util-inl.h:555
Rebind< MakeSigned< TFromD< D > >, D > RebindToSigned
Definition: ops/shared-inl.h:210
void ForFloatTypes(const Func &func)
Definition: test_util-inl.h:547
void Print(const D d, const char *caption, VecArg< V > v, size_t lane_u=0, size_t max_lanes=7)
Definition: print-inl.h:39
HWY_NOINLINE void AssertMaskEqual(D d, VecArg< Mask< D > > a, VecArg< Mask< D > > b, const char *filename, int line)
Definition: test_util-inl.h:69
HWY_API size_t CountTrue(Full128< T >, const Mask128< T > mask)
Definition: arm_neon-inl.h:5671
HWY_API Vec128< T, N > VecFromMask(Simd< T, N, 0 > d, const Mask128< T, N > v)
Definition: arm_neon-inl.h:2223
HWY_INLINE void AssertVecEqual(D d, const T *expected, VecArg< V > actual, const char *filename, const int line)
Definition: test_util-inl.h:44
void ForIntegerTypes(const Func &func)
Definition: test_util-inl.h:541
HWY_API constexpr size_t Lanes(Simd< T, N, kPow2 >)
Definition: arm_sve-inl.h:243
HWY_API Vec128< T, N > Load(Simd< T, N, 0 > d, const T *HWY_RESTRICT p)
Definition: arm_neon-inl.h:2753
HWY_INLINE Mask< D > MaskFalse(const D d)
Definition: test_util-inl.h:132
void ForUI8(const Func &func)
Definition: test_util-inl.h:561
void ForUI3264(const Func &func)
Definition: test_util-inl.h:609
typename detail::ScalableTagChecker< T, kPow2 >::type ScalableTag
Definition: ops/shared-inl.h:173
HWY_API bool AllFalse(const Simd< T, N, 0 > d, const Mask128< T, N > m)
Definition: arm_neon-inl.h:5710
void ForUIF64(const Func &func)
Definition: test_util-inl.h:601
void ForUI16(const Func &func)
Definition: test_util-inl.h:567
typename D::template Rebind< T > Rebind
Definition: ops/shared-inl.h:207
HWY_API Vec128< T, N > Zero(Simd< T, N, 0 > d)
Definition: arm_neon-inl.h:1020
void ForUI64(const Func &func)
Definition: test_util-inl.h:593
void ForSignedTypes(const Func &func)
Definition: test_util-inl.h:521
void ForUIF16(const Func &func)
Definition: test_util-inl.h:573
typename D::template Repartition< T > Repartition
Definition: ops/shared-inl.h:218
decltype(MaskFromVec(Zero(D()))) Mask
Definition: generic_ops-inl.h:46
N
Definition: rvv-inl.h:1998
HWY_INLINE Mask< D > MaskTrue(const D d)
Definition: test_util-inl.h:127
HWY_API void Store(Vec128< T, N > v, Simd< T, N, 0 > d, T *HWY_RESTRICT aligned)
Definition: arm_neon-inl.h:2934
void ForUnsignedTypes(const Func &func)
Definition: test_util-inl.h:531
decltype(Zero(D())) Vec
Definition: generic_ops-inl.h:40
HWY_TEST_DLLEXPORT void AssertArrayEqual(const TypeInfo &info, const void *expected_void, const void *actual_void, size_t N, const char *target_name, const char *filename, int line)
Definition: aligned_allocator.h:27
FuncOutput(*)(const void *, FuncInput) Func
Definition: nanobenchmark.h:105
static HWY_MAYBE_UNUSED const char * TargetName(int64_t target)
Definition: targets.h:85
constexpr size_t CeilLog2(TI x)
Definition: base.h:899
HWY_INLINE void AssertEqual(const T expected, const T actual, const char *target_name, const char *filename, int line, size_t lane=0)
Definition: test_util.h:152
HWY_DLLEXPORT HWY_NORETURN void int line
Definition: base.h:992
#define HWY_LANES(T)
Definition: set_macros-inl.h:85
#define HWY_NAMESPACE
Definition: set_macros-inl.h:82
static void Do(size_t, size_t)
Definition: test_util-inl.h:186
Definition: test_util-inl.h:168
static void Do(size_t min_lanes, size_t max_lanes)
Definition: test_util-inl.h:169
Definition: base.h:291
HWY_AFTER_NAMESPACE()
HWY_BEFORE_NAMESPACE()