libstdc++
simd.h
1 // Definition of the public simd interfaces -*- C++ -*-
2 
3 // Copyright (C) 2020-2023 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_H
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_H
27 
28 #if __cplusplus >= 201703L
29 
30 #include "simd_detail.h"
31 #include "numeric_traits.h"
32 #include <bit>
33 #include <bitset>
34 #ifdef _GLIBCXX_DEBUG_UB
35 #include <cstdio> // for stderr
36 #endif
37 #include <cstring>
38 #include <cmath>
39 #include <functional>
40 #include <iosfwd>
41 #include <utility>
42 
43 #if _GLIBCXX_SIMD_X86INTRIN
44 #include <x86intrin.h>
45 #elif _GLIBCXX_SIMD_HAVE_NEON
46 #pragma GCC diagnostic push
47 // narrowing conversion of '__a' from 'uint64_t' {aka 'long long unsigned int'} to
48 // 'int64x1_t' {aka 'long long int'} [-Wnarrowing]
49 #pragma GCC diagnostic ignored "-Wnarrowing"
50 #include <arm_neon.h>
51 #pragma GCC diagnostic pop
52 #endif
53 
54 /** @ingroup ts_simd
55  * @{
56  */
57 /* There are several closely related types, with the following naming
58  * convention:
59  * _Tp: vectorizable (arithmetic) type (or any type)
60  * _TV: __vector_type_t<_Tp, _Np>
61  * _TW: _SimdWrapper<_Tp, _Np>
62  * _TI: __intrinsic_type_t<_Tp, _Np>
63  * _TVT: _VectorTraits<_TV> or _VectorTraits<_TW>
64  * If one additional type is needed use _U instead of _T.
65  * Otherwise use _T\d, _TV\d, _TW\d, TI\d, _TVT\d.
66  *
67  * More naming conventions:
68  * _Ap or _Abi: An ABI tag from the simd_abi namespace
69  * _Ip: often used for integer types with sizeof(_Ip) == sizeof(_Tp),
70  * _IV, _IW as for _TV, _TW
71  * _Np: number of elements (not bytes)
72  * _Bytes: number of bytes
73  *
74  * Variable names:
75  * __k: mask object (vector- or bitmask)
76  */
77 _GLIBCXX_SIMD_BEGIN_NAMESPACE
78 
79 #if !_GLIBCXX_SIMD_X86INTRIN
80 using __m128 [[__gnu__::__vector_size__(16)]] = float;
81 using __m128d [[__gnu__::__vector_size__(16)]] = double;
82 using __m128i [[__gnu__::__vector_size__(16)]] = long long;
83 using __m256 [[__gnu__::__vector_size__(32)]] = float;
84 using __m256d [[__gnu__::__vector_size__(32)]] = double;
85 using __m256i [[__gnu__::__vector_size__(32)]] = long long;
86 using __m512 [[__gnu__::__vector_size__(64)]] = float;
87 using __m512d [[__gnu__::__vector_size__(64)]] = double;
88 using __m512i [[__gnu__::__vector_size__(64)]] = long long;
89 #endif
90 
91 namespace simd_abi {
92 // simd_abi forward declarations {{{
93 // implementation details:
94 struct _Scalar;
95 
96 template <int _Np>
97  struct _Fixed;
98 
99 // There are two major ABIs that appear on different architectures.
100 // Both have non-boolean values packed into an N Byte register
101 // -> #elements = N / sizeof(T)
102 // Masks differ:
103 // 1. Use value vector registers for masks (all 0 or all 1)
104 // 2. Use bitmasks (mask registers) with one bit per value in the corresponding
105 // value vector
106 //
107 // Both can be partially used, masking off the rest when doing horizontal
108 // operations or operations that can trap (e.g. FP_INVALID or integer division
109 // by 0). This is encoded as the number of used bytes.
110 template <int _UsedBytes>
111  struct _VecBuiltin;
112 
113 template <int _UsedBytes>
114  struct _VecBltnBtmsk;
115 
116 template <typename _Tp, int _Np>
117  using _VecN = _VecBuiltin<sizeof(_Tp) * _Np>;
118 
119 template <int _UsedBytes = 16>
120  using _Sse = _VecBuiltin<_UsedBytes>;
121 
122 template <int _UsedBytes = 32>
123  using _Avx = _VecBuiltin<_UsedBytes>;
124 
125 template <int _UsedBytes = 64>
126  using _Avx512 = _VecBltnBtmsk<_UsedBytes>;
127 
128 template <int _UsedBytes = 16>
129  using _Neon = _VecBuiltin<_UsedBytes>;
130 
131 // implementation-defined:
132 using __sse = _Sse<>;
133 using __avx = _Avx<>;
134 using __avx512 = _Avx512<>;
135 using __neon = _Neon<>;
136 using __neon128 = _Neon<16>;
137 using __neon64 = _Neon<8>;
138 
139 // standard:
140 template <typename _Tp, size_t _Np, typename...>
141  struct deduce;
142 
143 template <int _Np>
144  using fixed_size = _Fixed<_Np>;
145 
146 using scalar = _Scalar;
147 
148 // }}}
149 } // namespace simd_abi
150 // forward declarations is_simd(_mask), simd(_mask), simd_size {{{
151 template <typename _Tp>
152  struct is_simd;
153 
154 template <typename _Tp>
155  struct is_simd_mask;
156 
157 template <typename _Tp, typename _Abi>
158  class simd;
159 
160 template <typename _Tp, typename _Abi>
161  class simd_mask;
162 
163 template <typename _Tp, typename _Abi>
164  struct simd_size;
165 
166 // }}}
167 // load/store flags {{{
// Load/store flag: the pointer is only guaranteed to be aligned like a plain
// object of the element type, i.e. alignof(value_type).
struct element_aligned_tag
{
  // Alignment this tag guarantees for simd type _Tp with element type _Up.
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = alignof(_Up);

  // No extra alignment is known, so the pointer is passed through unchanged.
  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return __ptr; }
};
178 
// Load/store flag: the pointer is aligned to the full vector size, rounded up
// to the next power of two (sizeof(_Up) * _Tp::size() need not be one).
struct vector_aligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment
      = std::__bit_ceil(sizeof(_Up) * _Tp::size());

  // Communicates the alignment guarantee to the compiler so that aligned
  // vector loads/stores can be emitted.
  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _S_alignment<_Tp, _Up>)); }
};
190 
// Load/store flag: the pointer is aligned to a caller-specified value _Np.
// NOTE(review): __builtin_assume_aligned requires _Np to be a power of two;
// this is not enforced here — presumably checked at the call sites. Verify.
template <size_t _Np> struct overaligned_tag
{
  template <typename _Tp, typename _Up = typename _Tp::value_type>
    static constexpr size_t _S_alignment = _Np;

  // Communicates the user-promised alignment to the compiler.
  template <typename _Tp, typename _Up>
    _GLIBCXX_SIMD_INTRINSIC static constexpr _Up*
    _S_apply(_Up* __ptr)
    { return static_cast<_Up*>(__builtin_assume_aligned(__ptr, _Np)); }
};
201 
202 inline constexpr element_aligned_tag element_aligned = {};
203 
204 inline constexpr vector_aligned_tag vector_aligned = {};
205 
206 template <size_t _Np>
207  inline constexpr overaligned_tag<_Np> overaligned = {};
208 
209 // }}}
210 template <size_t _Xp>
211  using _SizeConstant = integral_constant<size_t, _Xp>;
212 // constexpr feature detection{{{
213 constexpr inline bool __have_mmx = _GLIBCXX_SIMD_HAVE_MMX;
214 constexpr inline bool __have_sse = _GLIBCXX_SIMD_HAVE_SSE;
215 constexpr inline bool __have_sse2 = _GLIBCXX_SIMD_HAVE_SSE2;
216 constexpr inline bool __have_sse3 = _GLIBCXX_SIMD_HAVE_SSE3;
217 constexpr inline bool __have_ssse3 = _GLIBCXX_SIMD_HAVE_SSSE3;
218 constexpr inline bool __have_sse4_1 = _GLIBCXX_SIMD_HAVE_SSE4_1;
219 constexpr inline bool __have_sse4_2 = _GLIBCXX_SIMD_HAVE_SSE4_2;
220 constexpr inline bool __have_xop = _GLIBCXX_SIMD_HAVE_XOP;
221 constexpr inline bool __have_avx = _GLIBCXX_SIMD_HAVE_AVX;
222 constexpr inline bool __have_avx2 = _GLIBCXX_SIMD_HAVE_AVX2;
223 constexpr inline bool __have_bmi = _GLIBCXX_SIMD_HAVE_BMI1;
224 constexpr inline bool __have_bmi2 = _GLIBCXX_SIMD_HAVE_BMI2;
225 constexpr inline bool __have_lzcnt = _GLIBCXX_SIMD_HAVE_LZCNT;
226 constexpr inline bool __have_sse4a = _GLIBCXX_SIMD_HAVE_SSE4A;
227 constexpr inline bool __have_fma = _GLIBCXX_SIMD_HAVE_FMA;
228 constexpr inline bool __have_fma4 = _GLIBCXX_SIMD_HAVE_FMA4;
229 constexpr inline bool __have_f16c = _GLIBCXX_SIMD_HAVE_F16C;
230 constexpr inline bool __have_popcnt = _GLIBCXX_SIMD_HAVE_POPCNT;
231 constexpr inline bool __have_avx512f = _GLIBCXX_SIMD_HAVE_AVX512F;
232 constexpr inline bool __have_avx512dq = _GLIBCXX_SIMD_HAVE_AVX512DQ;
233 constexpr inline bool __have_avx512vl = _GLIBCXX_SIMD_HAVE_AVX512VL;
234 constexpr inline bool __have_avx512bw = _GLIBCXX_SIMD_HAVE_AVX512BW;
235 constexpr inline bool __have_avx512dq_vl = __have_avx512dq && __have_avx512vl;
236 constexpr inline bool __have_avx512bw_vl = __have_avx512bw && __have_avx512vl;
237 constexpr inline bool __have_avx512bitalg = _GLIBCXX_SIMD_HAVE_AVX512BITALG;
238 constexpr inline bool __have_avx512vbmi2 = _GLIBCXX_SIMD_HAVE_AVX512VBMI2;
239 constexpr inline bool __have_avx512vbmi = _GLIBCXX_SIMD_HAVE_AVX512VBMI;
240 constexpr inline bool __have_avx512ifma = _GLIBCXX_SIMD_HAVE_AVX512IFMA;
241 constexpr inline bool __have_avx512cd = _GLIBCXX_SIMD_HAVE_AVX512CD;
242 constexpr inline bool __have_avx512vnni = _GLIBCXX_SIMD_HAVE_AVX512VNNI;
243 constexpr inline bool __have_avx512vpopcntdq = _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ;
244 constexpr inline bool __have_avx512vp2intersect = _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT;
245 
246 constexpr inline bool __have_neon = _GLIBCXX_SIMD_HAVE_NEON;
247 constexpr inline bool __have_neon_a32 = _GLIBCXX_SIMD_HAVE_NEON_A32;
248 constexpr inline bool __have_neon_a64 = _GLIBCXX_SIMD_HAVE_NEON_A64;
249 constexpr inline bool __support_neon_float =
250 #if defined __GCC_IEC_559
251  __GCC_IEC_559 == 0;
252 #elif defined __FAST_MATH__
253  true;
254 #else
255  false;
256 #endif
257 
258 #ifdef _ARCH_PWR10
259 constexpr inline bool __have_power10vec = true;
260 #else
261 constexpr inline bool __have_power10vec = false;
262 #endif
263 #ifdef __POWER9_VECTOR__
264 constexpr inline bool __have_power9vec = true;
265 #else
266 constexpr inline bool __have_power9vec = false;
267 #endif
268 #if defined __POWER8_VECTOR__
269 constexpr inline bool __have_power8vec = true;
270 #else
271 constexpr inline bool __have_power8vec = __have_power9vec;
272 #endif
273 #if defined __VSX__
274 constexpr inline bool __have_power_vsx = true;
275 #else
276 constexpr inline bool __have_power_vsx = __have_power8vec;
277 #endif
278 #if defined __ALTIVEC__
279 constexpr inline bool __have_power_vmx = true;
280 #else
281 constexpr inline bool __have_power_vmx = __have_power_vsx;
282 #endif
283 
284 // }}}
285 
286 namespace __detail
287 {
#ifdef math_errhandling
  // Determines _S_handle_fpexcept from math_errhandling if it is defined and expands to a constant
  // expression. math_errhandling may expand to an extern symbol, in which case a constexpr value
  // must be guessed.
  // The int-parameter overload is preferred by __handle_fpexcept_impl(0); if
  // math_errhandling is not usable as a constant expression the default
  // template argument is ill-formed and this overload drops out (SFINAE),
  // falling back to the float-parameter overload below.
  template <int = math_errhandling>
    constexpr bool
    __handle_fpexcept_impl(int)
    { return math_errhandling & MATH_ERREXCEPT; }
#endif

  // Fallback if math_errhandling doesn't work: with fast-math assume floating-point exceptions are
  // ignored, otherwise implement correct exception behavior.
  constexpr bool
  __handle_fpexcept_impl(float)
  {
#if defined __FAST_MATH__
    return false;
#else
    return true;
#endif
  }

  /// True if math functions must raise floating-point exceptions as specified by C17.
  static constexpr bool _S_handle_fpexcept = __handle_fpexcept_impl(0);
312 
  // Encodes the floating-point semantics of the current translation unit
  // (FP-exception handling, -ffast-math / -ffinite-math-only / IEC 559
  // conformance level, and __FLT_EVAL_METHOD__) into one integer. Combined
  // with __machine_flags() in __odr_helper so that TUs compiled with
  // different FP options produce distinct symbols.
  constexpr std::uint_least64_t
  __floating_point_flags()
  {
    std::uint_least64_t __flags = 0;
    if constexpr (_S_handle_fpexcept)
      __flags |= 1;
#ifdef __FAST_MATH__
    __flags |= 1 << 1;
#elif __FINITE_MATH_ONLY__
    __flags |= 2 << 1;
#elif __GCC_IEC_559 < 2
    // also taken when __GCC_IEC_559 is undefined: unknown identifiers
    // evaluate to 0 in preprocessor conditionals
    __flags |= 3 << 1;
#endif
    // __FLT_EVAL_METHOD__ may be -1; the +1 shifts its range to start at 0
    __flags |= (__FLT_EVAL_METHOD__ + 1) << 3;
    return __flags;
  }
329 
330  constexpr std::uint_least64_t
331  __machine_flags()
332  {
333  if constexpr (__have_mmx || __have_sse)
334  return __have_mmx
335  | (__have_sse << 1)
336  | (__have_sse2 << 2)
337  | (__have_sse3 << 3)
338  | (__have_ssse3 << 4)
339  | (__have_sse4_1 << 5)
340  | (__have_sse4_2 << 6)
341  | (__have_xop << 7)
342  | (__have_avx << 8)
343  | (__have_avx2 << 9)
344  | (__have_bmi << 10)
345  | (__have_bmi2 << 11)
346  | (__have_lzcnt << 12)
347  | (__have_sse4a << 13)
348  | (__have_fma << 14)
349  | (__have_fma4 << 15)
350  | (__have_f16c << 16)
351  | (__have_popcnt << 17)
352  | (__have_avx512f << 18)
353  | (__have_avx512dq << 19)
354  | (__have_avx512vl << 20)
355  | (__have_avx512bw << 21)
356  | (__have_avx512bitalg << 22)
357  | (__have_avx512vbmi2 << 23)
358  | (__have_avx512vbmi << 24)
359  | (__have_avx512ifma << 25)
360  | (__have_avx512cd << 26)
361  | (__have_avx512vnni << 27)
362  | (__have_avx512vpopcntdq << 28)
363  | (__have_avx512vp2intersect << 29);
364  else if constexpr (__have_neon)
365  return __have_neon
366  | (__have_neon_a32 << 1)
367  | (__have_neon_a64 << 2)
368  | (__have_neon_a64 << 2)
369  | (__support_neon_float << 3);
370  else if constexpr (__have_power_vmx)
371  return __have_power_vmx
372  | (__have_power_vsx << 1)
373  | (__have_power8vec << 2)
374  | (__have_power9vec << 3)
375  | (__have_power10vec << 4);
376  else
377  return 0;
378  }
379 
  namespace
  {
    // Deliberately placed in an unnamed namespace inside a header: each TU
    // instantiates a distinct _OdrEnforcer type, making any symbol templated
    // on it unique per TU (this is the point, not an oversight).
    struct _OdrEnforcer {};
  }

  // Carrier for the machine/floating-point flag values computed above.
  template <std::uint_least64_t...>
    struct _MachineFlagsTemplate {};

  /**@internal
   * Use this type as default template argument to all function templates that
   * are not declared always_inline. It ensures, that a function
   * specialization, which the compiler decides not to inline, has a unique symbol
   * (_OdrEnforcer) or a symbol matching the machine/architecture flags
   * (_MachineFlagsTemplate). This helps to avoid ODR violations in cases where
   * users link TUs compiled with different flags. This is especially important
   * for using simd in libraries.
   */
  using __odr_helper
    = conditional_t<__machine_flags() == 0, _OdrEnforcer,
		    _MachineFlagsTemplate<__machine_flags(), __floating_point_flags()>>;
400 
401  struct _Minimum
402  {
403  template <typename _Tp>
404  _GLIBCXX_SIMD_INTRINSIC constexpr
405  _Tp
406  operator()(_Tp __a, _Tp __b) const
407  {
408  using std::min;
409  return min(__a, __b);
410  }
411  };
412 
413  struct _Maximum
414  {
415  template <typename _Tp>
416  _GLIBCXX_SIMD_INTRINSIC constexpr
417  _Tp
418  operator()(_Tp __a, _Tp __b) const
419  {
420  using std::max;
421  return max(__a, __b);
422  }
423  };
424 } // namespace __detail
425 
426 // unrolled/pack execution helpers
427 // __execute_n_times{{{
// Invokes __f(_SizeConstant<0>{}), __f(_SizeConstant<1>{}), ... left to
// right via a comma fold. The (void) cast defends against overloaded
// operator, on __f's return type.
template <typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  void
  __execute_on_index_sequence(_Fp&& __f, index_sequence<_I...>)
  { ((void)__f(_SizeConstant<_I>()), ...); }

// Empty index sequence: nothing to invoke.
template <typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_on_index_sequence(_Fp&&, index_sequence<>)
  { }

// Fully unrolled loop: calls __f(_SizeConstant<_I>{}) for _I in [0, _Np).
// static_cast<_Fp&&> is equivalent to std::forward<_Fp> here.
template <size_t _Np, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr void
  __execute_n_times(_Fp&& __f)
  {
    __execute_on_index_sequence(static_cast<_Fp&&>(__f),
				make_index_sequence<_Np>{});
  }
446 
447 // }}}
448 // __generate_from_n_evaluations{{{
// Builds an _R from a braced-init-list of __f(_SizeConstant<_I>{})...;
// list-initialization guarantees left-to-right evaluation of the calls.
template <typename _R, typename _Fp, size_t... _I>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  _R
  __execute_on_index_sequence_with_return(_Fp&& __f, index_sequence<_I...>)
  { return _R{__f(_SizeConstant<_I>())...}; }

// Returns _R{__f(0), __f(1), ..., __f(_Np - 1)} (indices passed as
// _SizeConstant so __f can use them as compile-time constants).
template <size_t _Np, typename _R, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __generate_from_n_evaluations(_Fp&& __f)
  {
    return __execute_on_index_sequence_with_return<_R>(
	     static_cast<_Fp&&>(__f), make_index_sequence<_Np>{});
  }
462 
463 // }}}
464 // __call_with_n_evaluations{{{
// Expands to __f0(__fargs(_SizeConstant<0>{}), __fargs(_SizeConstant<1>{}),
// ...): __fargs produces one argument per index, __f0 consumes them all.
// NOTE: evaluation order of the __fargs calls within the function call is
// unspecified (unlike the braced-init variant above).
template <size_t... _I, typename _F0, typename _FArgs>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_n_evaluations(index_sequence<_I...>, _F0&& __f0, _FArgs&& __fargs)
  { return __f0(__fargs(_SizeConstant<_I>())...); }

// Convenience overload generating the index sequence [0, _Np).
template <size_t _Np, typename _F0, typename _FArgs>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_n_evaluations(_F0&& __f0, _FArgs&& __fargs)
  {
    return __call_with_n_evaluations(make_index_sequence<_Np>{},
				     static_cast<_F0&&>(__f0),
				     static_cast<_FArgs&&>(__fargs));
  }
479 
480 // }}}
481 // __call_with_subscripts{{{
// Calls __fun(__x[_First], __x[_First + 1], ...) — i.e. passes a window of
// subscripted elements of __x, starting at offset _First, as individual
// arguments.
template <size_t _First = 0, size_t... _It, typename _Tp, typename _Fp>
  [[__gnu__::__flatten__]] _GLIBCXX_SIMD_INTRINSIC constexpr
  auto
  __call_with_subscripts(_Tp&& __x, index_sequence<_It...>, _Fp&& __fun)
  { return __fun(__x[_First + _It]...); }

// Convenience overload: window of _Np elements starting at _First.
template <size_t _Np, size_t _First = 0, typename _Tp, typename _Fp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __call_with_subscripts(_Tp&& __x, _Fp&& __fun)
  {
    return __call_with_subscripts<_First>(static_cast<_Tp&&>(__x),
					  make_index_sequence<_Np>(),
					  static_cast<_Fp&&>(__fun));
  }
496 
497 // }}}
498 
499 // vvv ---- type traits ---- vvv
500 // integer type aliases{{{
501 using _UChar = unsigned char;
502 using _SChar = signed char;
503 using _UShort = unsigned short;
504 using _UInt = unsigned int;
505 using _ULong = unsigned long;
506 using _ULLong = unsigned long long;
507 using _LLong = long long;
508 
509 //}}}
510 // __first_of_pack{{{
511 template <typename _T0, typename...>
512  struct __first_of_pack
513  { using type = _T0; };
514 
515 template <typename... _Ts>
516  using __first_of_pack_t = typename __first_of_pack<_Ts...>::type;
517 
518 //}}}
519 // __value_type_or_identity_t {{{
520 template <typename _Tp>
521  typename _Tp::value_type
522  __value_type_or_identity_impl(int);
523 
524 template <typename _Tp>
525  _Tp
526  __value_type_or_identity_impl(float);
527 
528 template <typename _Tp>
529  using __value_type_or_identity_t
530  = decltype(__value_type_or_identity_impl<_Tp>(int()));
531 
532 // }}}
533 // __is_vectorizable {{{
// Trait: _Tp may be used as simd element type. All arithmetic types qualify
// except bool (explicitly excluded below).
template <typename _Tp>
  struct __is_vectorizable : public is_arithmetic<_Tp> {};

// bool is arithmetic but not a valid simd element type.
template <>
  struct __is_vectorizable<bool> : public false_type {};

template <typename _Tp>
  inline constexpr bool __is_vectorizable_v = __is_vectorizable<_Tp>::value;

// Deduces to a vectorizable type (SFINAE helper: substitution fails for
// non-vectorizable _Tp)
template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using _Vectorizable = _Tp;
546 
547 // }}}
548 // _LoadStorePtr / __is_possible_loadstore_conversion {{{
549 template <typename _Ptr, typename _ValueType>
550  struct __is_possible_loadstore_conversion
551  : conjunction<__is_vectorizable<_Ptr>, __is_vectorizable<_ValueType>> {};
552 
553 template <>
554  struct __is_possible_loadstore_conversion<bool, bool> : true_type {};
555 
556 // Deduces to a type allowed for load/store with the given value type.
557 template <typename _Ptr, typename _ValueType,
558  typename = enable_if_t<
559  __is_possible_loadstore_conversion<_Ptr, _ValueType>::value>>
560  using _LoadStorePtr = _Ptr;
561 
562 // }}}
563 // __is_bitmask{{{
// Trait: _Tp stores one bit per element (bitmask) rather than one vector
// lane per element. Primary template: not a bitmask.
template <typename _Tp, typename = void_t<>>
  struct __is_bitmask : false_type {};

template <typename _Tp>
  inline constexpr bool __is_bitmask_v = __is_bitmask<_Tp>::value;

// the __mmaskXX case: detected via "unsigned = _Tp & 1u" being well-formed,
// which holds for the AVX-512 mask register types.
template <typename _Tp>
  struct __is_bitmask<_Tp,
		      void_t<decltype(declval<unsigned&>() = declval<_Tp>() & 1u)>>
  : true_type {};
575 
576 // }}}
577 // __int_for_sizeof{{{
578 #pragma GCC diagnostic push
579 #pragma GCC diagnostic ignored "-Wpedantic"
// Returns a value-initialized object of a signed integer type with
// sizeof == _Bytes (used via decltype by __int_for_sizeof_t below). For
// sizes that match no standard integer type but are a multiple of
// sizeof(int), returns a local aggregate of ints that supports the bitwise
// operators (&, |, ^, ~) needed for mask storage.
template <size_t _Bytes>
  constexpr auto
  __int_for_sizeof()
  {
    static_assert(_Bytes > 0);
    if constexpr (_Bytes == sizeof(int))
      return int();
    else if constexpr (_Bytes == sizeof(_SChar))
      return _SChar();
    else if constexpr (_Bytes == sizeof(short))
      return short();
    else if constexpr (_Bytes == sizeof(long))
      return long();
    else if constexpr (_Bytes == sizeof(_LLong))
      return _LLong();
  #ifdef __SIZEOF_INT128__
    else if constexpr (_Bytes == sizeof(__int128))
      return __int128();
  #endif // __SIZEOF_INT128__
    else if constexpr (_Bytes % sizeof(int) == 0)
      {
	constexpr size_t _Np = _Bytes / sizeof(int);
	// Aggregate of _Np ints; element-wise bitwise ops implemented via
	// __generate_from_n_evaluations.
	struct _Ip
	{
	  int _M_data[_Np];

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator&(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
		     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		       return __rhs._M_data[__i] & _M_data[__i];
		     });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator|(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
		     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		       return __rhs._M_data[__i] | _M_data[__i];
		     });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator^(_Ip __rhs) const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
		     [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
		       return __rhs._M_data[__i] ^ _M_data[__i];
		     });
	  }

	  _GLIBCXX_SIMD_INTRINSIC constexpr _Ip
	  operator~() const
	  {
	    return __generate_from_n_evaluations<_Np, _Ip>(
	      [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return ~_M_data[__i]; });
	  }
	};
	return _Ip{};
      }
    else
      // dependent false condition: fires only when this branch is taken
      static_assert(_Bytes == 0, "this should be unreachable");
  }
645 #pragma GCC diagnostic pop
646 
647 template <typename _Tp>
648  using __int_for_sizeof_t = decltype(__int_for_sizeof<sizeof(_Tp)>());
649 
650 template <size_t _Np>
651  using __int_with_sizeof_t = decltype(__int_for_sizeof<_Np>());
652 
653 // }}}
654 // __is_fixed_size_abi{{{
655 template <typename _Tp>
656  struct __is_fixed_size_abi : false_type {};
657 
658 template <int _Np>
659  struct __is_fixed_size_abi<simd_abi::fixed_size<_Np>> : true_type {};
660 
661 template <typename _Tp>
662  inline constexpr bool __is_fixed_size_abi_v = __is_fixed_size_abi<_Tp>::value;
663 
664 // }}}
665 // __is_scalar_abi {{{
666 template <typename _Abi>
667  constexpr bool
668  __is_scalar_abi()
669  { return is_same_v<simd_abi::scalar, _Abi>; }
670 
671 // }}}
672 // __abi_bytes_v {{{
// Extracts the _UsedBytes template argument from an ABI tag of the form
// _Abi<_Bytes> (e.g. _VecBuiltin<16>), via overload resolution on a null
// pointer of the ABI type.
template <template <int> class _Abi, int _Bytes>
  constexpr int
  __abi_bytes_impl(_Abi<_Bytes>*)
  { return _Bytes; }

// Fallback for ABI tags without an int parameter (scalar, fixed_size): -1.
template <typename _Tp>
  constexpr int
  __abi_bytes_impl(_Tp*)
  { return -1; }

template <typename _Abi>
  inline constexpr int __abi_bytes_v
    = __abi_bytes_impl(static_cast<_Abi*>(nullptr));
686 
687 // }}}
688 // __is_builtin_bitmask_abi {{{
// Predicate: _Abi is a _VecBltnBtmsk ABI (vector values + bitmask masks,
// i.e. the AVX-512 style ABI) of any size.
template <typename _Abi>
  constexpr bool
  __is_builtin_bitmask_abi()
  { return is_same_v<simd_abi::_VecBltnBtmsk<__abi_bytes_v<_Abi>>, _Abi>; }

// }}}
// __is_sse_abi {{{
// Predicate: _VecBuiltin ABI of at most 16 bytes. Note this is structurally
// the same test as __is_neon_abi — the tag types do not encode the target
// architecture, only the register style and size.
template <typename _Abi>
  constexpr bool
  __is_sse_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx_abi {{{
// Predicate: _VecBuiltin ABI of 17..32 bytes (AVX/AVX2-sized registers).
template <typename _Abi>
  constexpr bool
  __is_avx_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes > 16 && _Bytes <= 32
	     && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }

// }}}
// __is_avx512_abi {{{
// Predicate: _VecBltnBtmsk (bitmask-style) ABI of at most 64 bytes. Unlike
// __is_avx_abi there is no lower bound: AVX-512VL allows 16/32-byte
// registers with mask registers.
template <typename _Abi>
  constexpr bool
  __is_avx512_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 64 && is_same_v<simd_abi::_Avx512<_Bytes>, _Abi>;
  }

// }}}
// __is_neon_abi {{{
// Predicate: _VecBuiltin ABI of at most 16 bytes (NEON d/q registers).
template <typename _Abi>
  constexpr bool
  __is_neon_abi()
  {
    constexpr auto _Bytes = __abi_bytes_v<_Abi>;
    return _Bytes <= 16 && is_same_v<simd_abi::_VecBuiltin<_Bytes>, _Abi>;
  }
734 
735 // }}}
736 // __make_dependent_t {{{
737 template <typename, typename _Up>
738  struct __make_dependent
739  { using type = _Up; };
740 
741 template <typename _Tp, typename _Up>
742  using __make_dependent_t = typename __make_dependent<_Tp, _Up>::type;
743 
744 // }}}
745 // ^^^ ---- type traits ---- ^^^
746 
747 // __invoke_ub{{{
// Called on code paths that are undefined behavior (e.g. out-of-bounds simd
// subscript). With _GLIBCXX_DEBUG_UB defined, prints the printf-style
// message to stderr and traps for debuggability; otherwise the path is
// declared unreachable so the optimizer can discard it.
template <typename... _Args>
  [[noreturn]] _GLIBCXX_SIMD_ALWAYS_INLINE void
  __invoke_ub([[maybe_unused]] const char* __msg, [[maybe_unused]] const _Args&... __args)
  {
#ifdef _GLIBCXX_DEBUG_UB
    __builtin_fprintf(stderr, __msg, __args...);
    __builtin_trap();
#else
    __builtin_unreachable();
#endif
  }
759 
760 // }}}
761 // __assert_unreachable{{{
762 template <typename _Tp>
763  struct __assert_unreachable
764  { static_assert(!is_same_v<_Tp, _Tp>, "this should be unreachable"); };
765 
766 // }}}
767 // __size_or_zero_v {{{
// simd_size<_Tp, _Ap>::value if that is a valid expression, otherwise 0.
// The int-parameter overload wins when the default template argument
// substitutes successfully; otherwise SFINAE removes it and the float
// overload (returning 0) is chosen.
template <typename _Tp, typename _Ap, size_t _Np = simd_size<_Tp, _Ap>::value>
  constexpr size_t
  __size_or_zero_dispatch(int)
  { return _Np; }

template <typename _Tp, typename _Ap>
  constexpr size_t
  __size_or_zero_dispatch(float)
  { return 0; }

template <typename _Tp, typename _Ap>
  inline constexpr size_t __size_or_zero_v
    = __size_or_zero_dispatch<_Tp, _Ap>(0);
781 
782 // }}}
783 // __div_roundup {{{
// Returns ceil(__a / __b) for unsigned arguments. Precondition: __b != 0.
// Implemented as quotient plus remainder-carry instead of (__a + __b - 1) /
// __b, so the result is also correct when __a is within __b of SIZE_MAX
// (where the naive sum would wrap around and yield a too-small result).
inline constexpr size_t
__div_roundup(size_t __a, size_t __b)
{ return __a / __b + (__a % __b != 0); }
787 
788 // }}}
789 // _ExactBool{{{
// Parameter type that accepts exactly bool: the deleted int constructor
// makes calls with pointers or integers (which would otherwise implicitly
// convert to bool) ill-formed instead of silently accepted.
class _ExactBool
{
  const bool _M_data;

public:
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _ExactBool(bool __b) : _M_data(__b) {}

  _ExactBool(int) = delete; // reject anything that isn't literally bool

  _GLIBCXX_SIMD_INTRINSIC constexpr
  operator bool() const
  { return _M_data; }
};
804 
805 // }}}
806 // __may_alias{{{
807 /**@internal
808  * Helper __may_alias<_Tp> that turns _Tp into the type to be used for an
809  * aliasing pointer. This adds the __may_alias attribute to _Tp (with compilers
810  * that support it).
811  */
812 template <typename _Tp>
813  using __may_alias [[__gnu__::__may_alias__]] = _Tp;
814 
815 // }}}
816 // _UnsupportedBase {{{
817 // simd and simd_mask base for unsupported <_Tp, _Abi>
818 struct _UnsupportedBase
819 {
820  _UnsupportedBase() = delete;
821  _UnsupportedBase(const _UnsupportedBase&) = delete;
822  _UnsupportedBase& operator=(const _UnsupportedBase&) = delete;
823  ~_UnsupportedBase() = delete;
824 };
825 
826 // }}}
827 // _InvalidTraits {{{
828 /**
829  * @internal
830  * Defines the implementation of __a given <_Tp, _Abi>.
831  *
832  * Implementations must ensure that only valid <_Tp, _Abi> instantiations are
833  * possible. Static assertions in the type definition do not suffice. It is
834  * important that SFINAE works.
835  */
// Traits object for unsupported <_Tp, _Abi> combinations. Provides the full
// member set expected of _SimdTraits, but with _IsValid = false_type and
// unusable bases/members, so simd/simd_mask instantiations degrade to
// non-constructible types instead of producing hard errors (SFINAE stays
// usable).
struct _InvalidTraits
{
  using _IsValid = false_type;
  using _SimdBase = _UnsupportedBase;
  using _MaskBase = _UnsupportedBase;

  static constexpr size_t _S_full_size = 0;
  static constexpr bool _S_is_partial = false;

  static constexpr size_t _S_simd_align = 1;
  struct _SimdImpl;
  struct _SimdMember {};
  struct _SimdCastType;

  static constexpr size_t _S_mask_align = 1;
  struct _MaskImpl;
  struct _MaskMember {};
  struct _MaskCastType;
};
855 
856 // }}}
857 // _SimdTraits {{{
858 template <typename _Tp, typename _Abi, typename = void_t<>>
859  struct _SimdTraits : _InvalidTraits {};
860 
861 // }}}
862 // __private_init, __bitset_init{{{
863 /**
864  * @internal
865  * Tag used for private init constructor of simd and simd_mask
866  */
867 inline constexpr struct _PrivateInit {} __private_init = {};
868 
869 inline constexpr struct _BitsetInit {} __bitset_init = {};
870 
871 // }}}
872 // __is_narrowing_conversion<_From, _To>{{{
// Trait: converting _From to _To loses information (fewer digits, smaller
// range, or signed-to-unsigned). The two bool parameters dispatch on whether
// _From/_To are arithmetic.
template <typename _From, typename _To, bool = is_arithmetic_v<_From>,
	  bool = is_arithmetic_v<_To>>
  struct __is_narrowing_conversion;

// ignore "signed/unsigned mismatch" in the following trait.
// The implicit conversions will do the right thing here.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, true, true>
  : public __bool_constant<(
      __digits_v<_From> > __digits_v<_To>
	|| __finite_max_v<_From> > __finite_max_v<_To>
	|| __finite_min_v<_From> < __finite_min_v<_To>
	|| (is_signed_v<_From> && is_unsigned_v<_To>))> {};

// any arithmetic type -> bool narrows ...
template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, bool, true, true>
  : public true_type {};

// ... except bool -> bool.
template <>
  struct __is_narrowing_conversion<bool, bool, true, true>
  : public false_type {};

// identity conversion never narrows.
template <typename _Tp>
  struct __is_narrowing_conversion<_Tp, _Tp, true, true>
  : public false_type {};

// non-arithmetic -> arithmetic: narrowing iff not implicitly convertible.
template <typename _From, typename _To>
  struct __is_narrowing_conversion<_From, _To, false, true>
  : public negation<is_convertible<_From, _To>> {};
902 
903 // }}}
904 // __converts_to_higher_integer_rank{{{
905 template <typename _From, typename _To, bool = (sizeof(_From) < sizeof(_To))>
906  struct __converts_to_higher_integer_rank : public true_type {};
907 
908 // this may fail for char -> short if sizeof(char) == sizeof(short)
909 template <typename _From, typename _To>
910  struct __converts_to_higher_integer_rank<_From, _To, false>
911  : public is_same<decltype(declval<_From>() + declval<_To>()), _To> {};
912 
913 // }}}
914 // __data(simd/simd_mask) {{{
915 template <typename _Tp, typename _Ap>
916  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
917  __data(const simd<_Tp, _Ap>& __x);
918 
919 template <typename _Tp, typename _Ap>
920  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
921  __data(simd<_Tp, _Ap>& __x);
922 
923 template <typename _Tp, typename _Ap>
924  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
925  __data(const simd_mask<_Tp, _Ap>& __x);
926 
927 template <typename _Tp, typename _Ap>
928  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
929  __data(simd_mask<_Tp, _Ap>& __x);
930 
931 // }}}
932 // _SimdConverter {{{
// Converts the internal representation of simd<_FromT, _FromA> to that of
// simd<_ToT, _ToA>. Specializations for the real conversions live elsewhere;
// only the primary declaration and the identity case are defined here.
template <typename _FromT, typename _FromA, typename _ToT, typename _ToA,
	  typename = void>
  struct _SimdConverter;

// Identity conversion: same element type and ABI, pass the data through.
template <typename _Tp, typename _Ap>
  struct _SimdConverter<_Tp, _Ap, _Tp, _Ap, void>
  {
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC const _Up&
      operator()(const _Up& __x)
      { return __x; }
  };
945 
946 // }}}
947 // __to_value_type_or_member_type {{{
948 template <typename _V>
949  _GLIBCXX_SIMD_INTRINSIC constexpr auto
950  __to_value_type_or_member_type(const _V& __x) -> decltype(__data(__x))
951  { return __data(__x); }
952 
953 template <typename _V>
954  _GLIBCXX_SIMD_INTRINSIC constexpr const typename _V::value_type&
955  __to_value_type_or_member_type(const typename _V::value_type& __x)
956  { return __x; }
957 
958 // }}}
959 // __bool_storage_member_type{{{
960 template <size_t _Size>
961  struct __bool_storage_member_type;
962 
963 template <size_t _Size>
964  using __bool_storage_member_type_t =
965  typename __bool_storage_member_type<_Size>::type;
966 
967 // }}}
968 // _SimdTuple {{{
969 // why not tuple?
970 // 1. tuple gives no guarantee about the storage order, but I require
971 // storage
972 // equivalent to array<_Tp, _Np>
973 // 2. direct access to the element type (first template argument)
974 // 3. enforces equal element type, only different _Abi types are allowed
975 template <typename _Tp, typename... _Abis>
976  struct _SimdTuple;
977 
978 //}}}
979 // __fixed_size_storage_t {{{
980 template <typename _Tp, int _Np>
981  struct __fixed_size_storage;
982 
983 template <typename _Tp, int _Np>
984  using __fixed_size_storage_t = typename __fixed_size_storage<_Tp, _Np>::type;
985 
986 // }}}
987 // _SimdWrapper fwd decl{{{
// Wrapper around the storage for _Size elements of _Tp; defined later.
template <typename _Tp, size_t _Size, typename = void_t<>>
  struct _SimdWrapper;

// Convenience aliases naming the wrapper by total size in bytes.
template <typename _Tp>
  using _SimdWrapper8 = _SimdWrapper<_Tp, 8 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper16 = _SimdWrapper<_Tp, 16 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper32 = _SimdWrapper<_Tp, 32 / sizeof(_Tp)>;
template <typename _Tp>
  using _SimdWrapper64 = _SimdWrapper<_Tp, 64 / sizeof(_Tp)>;
999 
1000 // }}}
1001 // __is_simd_wrapper {{{
// Trait: true_type iff _Tp is a _SimdWrapper specialization.
template <typename _Tp>
  struct __is_simd_wrapper : false_type {};

template <typename _Tp, size_t _Np>
  struct __is_simd_wrapper<_SimdWrapper<_Tp, _Np>> : true_type {};

template <typename _Tp>
  inline constexpr bool __is_simd_wrapper_v = __is_simd_wrapper<_Tp>::value;
1010 
1011 // }}}
1012 // _BitOps {{{
1013 struct _BitOps
1014 {
1015  // _S_bit_iteration {{{
1016  template <typename _Tp, typename _Fp>
1017  static void
1018  _S_bit_iteration(_Tp __mask, _Fp&& __f)
1019  {
1020  static_assert(sizeof(_ULLong) >= sizeof(_Tp));
1021  conditional_t<sizeof(_Tp) <= sizeof(_UInt), _UInt, _ULLong> __k;
1022  if constexpr (is_convertible_v<_Tp, decltype(__k)>)
1023  __k = __mask;
1024  else
1025  __k = __mask.to_ullong();
1026  while(__k)
1027  {
1028  __f(std::__countr_zero(__k));
1029  __k &= (__k - 1);
1030  }
1031  }
1032 
1033  //}}}
1034 };
1035 
1036 //}}}
1037 // __increment, __decrement {{{
// Functor returning its argument incremented by one.
template <typename _Tp = void>
  struct __increment
  {
    constexpr _Tp
    operator()(_Tp __x) const
    {
      ++__x;
      return __x;
    }
  };

// Transparent specialization: deduces the operand type at the call site.
template <>
  struct __increment<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __x) const
      {
	++__x;
	return __x;
      }
  };
1050 
// Functor returning its argument decremented by one.
template <typename _Tp = void>
  struct __decrement
  {
    constexpr _Tp
    operator()(_Tp __x) const
    {
      --__x;
      return __x;
    }
  };

// Transparent specialization: deduces the operand type at the call site.
template <>
  struct __decrement<void>
  {
    template <typename _Tp>
      constexpr _Tp
      operator()(_Tp __x) const
      {
	--__x;
	return __x;
      }
  };
1063 
1064 // }}}
1065 // _ValuePreserving(OrInt) {{{
// _ValuePreserving<_From, _To> is _From iff the _From -> _To conversion is
// not narrowing; used in SFINAE to constrain conversion overloads.
template <typename _From, typename _To,
	  typename = enable_if_t<negation<
	    __is_narrowing_conversion<__remove_cvref_t<_From>, _To>>::value>>
  using _ValuePreserving = _From;

// Like _ValuePreserving, but additionally accepts int (and _UInt when _To
// is unsigned), so that integer literals are usable even where the
// conversion would otherwise be narrowing.
template <typename _From, typename _To,
	  typename _DecayedFrom = __remove_cvref_t<_From>,
	  typename = enable_if_t<conjunction<
	    is_convertible<_From, _To>,
	    disjunction<
	      is_same<_DecayedFrom, _To>, is_same<_DecayedFrom, int>,
	      conjunction<is_same<_DecayedFrom, _UInt>, is_unsigned<_To>>,
	      negation<__is_narrowing_conversion<_DecayedFrom, _To>>>>::value>>
  using _ValuePreservingOrInt = _From;
1080 
1081 // }}}
1082 // __intrinsic_type {{{
// Maps (_Tp, _Bytes) to the target's intrinsic vector type; only declared
// here, specializations are provided per target.
template <typename _Tp, size_t _Bytes, typename = void_t<>>
  struct __intrinsic_type;

template <typename _Tp, size_t _Size>
  using __intrinsic_type_t =
    typename __intrinsic_type<_Tp, _Size * sizeof(_Tp)>::type;

// Aliases naming the intrinsic type by total size in bytes.
template <typename _Tp>
  using __intrinsic_type2_t = typename __intrinsic_type<_Tp, 2>::type;
template <typename _Tp>
  using __intrinsic_type4_t = typename __intrinsic_type<_Tp, 4>::type;
template <typename _Tp>
  using __intrinsic_type8_t = typename __intrinsic_type<_Tp, 8>::type;
template <typename _Tp>
  using __intrinsic_type16_t = typename __intrinsic_type<_Tp, 16>::type;
template <typename _Tp>
  using __intrinsic_type32_t = typename __intrinsic_type<_Tp, 32>::type;
template <typename _Tp>
  using __intrinsic_type64_t = typename __intrinsic_type<_Tp, 64>::type;
1102 
1103 // }}}
1104 // _BitMask {{{
// Fixed-size bitmask of _Np bits. _Sanitized == true guarantees that any
// unused (padding) high bits are zero.
template <size_t _Np, bool _Sanitized = false>
  struct _BitMask;

template <size_t _Np, bool _Sanitized>
  struct __is_bitmask<_BitMask<_Np, _Sanitized>, void> : true_type {};

// A _BitMask whose padding bits are known to be zero.
template <size_t _Np>
  using _SanitizedBitMask = _BitMask<_Np, true>;
1113 
1114 template <size_t _Np, bool _Sanitized>
1115  struct _BitMask
1116  {
1117  static_assert(_Np > 0);
1118 
1119  static constexpr size_t _NBytes = __div_roundup(_Np, __CHAR_BIT__);
1120 
1121  using _Tp = conditional_t<_Np == 1, bool,
1122  make_unsigned_t<__int_with_sizeof_t<std::min(
1123  sizeof(_ULLong), std::__bit_ceil(_NBytes))>>>;
1124 
1125  static constexpr int _S_array_size = __div_roundup(_NBytes, sizeof(_Tp));
1126 
1127  _Tp _M_bits[_S_array_size];
1128 
1129  static constexpr int _S_unused_bits
1130  = _Np == 1 ? 0 : _S_array_size * sizeof(_Tp) * __CHAR_BIT__ - _Np;
1131 
1132  static constexpr _Tp _S_bitmask = +_Tp(~_Tp()) >> _S_unused_bits;
1133 
1134  constexpr _BitMask() noexcept = default;
1135 
1136  constexpr _BitMask(unsigned long long __x) noexcept
1137  : _M_bits{static_cast<_Tp>(__x)} {}
1138 
1139  _BitMask(bitset<_Np> __x) noexcept : _BitMask(__x.to_ullong()) {}
1140 
1141  constexpr _BitMask(const _BitMask&) noexcept = default;
1142 
1143  template <bool _RhsSanitized, typename = enable_if_t<_RhsSanitized == false
1144  && _Sanitized == true>>
1145  constexpr _BitMask(const _BitMask<_Np, _RhsSanitized>& __rhs) noexcept
1146  : _BitMask(__rhs._M_sanitized()) {}
1147 
1148  constexpr operator _SimdWrapper<bool, _Np>() const noexcept
1149  {
1150  static_assert(_S_array_size == 1);
1151  return _M_bits[0];
1152  }
1153 
1154  // precondition: is sanitized
1155  constexpr _Tp
1156  _M_to_bits() const noexcept
1157  {
1158  static_assert(_S_array_size == 1);
1159  return _M_bits[0];
1160  }
1161 
1162  // precondition: is sanitized
1163  constexpr unsigned long long
1164  to_ullong() const noexcept
1165  {
1166  static_assert(_S_array_size == 1);
1167  return _M_bits[0];
1168  }
1169 
1170  // precondition: is sanitized
1171  constexpr unsigned long
1172  to_ulong() const noexcept
1173  {
1174  static_assert(_S_array_size == 1);
1175  return _M_bits[0];
1176  }
1177 
1178  constexpr bitset<_Np>
1179  _M_to_bitset() const noexcept
1180  {
1181  static_assert(_S_array_size == 1);
1182  return _M_bits[0];
1183  }
1184 
1185  constexpr decltype(auto)
1186  _M_sanitized() const noexcept
1187  {
1188  if constexpr (_Sanitized)
1189  return *this;
1190  else if constexpr (_Np == 1)
1191  return _SanitizedBitMask<_Np>(_M_bits[0]);
1192  else
1193  {
1194  _SanitizedBitMask<_Np> __r = {};
1195  for (int __i = 0; __i < _S_array_size; ++__i)
1196  __r._M_bits[__i] = _M_bits[__i];
1197  if constexpr (_S_unused_bits > 0)
1198  __r._M_bits[_S_array_size - 1] &= _S_bitmask;
1199  return __r;
1200  }
1201  }
1202 
1203  template <size_t _Mp, bool _LSanitized>
1204  constexpr _BitMask<_Np + _Mp, _Sanitized>
1205  _M_prepend(_BitMask<_Mp, _LSanitized> __lsb) const noexcept
1206  {
1207  constexpr size_t _RN = _Np + _Mp;
1208  using _Rp = _BitMask<_RN, _Sanitized>;
1209  if constexpr (_Rp::_S_array_size == 1)
1210  {
1211  _Rp __r{{_M_bits[0]}};
1212  __r._M_bits[0] <<= _Mp;
1213  __r._M_bits[0] |= __lsb._M_sanitized()._M_bits[0];
1214  return __r;
1215  }
1216  else
1217  __assert_unreachable<_Rp>();
1218  }
1219 
1220  // Return a new _BitMask with size _NewSize while dropping _DropLsb least
1221  // significant bits. If the operation implicitly produces a sanitized bitmask,
1222  // the result type will have _Sanitized set.
1223  template <size_t _DropLsb, size_t _NewSize = _Np - _DropLsb>
1224  constexpr auto
1225  _M_extract() const noexcept
1226  {
1227  static_assert(_Np > _DropLsb);
1228  static_assert(_DropLsb + _NewSize <= sizeof(_ULLong) * __CHAR_BIT__,
1229  "not implemented for bitmasks larger than one ullong");
1230  if constexpr (_NewSize == 1)
1231  // must sanitize because the return _Tp is bool
1232  return _SanitizedBitMask<1>(_M_bits[0] & (_Tp(1) << _DropLsb));
1233  else
1234  return _BitMask<_NewSize,
1235  ((_NewSize + _DropLsb == sizeof(_Tp) * __CHAR_BIT__
1236  && _NewSize + _DropLsb <= _Np)
1237  || ((_Sanitized || _Np == sizeof(_Tp) * __CHAR_BIT__)
1238  && _NewSize + _DropLsb >= _Np))>(_M_bits[0]
1239  >> _DropLsb);
1240  }
1241 
1242  // True if all bits are set. Implicitly sanitizes if _Sanitized == false.
1243  constexpr bool
1244  all() const noexcept
1245  {
1246  if constexpr (_Np == 1)
1247  return _M_bits[0];
1248  else if constexpr (!_Sanitized)
1249  return _M_sanitized().all();
1250  else
1251  {
1252  constexpr _Tp __allbits = ~_Tp();
1253  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1254  if (_M_bits[__i] != __allbits)
1255  return false;
1256  return _M_bits[_S_array_size - 1] == _S_bitmask;
1257  }
1258  }
1259 
1260  // True if at least one bit is set. Implicitly sanitizes if _Sanitized ==
1261  // false.
1262  constexpr bool
1263  any() const noexcept
1264  {
1265  if constexpr (_Np == 1)
1266  return _M_bits[0];
1267  else if constexpr (!_Sanitized)
1268  return _M_sanitized().any();
1269  else
1270  {
1271  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1272  if (_M_bits[__i] != 0)
1273  return true;
1274  return _M_bits[_S_array_size - 1] != 0;
1275  }
1276  }
1277 
1278  // True if no bit is set. Implicitly sanitizes if _Sanitized == false.
1279  constexpr bool
1280  none() const noexcept
1281  {
1282  if constexpr (_Np == 1)
1283  return !_M_bits[0];
1284  else if constexpr (!_Sanitized)
1285  return _M_sanitized().none();
1286  else
1287  {
1288  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1289  if (_M_bits[__i] != 0)
1290  return false;
1291  return _M_bits[_S_array_size - 1] == 0;
1292  }
1293  }
1294 
1295  // Returns the number of set bits. Implicitly sanitizes if _Sanitized ==
1296  // false.
1297  constexpr int
1298  count() const noexcept
1299  {
1300  if constexpr (_Np == 1)
1301  return _M_bits[0];
1302  else if constexpr (!_Sanitized)
1303  return _M_sanitized().none();
1304  else
1305  {
1306  int __result = __builtin_popcountll(_M_bits[0]);
1307  for (int __i = 1; __i < _S_array_size; ++__i)
1308  __result += __builtin_popcountll(_M_bits[__i]);
1309  return __result;
1310  }
1311  }
1312 
1313  // Returns the bit at offset __i as bool.
1314  constexpr bool
1315  operator[](size_t __i) const noexcept
1316  {
1317  if constexpr (_Np == 1)
1318  return _M_bits[0];
1319  else if constexpr (_S_array_size == 1)
1320  return (_M_bits[0] >> __i) & 1;
1321  else
1322  {
1323  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1324  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1325  return (_M_bits[__j] >> __shift) & 1;
1326  }
1327  }
1328 
1329  template <size_t __i>
1330  constexpr bool
1331  operator[](_SizeConstant<__i>) const noexcept
1332  {
1333  static_assert(__i < _Np);
1334  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1335  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1336  return static_cast<bool>(_M_bits[__j] & (_Tp(1) << __shift));
1337  }
1338 
1339  // Set the bit at offset __i to __x.
1340  constexpr void
1341  set(size_t __i, bool __x) noexcept
1342  {
1343  if constexpr (_Np == 1)
1344  _M_bits[0] = __x;
1345  else if constexpr (_S_array_size == 1)
1346  {
1347  _M_bits[0] &= ~_Tp(_Tp(1) << __i);
1348  _M_bits[0] |= _Tp(_Tp(__x) << __i);
1349  }
1350  else
1351  {
1352  const size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1353  const size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1354  _M_bits[__j] &= ~_Tp(_Tp(1) << __shift);
1355  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1356  }
1357  }
1358 
1359  template <size_t __i>
1360  constexpr void
1361  set(_SizeConstant<__i>, bool __x) noexcept
1362  {
1363  static_assert(__i < _Np);
1364  if constexpr (_Np == 1)
1365  _M_bits[0] = __x;
1366  else
1367  {
1368  constexpr size_t __j = __i / (sizeof(_Tp) * __CHAR_BIT__);
1369  constexpr size_t __shift = __i % (sizeof(_Tp) * __CHAR_BIT__);
1370  constexpr _Tp __mask = ~_Tp(_Tp(1) << __shift);
1371  _M_bits[__j] &= __mask;
1372  _M_bits[__j] |= _Tp(_Tp(__x) << __shift);
1373  }
1374  }
1375 
1376  // Inverts all bits. Sanitized input leads to sanitized output.
1377  constexpr _BitMask
1378  operator~() const noexcept
1379  {
1380  if constexpr (_Np == 1)
1381  return !_M_bits[0];
1382  else
1383  {
1384  _BitMask __result{};
1385  for (int __i = 0; __i < _S_array_size - 1; ++__i)
1386  __result._M_bits[__i] = ~_M_bits[__i];
1387  if constexpr (_Sanitized)
1388  __result._M_bits[_S_array_size - 1]
1389  = _M_bits[_S_array_size - 1] ^ _S_bitmask;
1390  else
1391  __result._M_bits[_S_array_size - 1] = ~_M_bits[_S_array_size - 1];
1392  return __result;
1393  }
1394  }
1395 
1396  constexpr _BitMask&
1397  operator^=(const _BitMask& __b) & noexcept
1398  {
1399  __execute_n_times<_S_array_size>(
1400  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] ^= __b._M_bits[__i]; });
1401  return *this;
1402  }
1403 
1404  constexpr _BitMask&
1405  operator|=(const _BitMask& __b) & noexcept
1406  {
1407  __execute_n_times<_S_array_size>(
1408  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] |= __b._M_bits[__i]; });
1409  return *this;
1410  }
1411 
1412  constexpr _BitMask&
1413  operator&=(const _BitMask& __b) & noexcept
1414  {
1415  __execute_n_times<_S_array_size>(
1416  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { _M_bits[__i] &= __b._M_bits[__i]; });
1417  return *this;
1418  }
1419 
1420  friend constexpr _BitMask
1421  operator^(const _BitMask& __a, const _BitMask& __b) noexcept
1422  {
1423  _BitMask __r = __a;
1424  __r ^= __b;
1425  return __r;
1426  }
1427 
1428  friend constexpr _BitMask
1429  operator|(const _BitMask& __a, const _BitMask& __b) noexcept
1430  {
1431  _BitMask __r = __a;
1432  __r |= __b;
1433  return __r;
1434  }
1435 
1436  friend constexpr _BitMask
1437  operator&(const _BitMask& __a, const _BitMask& __b) noexcept
1438  {
1439  _BitMask __r = __a;
1440  __r &= __b;
1441  return __r;
1442  }
1443 
1444  _GLIBCXX_SIMD_INTRINSIC
1445  constexpr bool
1446  _M_is_constprop() const
1447  {
1448  if constexpr (_S_array_size == 0)
1449  return __builtin_constant_p(_M_bits[0]);
1450  else
1451  {
1452  for (int __i = 0; __i < _S_array_size; ++__i)
1453  if (!__builtin_constant_p(_M_bits[__i]))
1454  return false;
1455  return true;
1456  }
1457  }
1458  };
1459 
1460 // }}}
1461 
1462 // vvv ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- vvv
1463 // __min_vector_size {{{
// Minimum size in bytes used for builtin vector types. Defaults to twice
// the element size; the type-independent (void) value is 8 on NEON targets
// and 16 elsewhere.
template <typename _Tp = void>
  static inline constexpr int __min_vector_size = 2 * sizeof(_Tp);

#if _GLIBCXX_SIMD_HAVE_NEON
template <>
  inline constexpr int __min_vector_size<void> = 8;
#else
template <>
  inline constexpr int __min_vector_size<void> = 16;
#endif
1473 #endif
1474 
1475 // }}}
1476 // __vector_type {{{
// Maps (_Tp, _Np) to a GNU builtin vector type of at least _Np elements.
template <typename _Tp, size_t _Np, typename = void>
  struct __vector_type_n {};

// substitution failure for 0-element case
template <typename _Tp>
  struct __vector_type_n<_Tp, 0, void> {};

// special case 1-element to be _Tp itself
template <typename _Tp>
  struct __vector_type_n<_Tp, 1, enable_if_t<__is_vectorizable_v<_Tp>>>
  { using type = _Tp; };

// else, use GNU-style builtin vector types
template <typename _Tp, size_t _Np>
  struct __vector_type_n<_Tp, _Np, enable_if_t<__is_vectorizable_v<_Tp> && _Np >= 2>>
  {
    // Byte count rounded up to the next power of two.
    static constexpr size_t _S_Np2 = std::__bit_ceil(_Np * sizeof(_Tp));

    static constexpr size_t _S_Bytes =
#ifdef __i386__
      // Using [[gnu::vector_size(8)]] would wreak havoc on the FPU because
      // those objects are passed via MMX registers and nothing ever calls EMMS.
      _S_Np2 == 8 ? 16 :
#endif
      // Never go below the target's minimum vector size.
      _S_Np2 < __min_vector_size<_Tp> ? __min_vector_size<_Tp>
				      : _S_Np2;

    using type [[__gnu__::__vector_size__(_S_Bytes)]] = _Tp;
  };
1506 
// __vector_type<_Tp, _Bytes> is only defined when _Bytes is a multiple of
// sizeof(_Tp); it forwards to __vector_type_n with the element count.
template <typename _Tp, size_t _Bytes, size_t = _Bytes % sizeof(_Tp)>
  struct __vector_type;

template <typename _Tp, size_t _Bytes>
  struct __vector_type<_Tp, _Bytes, 0>
  : __vector_type_n<_Tp, _Bytes / sizeof(_Tp)> {};

template <typename _Tp, size_t _Size>
  using __vector_type_t = typename __vector_type_n<_Tp, _Size>::type;

// Aliases naming the vector type by total size in bytes.
template <typename _Tp>
  using __vector_type2_t = typename __vector_type<_Tp, 2>::type;
template <typename _Tp>
  using __vector_type4_t = typename __vector_type<_Tp, 4>::type;
template <typename _Tp>
  using __vector_type8_t = typename __vector_type<_Tp, 8>::type;
template <typename _Tp>
  using __vector_type16_t = typename __vector_type<_Tp, 16>::type;
template <typename _Tp>
  using __vector_type32_t = typename __vector_type<_Tp, 32>::type;
template <typename _Tp>
  using __vector_type64_t = typename __vector_type<_Tp, 64>::type;
1529 
1530 // }}}
1531 // __is_vector_type {{{
// Trait: true iff _Tp is exactly the builtin vector type that __vector_type
// would produce for _Tp's element type and total size.
template <typename _Tp, typename = void_t<>>
  struct __is_vector_type : false_type {};

template <typename _Tp>
  struct __is_vector_type<
    _Tp, void_t<typename __vector_type<
	   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __vector_type<
		   remove_reference_t<decltype(declval<_Tp>()[0])>,
		   sizeof(_Tp)>::type> {};

template <typename _Tp>
  inline constexpr bool __is_vector_type_v = __is_vector_type<_Tp>::value;
1545 
1546 // }}}
1547 // __is_intrinsic_type {{{
#if _GLIBCXX_SIMD_HAVE_SSE_ABI
// On x86 with SSE the intrinsic types are the builtin vector types.
template <typename _Tp>
  using __is_intrinsic_type = __is_vector_type<_Tp>;
#else // not SSE (x86)
// Otherwise, compare against the type __intrinsic_type would produce for
// _Tp's element type and total size.
template <typename _Tp, typename = void_t<>>
  struct __is_intrinsic_type : false_type {};

template <typename _Tp>
  struct __is_intrinsic_type<
    _Tp, void_t<typename __intrinsic_type<
	   remove_reference_t<decltype(declval<_Tp>()[0])>, sizeof(_Tp)>::type>>
  : is_same<_Tp, typename __intrinsic_type<
		   remove_reference_t<decltype(declval<_Tp>()[0])>,
		   sizeof(_Tp)>::type> {};
#endif

template <typename _Tp>
  inline constexpr bool __is_intrinsic_type_v = __is_intrinsic_type<_Tp>::value;
1566 
1567 // }}}
1568 // _VectorTraits{{{
1569 template <typename _Tp, typename = void_t<>>
1570  struct _VectorTraitsImpl;
1571 
1572 template <typename _Tp>
1573  struct _VectorTraitsImpl<_Tp, enable_if_t<__is_vector_type_v<_Tp>
1574  || __is_intrinsic_type_v<_Tp>>>
1575  {
1576  using type = _Tp;
1577  using value_type = remove_reference_t<decltype(declval<_Tp>()[0])>;
1578  static constexpr int _S_full_size = sizeof(_Tp) / sizeof(value_type);
1579  using _Wrapper = _SimdWrapper<value_type, _S_full_size>;
1580  template <typename _Up, int _W = _S_full_size>
1581  static constexpr bool _S_is
1582  = is_same_v<value_type, _Up> && _W == _S_full_size;
1583  };
1584 
1585 template <typename _Tp, size_t _Np>
1586  struct _VectorTraitsImpl<_SimdWrapper<_Tp, _Np>,
1587  void_t<__vector_type_t<_Tp, _Np>>>
1588  {
1589  using type = __vector_type_t<_Tp, _Np>;
1590  using value_type = _Tp;
1591  static constexpr int _S_full_size = sizeof(type) / sizeof(value_type);
1592  using _Wrapper = _SimdWrapper<_Tp, _Np>;
1593  static constexpr bool _S_is_partial = (_Np == _S_full_size);
1594  static constexpr int _S_partial_width = _Np;
1595  template <typename _Up, int _W = _S_full_size>
1596  static constexpr bool _S_is
1597  = is_same_v<value_type, _Up>&& _W == _S_full_size;
1598  };
1599 
1600 template <typename _Tp, typename = typename _VectorTraitsImpl<_Tp>::type>
1601  using _VectorTraits = _VectorTraitsImpl<_Tp>;
1602 
1603 // }}}
1604 // __as_vector{{{
// Returns the builtin vector underlying __x. Accepts builtin vectors
// (returned unchanged), simd/simd_mask objects (unwrapped via __data),
// vectorizable scalars (broadcast into a 2-element vector), and otherwise
// anything exposing an _M_data member (e.g. _SimdWrapper).
template <typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_vector(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __x;
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
	if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
	  {
	    // Only fixed_size simd with a single-entry tuple can be handled.
	    static_assert(is_simd<_V>::value);
	    static_assert(_V::abi_type::template __traits<
			    typename _V::value_type>::_SimdMember::_S_tuple_size == 1);
	    return __as_vector(__data(__x).first);
	  }
	else if constexpr (_V::size() > 1)
	  return __data(__x)._M_data;
	else
	  {
	    // Single-element simd: wrap the scalar in a 1-element vector.
	    static_assert(is_simd<_V>::value);
	    using _Tp = typename _V::value_type;
#ifdef __i386__
	    // Avoid 8-byte vectors on i386 (MMX register passing issue; see
	    // the comment in __vector_type_n).
	    constexpr auto __bytes = sizeof(_Tp) == 8 ? 16 : sizeof(_Tp);
	    using _RV [[__gnu__::__vector_size__(__bytes)]] = _Tp;
#else
	    using _RV [[__gnu__::__vector_size__(sizeof(_Tp))]] = _Tp;
#endif
	    return _RV{__data(__x)};
	  }
      }
    else if constexpr (__is_vectorizable_v<_V>)
      return __vector_type_t<_V, 2>{__x};
    else
      return __x._M_data;
  }
1640 
1641 // }}}
1642 // __as_wrapper{{{
// Returns __x as wrapper (member) data. Builtin vectors are wrapped in a
// _SimdWrapper of _Np (or full-size) elements; simd/simd_mask objects are
// unwrapped via __data.
template <size_t _Np = 0, typename _V>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __as_wrapper(_V __x)
  {
    if constexpr (__is_vector_type_v<_V>)
      return _SimdWrapper<typename _VectorTraits<_V>::value_type,
			  (_Np > 0 ? _Np : _VectorTraits<_V>::_S_full_size)>(__x);
    else if constexpr (is_simd<_V>::value || is_simd_mask<_V>::value)
      {
	static_assert(_V::size() == _Np);
	return __data(__x);
      }
    else
      {
	// Presumably already wrapper-like (exposes _S_size); returned as-is.
	static_assert(_V::_S_size == _Np);
	return __x;
      }
  }
1661 
1662 // }}}
1663 // __intrin_bitcast{{{
// Bit-casts between vector/intrinsic types of possibly different sizes.
// Shrinking keeps the low bytes; growing zero-extends (using dedicated x86
// builtins for 16->32, 16->64, and 32->64 byte widening where available).
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __intrin_bitcast(_From __v)
  {
    static_assert((__is_vector_type_v<_From> || __is_intrinsic_type_v<_From>)
		    && (__is_vector_type_v<_To> || __is_intrinsic_type_v<_To>));
    if constexpr (sizeof(_To) == sizeof(_From))
      return reinterpret_cast<_To>(__v);
    else if constexpr (sizeof(_From) > sizeof(_To))
      // Truncation: read the low sizeof(_To) bytes of __v.
      if constexpr (sizeof(_To) >= 16)
	return reinterpret_cast<const __may_alias<_To>&>(__v);
      else
	{
	  _To __r;
	  __builtin_memcpy(&__r, &__v, sizeof(_To));
	  return __r;
	}
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
    else if constexpr (__have_avx && sizeof(_From) == 16 && sizeof(_To) == 32)
      return reinterpret_cast<_To>(__builtin_ia32_ps256_ps(
	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 16
			 && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_ps(
	reinterpret_cast<__vector_type_t<float, 4>>(__v)));
    else if constexpr (__have_avx512f && sizeof(_From) == 32
			 && sizeof(_To) == 64)
      return reinterpret_cast<_To>(__builtin_ia32_ps512_256ps(
	reinterpret_cast<__vector_type_t<float, 8>>(__v)));
#endif // _GLIBCXX_SIMD_X86INTRIN
    else if constexpr (sizeof(__v) <= 8)
      // Small input: place its bits in the first integer element of _To.
      return reinterpret_cast<_To>(
	__vector_type_t<__int_for_sizeof_t<_From>, sizeof(_To) / sizeof(_From)>{
	  reinterpret_cast<__int_for_sizeof_t<_From>>(__v)});
    else
      {
	// General widening fallback: zero-fill, then copy the input bytes.
	static_assert(sizeof(_To) > sizeof(_From));
	_To __r = {};
	__builtin_memcpy(&__r, &__v, sizeof(_From));
	return __r;
      }
  }
1706 
1707 // }}}
1708 // __vector_bitcast{{{
// Reinterprets __x as a vector of _Np elements of _To. _Np defaults to the
// element count implied by the byte size of _From; pass _NN to override.
template <typename _To, size_t _NN = 0, typename _From,
	  typename _FromVT = _VectorTraits<_From>,
	  size_t _Np = _NN == 0 ? sizeof(_From) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(_From __x)
  {
    using _R = __vector_type_t<_To, _Np>;
    return __intrin_bitcast<_R>(__x);
  }

// Overload for _SimdWrapper arguments: bitcasts the wrapped vector.
template <typename _To, size_t _NN = 0, typename _Tp, size_t _Nx,
	  size_t _Np
	    = _NN == 0 ? sizeof(_SimdWrapper<_Tp, _Nx>) / sizeof(_To) : _NN>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_To, _Np>
  __vector_bitcast(const _SimdWrapper<_Tp, _Nx>& __x)
  {
    static_assert(_Np > 1);
    return __intrin_bitcast<__vector_type_t<_To, _Np>>(__x._M_data);
  }
1728 
1729 // }}}
1730 // __convert_x86 declarations {{{
#ifdef _GLIBCXX_SIMD_WORKAROUND_PR85048
// Vector conversion helpers for 1, 2, 4, 8, or 16 input vectors; declared
// here, defined elsewhere (workaround for GCC PR 85048).
template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp);

template <typename _To, typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _To __convert_x86(_Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp, _Tp,
		    _Tp, _Tp, _Tp, _Tp);
#endif // _GLIBCXX_SIMD_WORKAROUND_PR85048
1748 
1749 //}}}
1750 // __bit_cast {{{
// Bit-exact reinterpretation between equally sized types. Uses
// __builtin_bit_cast where available; otherwise emulates it with vector
// reinterpret_casts (to stay usable in constant expressions where the types
// allow) or memcpy as a last resort.
template <typename _To, typename _From>
  _GLIBCXX_SIMD_INTRINSIC constexpr _To
  __bit_cast(const _From __x)
  {
#if __has_builtin(__builtin_bit_cast)
    return __builtin_bit_cast(_To, __x);
#else
    static_assert(sizeof(_To) == sizeof(_From));
    constexpr bool __to_is_vectorizable
      = is_arithmetic_v<_To> || is_enum_v<_To>;
    constexpr bool __from_is_vectorizable
      = is_arithmetic_v<_From> || is_enum_v<_From>;
    if constexpr (__is_vector_type_v<_To> && __is_vector_type_v<_From>)
      return reinterpret_cast<_To>(__x);
    else if constexpr (__is_vector_type_v<_To> && __from_is_vectorizable)
      {
	// Scalar -> vector: wrap the scalar in a 1-element vector first.
	using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
	return reinterpret_cast<_To>(_FV{__x});
      }
    else if constexpr (__to_is_vectorizable && __from_is_vectorizable)
      {
	// Scalar -> scalar: go through 1-element vectors of both types.
	using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
	using _FV [[__gnu__::__vector_size__(sizeof(_From))]] = _From;
	return reinterpret_cast<_TV>(_FV{__x})[0];
      }
    else if constexpr (__to_is_vectorizable && __is_vector_type_v<_From>)
      {
	// Vector -> scalar: reinterpret and extract element 0.
	using _TV [[__gnu__::__vector_size__(sizeof(_To))]] = _To;
	return reinterpret_cast<_TV>(__x)[0];
      }
    else
      {
	// Fallback (not constexpr-friendly): byte copy.
	_To __r;
	__builtin_memcpy(reinterpret_cast<char*>(&__r),
			 reinterpret_cast<const char*>(&__x), sizeof(_To));
	return __r;
      }
#endif
  }
1790 
1791 // }}}
1792 // __to_intrin {{{
// Converts __x to the corresponding intrinsic type _R, zero-extending when
// _R is larger than __x (never truncating).
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
	  typename _R = __intrinsic_type_t<typename _TVT::value_type, _TVT::_S_full_size>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __to_intrin(_Tp __x)
  {
    static_assert(sizeof(__x) <= sizeof(_R),
		  "__to_intrin may never drop values off the end");
    if constexpr (sizeof(__x) == sizeof(_R))
      return reinterpret_cast<_R>(__as_vector(__x));
    else
      {
	// Widen: place the bits of __x in the first integer element of _R.
	using _Up = __int_for_sizeof_t<_Tp>;
	return reinterpret_cast<_R>(
	  __vector_type_t<_Up, sizeof(_R) / sizeof(_Up)>{__bit_cast<_Up>(__x)});
      }
  }
1809 
1810 // }}}
1811 // __make_vector{{{
// Builds a vector with one element per argument, each converted to _Tp.
template <typename _Tp, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, sizeof...(_Args)>
  __make_vector(const _Args&... __args)
  { return __vector_type_t<_Tp, sizeof...(_Args)>{static_cast<_Tp>(__args)...}; }
1816 
1817 // }}}
1818 // __vector_broadcast{{{
// Helper: expands the index pack (discarding each index) to repeat __x.
template <size_t _Np, typename _Tp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast_impl(_Tp __x, index_sequence<_I...>)
  { return __vector_type_t<_Tp, _Np>{((void)_I, __x)...}; }

// Returns a vector of _Np elements, all initialized to __x.
template <size_t _Np, typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __vector_broadcast(_Tp __x)
  { return __vector_broadcast_impl<_Np, _Tp>(__x, make_index_sequence<_Np>()); }
1828 
1829 // }}}
1830 // __generate_vector{{{
// Helper: builds a vector whose element __i is __gen(_SizeConstant<__i>()).
template <typename _Tp, size_t _Np, typename _Gp, size_t... _I>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector_impl(_Gp&& __gen, index_sequence<_I...>)
  { return __vector_type_t<_Tp, _Np>{ static_cast<_Tp>(__gen(_SizeConstant<_I>()))...}; }

// Generates a vector of type _V; uses the full register width for builtin
// vector types and the partial width for wrapper types.
template <typename _V, typename _VVT = _VectorTraits<_V>, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _V
  __generate_vector(_Gp&& __gen)
  {
    if constexpr (__is_vector_type_v<_V>)
      return __generate_vector_impl<typename _VVT::value_type,
				    _VVT::_S_full_size>(
	       static_cast<_Gp&&>(__gen), make_index_sequence<_VVT::_S_full_size>());
    else
      return __generate_vector_impl<typename _VVT::value_type,
				    _VVT::_S_partial_width>(
	       static_cast<_Gp&&>(__gen),
	       make_index_sequence<_VVT::_S_partial_width>());
  }

// Generates a vector of _Np elements of _Tp from the generator __gen.
template <typename _Tp, size_t _Np, typename _Gp>
  _GLIBCXX_SIMD_INTRINSIC constexpr __vector_type_t<_Tp, _Np>
  __generate_vector(_Gp&& __gen)
  {
    return __generate_vector_impl<_Tp, _Np>(static_cast<_Gp&&>(__gen),
					    make_index_sequence<_Np>());
  }
1858 
1859 // }}}
1860 // __xor{{{
// Bitwise XOR of two vectors, wrappers, or scalars. Floating-point element
// types are bitcast to unsigned integers for the operation and back.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __xor(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
					   _VectorTraitsImpl<_TW>>::value_type;
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
					   ^ __vector_bitcast<_Ip>(__b));
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a ^ __b;
	else
	  return __a._M_data ^ __b._M_data;
      }
    else
      return __a ^ __b;
  }
1883 
1884 // }}}
1885 // __or{{{
// Bitwise OR of two vectors, wrappers, or scalars. Floating-point element
// types are bitcast to unsigned integers for the operation and back.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __or(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
					   _VectorTraitsImpl<_TW>>::value_type;
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
					   | __vector_bitcast<_Ip>(__b));
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a | __b;
	else
	  return __a._M_data | __b._M_data;
      }
    else
      return __a | __b;
  }
1908 
1909 // }}}
1910 // __and{{{
// Bitwise AND of two vectors, wrappers, or scalars. Floating-point element
// types are bitcast to unsigned integers for the operation and back.
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __and(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
	using _Tp = typename conditional_t<__is_simd_wrapper_v<_TW>, _TW,
					   _VectorTraitsImpl<_TW>>::value_type;
	if constexpr (is_floating_point_v<_Tp>)
	  {
	    using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
	    return __vector_bitcast<_Tp>(__vector_bitcast<_Ip>(__a)
					   & __vector_bitcast<_Ip>(__b));
	  }
	else if constexpr (__is_vector_type_v<_TW>)
	  return __a & __b;
	else
	  return __a._M_data & __b._M_data;
      }
    else
      return __a & __b;
  }
1933 
1934 // }}}
1935 // __andnot{{{
1936 #if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
// Overload set dispatching to the x86 andnot instructions, which compute
// (~__a & __b). Covers 16/32/64-byte float, double, and integer vectors;
// integer 32/64-byte forms fall back to the FP instructions when AVX2 /
// AVX512DQ are unavailable.
static constexpr struct
{
  _GLIBCXX_SIMD_INTRINSIC __v4sf
  operator()(__v4sf __a, __v4sf __b) const noexcept
  { return __builtin_ia32_andnps(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v2df
  operator()(__v2df __a, __v2df __b) const noexcept
  { return __builtin_ia32_andnpd(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v2di
  operator()(__v2di __a, __v2di __b) const noexcept
  { return __builtin_ia32_pandn128(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v8sf
  operator()(__v8sf __a, __v8sf __b) const noexcept
  { return __builtin_ia32_andnps256(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v4df
  operator()(__v4df __a, __v4df __b) const noexcept
  { return __builtin_ia32_andnpd256(__a, __b); }

  _GLIBCXX_SIMD_INTRINSIC __v4di
  operator()(__v4di __a, __v4di __b) const noexcept
  {
    if constexpr (__have_avx2)
      return __builtin_ia32_andnotsi256(__a, __b);
    else
      // No integer andnot without AVX2; use the double form instead.
      return reinterpret_cast<__v4di>(
	       __builtin_ia32_andnpd256(reinterpret_cast<__v4df>(__a),
					reinterpret_cast<__v4df>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v16sf
  operator()(__v16sf __a, __v16sf __b) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_ps(__a, __b);
    else
      // vandnps on zmm requires AVX512DQ; use the integer form instead.
      return reinterpret_cast<__v16sf>(
	       _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
				   reinterpret_cast<__v8di>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8df
  operator()(__v8df __a, __v8df __b) const noexcept
  {
    if constexpr (__have_avx512dq)
      return _mm512_andnot_pd(__a, __b);
    else
      // vandnpd on zmm requires AVX512DQ; use the integer form instead.
      return reinterpret_cast<__v8df>(
	       _mm512_andnot_si512(reinterpret_cast<__v8di>(__a),
				   reinterpret_cast<__v8di>(__b)));
  }

  _GLIBCXX_SIMD_INTRINSIC __v8di
  operator()(__v8di __a, __v8di __b) const noexcept
  { return _mm512_andnot_si512(__a, __b); }
} _S_x86_andnot;
1996 #endif // _GLIBCXX_SIMD_X86INTRIN && !__clang__
1997 
// Returns ~__a & __b for vectors, _SimdWrapper objects, or scalars.
// On x86 (GCC only) vectors of at least 16 bytes dispatch to _S_x86_andnot,
// unless the call is constant-evaluated or both operands are compile-time
// constants (in which case the generic expression enables constant folding).
template <typename _TW>
  _GLIBCXX_SIMD_INTRINSIC constexpr _TW
  __andnot(_TW __a, _TW __b) noexcept
  {
    if constexpr (__is_vector_type_v<_TW> || __is_simd_wrapper_v<_TW>)
      {
        using _TVT = conditional_t<__is_simd_wrapper_v<_TW>, _TW,
                                   _VectorTraitsImpl<_TW>>;
        using _Tp = typename _TVT::value_type;
#if _GLIBCXX_SIMD_X86INTRIN && !defined __clang__
        if constexpr (sizeof(_TW) >= 16)
          {
            const auto __ai = __to_intrin(__a);
            const auto __bi = __to_intrin(__b);
            if (!__builtin_is_constant_evaluated()
                  && !(__builtin_constant_p(__ai) && __builtin_constant_p(__bi)))
              {
                const auto __r = _S_x86_andnot(__ai, __bi);
                if constexpr (is_convertible_v<decltype(__r), _TW>)
                  return __r;
                else // e.g. wrap back into the expected builtin vector type
                  return reinterpret_cast<typename _TVT::type>(__r);
              }
          }
#endif // _GLIBCXX_SIMD_X86INTRIN
        // Generic path: complement and AND on the unsigned-integer bit
        // representation (works for floating-point element types, too).
        using _Ip = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
        return __vector_bitcast<_Tp>(~__vector_bitcast<_Ip>(__a)
                                       & __vector_bitcast<_Ip>(__b));
      }
    else // scalar fallback
      return ~__a & __b;
  }
2030 
2031 // }}}
2032 // __not{{{
// Bitwise complement of a vector. Floating-point element types are
// complemented via their bit representation (as vectors of unsigned int),
// since operator~ is not defined for floating-point vectors.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  __not(_Tp __a) noexcept
  {
    if constexpr (is_floating_point_v<typename _TVT::value_type>)
      return reinterpret_cast<typename _TVT::type>(
               ~__vector_bitcast<unsigned>(__a));
    else
      return ~__a;
  }
2043 
2044 // }}}
2045 // __vec_shuffle{{{
// Builds a vector of sizeof...(_Is) elements by shuffling elements of __x and
// __y. __idx_perm maps each destination index to a source index: values
// < _N0 select __x[j], values >= _N0 select __y[j - _N0]. The permutation
// must be usable in constant expressions.
template <typename _T0, typename _T1, typename _Fun, size_t... _Is>
  _GLIBCXX_SIMD_INTRINSIC constexpr
  __vector_type_t<remove_reference_t<decltype(declval<_T0>()[0])>, sizeof...(_Is)>
  __vec_shuffle(_T0 __x, _T1 __y, index_sequence<_Is...> __seq, _Fun __idx_perm)
  {
    constexpr int _N0 = sizeof(__x) / sizeof(__x[0]);
    constexpr int _N1 = sizeof(__y) / sizeof(__y[0]);
    using _Tp = remove_reference_t<decltype(declval<_T0>()[0])>;
    using _RV [[maybe_unused]] = __vector_type_t<_Tp, sizeof...(_Is)>;
#if __has_builtin(__builtin_shufflevector)
#ifdef __clang__
    // Clang requires _T0 == _T1. Resize the smaller operand to match the
    // larger one; when __y's indices shift (because _N1 changes) the index
    // permutation is rewritten accordingly.
    if constexpr (sizeof(__x) > sizeof(__y) and _N1 == 1)
      return __vec_shuffle(__x, _T0{__y[0]}, __seq, __idx_perm);
    else if constexpr (sizeof(__x) > sizeof(__y))
      return __vec_shuffle(__x, __intrin_bitcast<_T0>(__y), __seq, __idx_perm);
    else if constexpr (sizeof(__x) < sizeof(__y) and _N0 == 1)
      return __vec_shuffle(_T1{__x[0]}, __y, __seq, [=](int __i) {
               __i = __idx_perm(__i);
               return __i < _N0 ? __i : __i - _N0 + _N1;
             });
    else if constexpr (sizeof(__x) < sizeof(__y))
      return __vec_shuffle(__intrin_bitcast<_T1>(__x), __y, __seq, [=](int __i) {
               __i = __idx_perm(__i);
               return __i < _N0 ? __i : __i - _N0 + _N1;
             });
    else
#endif
      {
        // Each index is computed in an immediately-invoked constexpr lambda
        // so it can be checked against the combined input width.
        const auto __r = __builtin_shufflevector(__x, __y, [=] {
                           constexpr int __j = __idx_perm(_Is);
                           static_assert(__j < _N0 + _N1);
                           return __j;
                         }()...);
#ifdef __i386__
        // On i386 the builtin's return type may not match _RV in size;
        // rebuild the result element-wise in that case.
        if constexpr (sizeof(__r) == sizeof(_RV))
          return __r;
        else
          return _RV {__r[_Is]...};
#else
        return __r;
#endif
      }
#else
    // No __builtin_shufflevector: construct the result element by element.
    // A negative permutation index yields a zero element.
    return _RV {
      [=]() -> _Tp {
        constexpr int __j = __idx_perm(_Is);
        static_assert(__j < _N0 + _N1);
        if constexpr (__j < 0)
          return 0;
        else if constexpr (__j < _N0)
          return __x[__j];
        else
          return __y[__j - _N0];
      }()...
    };
#endif
  }
2104 
// Single-input overload: shuffles __x with a zero vector of the same type as
// second input (indices >= _N0 therefore select zero elements).
template <typename _T0, typename _Fun, typename _Seq>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __vec_shuffle(_T0 __x, _Seq __seq, _Fun __idx_perm)
  { return __vec_shuffle(__x, _T0(), __seq, __idx_perm); }
2109 
2110 // }}}
2111 // __concat{{{
// Concatenates two equally-typed vectors into one vector of twice the number
// of elements: a_ occupies the low half, b_ the high half.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>,
          typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size * 2>>
  constexpr _R
  __concat(_Tp a_, _Tp b_)
  {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
    // Workaround: recast to the widest feasible element type (double for
    // floating-point; long long if at least two fit into _Tp) so the
    // initializer below needs fewer subscript expressions.
    using _W
      = conditional_t<is_floating_point_v<typename _TVT::value_type>, double,
                      conditional_t<(sizeof(_Tp) >= 2 * sizeof(long long)),
                                    long long, typename _TVT::value_type>>;
    constexpr int input_width = sizeof(_Tp) / sizeof(_W);
    const auto __a = __vector_bitcast<_W>(a_);
    const auto __b = __vector_bitcast<_W>(b_);
    using _Up = __vector_type_t<_W, sizeof(_R) / sizeof(_W)>;
#else
    constexpr int input_width = _TVT::_S_full_size;
    const _Tp& __a = a_;
    const _Tp& __b = b_;
    using _Up = _R;
#endif
    // Spell out the element lists for every supported input width; the
    // result is built as _Up and reinterpreted to the requested _R.
    if constexpr (input_width == 2)
      return reinterpret_cast<_R>(_Up{__a[0], __a[1], __b[0], __b[1]});
    else if constexpr (input_width == 4)
      return reinterpret_cast<_R>(
               _Up{__a[0], __a[1], __a[2], __a[3], __b[0], __b[1], __b[2], __b[3]});
    else if constexpr (input_width == 8)
      return reinterpret_cast<_R>(
               _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6], __a[7],
                   __b[0], __b[1], __b[2], __b[3], __b[4], __b[5], __b[6], __b[7]});
    else if constexpr (input_width == 16)
      return reinterpret_cast<_R>(
               _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
                   __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
                   __a[14], __a[15], __b[0], __b[1], __b[2], __b[3], __b[4],
                   __b[5], __b[6], __b[7], __b[8], __b[9], __b[10], __b[11],
                   __b[12], __b[13], __b[14], __b[15]});
    else if constexpr (input_width == 32)
      return reinterpret_cast<_R>(
               _Up{__a[0], __a[1], __a[2], __a[3], __a[4], __a[5], __a[6],
                   __a[7], __a[8], __a[9], __a[10], __a[11], __a[12], __a[13],
                   __a[14], __a[15], __a[16], __a[17], __a[18], __a[19], __a[20],
                   __a[21], __a[22], __a[23], __a[24], __a[25], __a[26], __a[27],
                   __a[28], __a[29], __a[30], __a[31], __b[0], __b[1], __b[2],
                   __b[3], __b[4], __b[5], __b[6], __b[7], __b[8], __b[9],
                   __b[10], __b[11], __b[12], __b[13], __b[14], __b[15], __b[16],
                   __b[17], __b[18], __b[19], __b[20], __b[21], __b[22], __b[23],
                   __b[24], __b[25], __b[26], __b[27], __b[28], __b[29], __b[30],
                   __b[31]});
  }
2161 
2162 // }}}
2163 // __zero_extend {{{
// Proxy returned by __zero_extend. Converting it to a wider vector type _To
// (same element type) yields __x in the low elements and zeros above. The
// conversion is lazy so the destination width is chosen by the consumer.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  struct _ZeroExtendProxy
  {
    using value_type = typename _TVT::value_type;
    static constexpr size_t _Np = _TVT::_S_full_size;
    const _Tp __x;

    template <typename _To, typename _ToVT = _VectorTraits<_To>,
              typename
              = enable_if_t<is_same_v<typename _ToVT::value_type, value_type>>>
      _GLIBCXX_SIMD_INTRINSIC operator _To() const
      {
        constexpr size_t _ToN = _ToVT::_S_full_size;
        if constexpr (_ToN == _Np)
          return __x;
        else if constexpr (_ToN == 2 * _Np)
          {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
            // Prefer insert intrinsics (low lane insert into a zero register)
            // where available for the given source width/element type.
            if constexpr (__have_avx && _TVT::template _S_is<float, 4>)
              return __vector_bitcast<value_type>(
                       _mm256_insertf128_ps(__m256(), __x, 0));
            else if constexpr (__have_avx && _TVT::template _S_is<double, 2>)
              return __vector_bitcast<value_type>(
                       _mm256_insertf128_pd(__m256d(), __x, 0));
            else if constexpr (__have_avx2 && _Np * sizeof(value_type) == 16)
              return __vector_bitcast<value_type>(
                       _mm256_insertf128_si256(__m256i(), __to_intrin(__x), 0));
            else if constexpr (__have_avx512f && _TVT::template _S_is<float, 8>)
              {
                if constexpr (__have_avx512dq)
                  return __vector_bitcast<value_type>(
                           _mm512_insertf32x8(__m512(), __x, 0));
                else // no f32x8 insert without DQ; use the f64x4 form instead
                  return reinterpret_cast<__m512>(
                           _mm512_insertf64x4(__m512d(),
                                              reinterpret_cast<__m256d>(__x), 0));
              }
            else if constexpr (__have_avx512f
                                 && _TVT::template _S_is<double, 4>)
              return __vector_bitcast<value_type>(
                       _mm512_insertf64x4(__m512d(), __x, 0));
            else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 32)
              return __vector_bitcast<value_type>(
                       _mm512_inserti64x4(__m512i(), __to_intrin(__x), 0));
#endif
            // Generic path: concatenate with a zero vector of the same type.
            return __concat(__x, _Tp());
          }
        else if constexpr (_ToN == 4 * _Np)
          {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_3
            if constexpr (__have_avx512dq && _TVT::template _S_is<double, 2>)
              {
                return __vector_bitcast<value_type>(
                         _mm512_insertf64x2(__m512d(), __x, 0));
              }
            else if constexpr (__have_avx512f
                                 && is_floating_point_v<value_type>)
              {
                return __vector_bitcast<value_type>(
                         _mm512_insertf32x4(__m512(), reinterpret_cast<__m128>(__x),
                                            0));
              }
            else if constexpr (__have_avx512f && _Np * sizeof(value_type) == 16)
              {
                return __vector_bitcast<value_type>(
                         _mm512_inserti32x4(__m512i(), __to_intrin(__x), 0));
              }
#endif
            return __concat(__concat(__x, _Tp()),
                            __vector_type_t<value_type, _Np * 2>());
          }
        // Larger factors recurse through the conversion operator itself,
        // zero-extending in halves.
        else if constexpr (_ToN == 8 * _Np)
          return __concat(operator __vector_type_t<value_type, _Np * 4>(),
                          __vector_type_t<value_type, _Np * 4>());
        else if constexpr (_ToN == 16 * _Np)
          return __concat(operator __vector_type_t<value_type, _Np * 8>(),
                          __vector_type_t<value_type, _Np * 8>());
        else
          __assert_unreachable<_Tp>();
      }
  };
2245 
// Wraps __x in a _ZeroExtendProxy; the actual zero extension happens on
// conversion to the destination vector type.
template <typename _Tp, typename _TVT = _VectorTraits<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _ZeroExtendProxy<_Tp, _TVT>
  __zero_extend(_Tp __x)
  { return {__x}; }
2250 
2251 // }}}
2252 // __extract<_Np, By>{{{
// Splits __in into _SplitBy equal parts and returns part number _Offset as a
// vector of _S_full_size / _SplitBy elements.
template <int _Offset,
          int _SplitBy,
          typename _Tp,
          typename _TVT = _VectorTraits<_Tp>,
          typename _R = __vector_type_t<typename _TVT::value_type, _TVT::_S_full_size / _SplitBy>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __extract(_Tp __in)
  {
    using value_type = typename _TVT::value_type;
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    // 128-bit extraction from a 512-bit register: use the AVX512 extract
    // intrinsics (except for _Offset == 0, where the generic path is fine).
    if constexpr (sizeof(_Tp) == 64 && _SplitBy == 4 && _Offset > 0)
      {
        if constexpr (__have_avx512dq && is_same_v<double, value_type>)
          return _mm512_extractf64x2_pd(__to_intrin(__in), _Offset);
        else if constexpr (is_floating_point_v<value_type>)
          return __vector_bitcast<value_type>(
                   _mm512_extractf32x4_ps(__intrin_bitcast<__m512>(__in), _Offset));
        else
          return reinterpret_cast<_R>(
                   _mm512_extracti32x4_epi32(__intrin_bitcast<__m512i>(__in),
                                             _Offset));
      }
    else
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
      {
#ifdef _GLIBCXX_SIMD_WORKAROUND_XXX_1
        // Workaround: recast to a wider element type (double for fp, long
        // long for results of at least 16 bytes) to reduce the number of
        // subscript expressions below.
        using _W = conditional_t<
          is_floating_point_v<value_type>, double,
          conditional_t<(sizeof(_R) >= 16), long long, value_type>>;
        static_assert(sizeof(_R) % sizeof(_W) == 0);
        constexpr int __return_width = sizeof(_R) / sizeof(_W);
        using _Up = __vector_type_t<_W, __return_width>;
        const auto __x = __vector_bitcast<_W>(__in);
#else
        constexpr int __return_width = _TVT::_S_full_size / _SplitBy;
        using _Up = _R;
        const __vector_type_t<value_type, _TVT::_S_full_size>& __x
          = __in; // only needed for _Tp = _SimdWrapper<value_type, _Np>
#endif
        // Copy __return_width elements starting at subscript _O.
        constexpr int _O = _Offset * __return_width;
        return __call_with_subscripts<__return_width, _O>(
                 __x, [](auto... __entries) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                   return reinterpret_cast<_R>(_Up{__entries...});
                 });
      }
  }
2299 
2300 // }}}
2301 // __lo/__hi64[z]{{{
// Returns the low 64 bits of __x as an 8-byte vector of the same element
// type (remaining bytes of __r are zero from the value-initialization).
template <typename _Tp,
          typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __lo64(_Tp __x)
  {
    _R __r{};
    __builtin_memcpy(&__r, &__x, 8);
    return __r;
  }
2311 
// Returns the high 64 bits of a 16-byte vector __x as an 8-byte vector.
// Requires a 16-byte input; use __hi64z when the input may be 8 bytes.
template <typename _Tp,
          typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __hi64(_Tp __x)
  {
    static_assert(sizeof(_Tp) == 16, "use __hi64z if you meant it");
    _R __r{};
    __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
    return __r;
  }
2322 
// Like __hi64, but returns a zero vector when __x is not 16 bytes (i.e. has
// no high 64 bits).
template <typename _Tp,
          typename _R = __vector_type8_t<typename _VectorTraits<_Tp>::value_type>>
  _GLIBCXX_SIMD_INTRINSIC constexpr _R
  __hi64z([[maybe_unused]] _Tp __x)
  {
    _R __r{};
    if constexpr (sizeof(_Tp) == 16)
      __builtin_memcpy(&__r, reinterpret_cast<const char*>(&__x) + 8, 8);
    return __r;
  }
2333 
2334 // }}}
2335 // __lo/__hi128{{{
// Returns the low 128 bits of __x (part 0 of sizeof(_Tp)/16 parts).
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __lo128(_Tp __x)
  { return __extract<0, sizeof(_Tp) / 16>(__x); }
2340 
// Returns the high 128 bits of a 256-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __hi128(_Tp __x)
  {
    static_assert(sizeof(__x) == 32);
    return __extract<1, 2>(__x);
  }
2348 
2349 // }}}
2350 // __lo/__hi256{{{
// Returns the low 256 bits of a 512-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __lo256(_Tp __x)
  {
    static_assert(sizeof(__x) == 64);
    return __extract<0, 2>(__x);
  }
2358 
// Returns the high 256 bits of a 512-bit vector.
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __hi256(_Tp __x)
  {
    static_assert(sizeof(__x) == 64);
    return __extract<1, 2>(__x);
  }
2366 
2367 // }}}
2368 // __auto_bitcast{{{
// Proxy returned by __auto_bitcast: holds a builtin vector and converts on
// demand to any vector type _Up via __intrin_bitcast, so the destination
// type is deduced at the use site.
template <typename _Tp>
  struct _AutoCast
  {
    static_assert(__is_vector_type_v<_Tp>);

    const _Tp __x;

    template <typename _Up, typename _UVT = _VectorTraits<_Up>>
      _GLIBCXX_SIMD_INTRINSIC constexpr operator _Up() const
      { return __intrin_bitcast<typename _UVT::type>(__x); }
  };
2380 
// Wraps a builtin vector in an _AutoCast proxy (bitcast deferred to the
// conversion site).
template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC constexpr _AutoCast<_Tp>
  __auto_bitcast(const _Tp& __x)
  { return {__x}; }
2385 
// Overload for _SimdWrapper: unwraps the stored builtin vector first.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC constexpr
  _AutoCast<typename _SimdWrapper<_Tp, _Np>::_BuiltinType>
  __auto_bitcast(const _SimdWrapper<_Tp, _Np>& __x)
  { return {__x._M_data}; }
2391 
2392 // }}}
2393 // ^^^ ---- builtin vector types [[gnu::vector_size(N)]] and operations ---- ^^^
2394 
2395 #if _GLIBCXX_SIMD_HAVE_SSE_ABI
2396 // __bool_storage_member_type{{{
2397 #if _GLIBCXX_SIMD_HAVE_AVX512F && _GLIBCXX_SIMD_X86INTRIN
// Maps a mask element count to the __mmask type used for storage. The
// primary template handles non-power-of-2 sizes by rounding up to the next
// power of 2 and delegating to the explicit specializations below.
template <size_t _Size>
  struct __bool_storage_member_type
  {
    static_assert((_Size & (_Size - 1)) != 0,
                  "This trait may only be used for non-power-of-2 sizes. "
                  "Power-of-2 sizes must be specialized.");
    using type =
      typename __bool_storage_member_type<std::__bit_ceil(_Size)>::type;
  };
2407 
// Power-of-2 specializations: 1 element is stored as plain bool; 2-8
// elements share __mmask8; larger sizes use the matching __mmaskN type.
template <>
  struct __bool_storage_member_type<1> { using type = bool; };

template <>
  struct __bool_storage_member_type<2> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<4> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<8> { using type = __mmask8; };

template <>
  struct __bool_storage_member_type<16> { using type = __mmask16; };

template <>
  struct __bool_storage_member_type<32> { using type = __mmask32; };

template <>
  struct __bool_storage_member_type<64> { using type = __mmask64; };
2428 #endif // _GLIBCXX_SIMD_HAVE_AVX512F
2429 
2430 // }}}
2431 // __intrinsic_type (x86){{{
2432 // the following excludes bool via __is_vectorizable
2433 #if _GLIBCXX_SIMD_HAVE_SSE
// x86 intrinsic vector type for vectorizable _Tp and up to 64 bytes: the
// byte count is rounded up to the next register size (16/32/64), and all
// integral element types map to long long elements (matching __m128i etc.).
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 64>>
  {
    static_assert(!is_same_v<_Tp, long double>,
                  "no __intrinsic_type support for long double on x86");

    static constexpr size_t _S_VBytes = _Bytes <= 16 ? 16 : _Bytes <= 32 ? 32 : 64;

    using type [[__gnu__::__vector_size__(_S_VBytes)]]
      = conditional_t<is_integral_v<_Tp>, long long int, _Tp>;
  };
2445 #endif // _GLIBCXX_SIMD_HAVE_SSE
2446 
2447 // }}}
2448 #endif // _GLIBCXX_SIMD_HAVE_SSE_ABI
2449 // __intrinsic_type (ARM){{{
2450 #if _GLIBCXX_SIMD_HAVE_NEON
// NEON floating-point specializations. float64x1_t/float64x2_t only exist on
// AArch64 (A64); on A32 the double specializations deliberately provide no
// `type` member.
template <>
  struct __intrinsic_type<float, 8, void>
  { using type = float32x2_t; };

template <>
  struct __intrinsic_type<float, 16, void>
  { using type = float32x4_t; };

template <>
  struct __intrinsic_type<double, 8, void>
  {
#if _GLIBCXX_SIMD_HAVE_NEON_A64
    using type = float64x1_t;
#endif
  };

template <>
  struct __intrinsic_type<double, 16, void>
  {
#if _GLIBCXX_SIMD_HAVE_NEON_A64
    using type = float64x2_t;
#endif
  };
2474 
// Specializes __intrinsic_type for the NEON integer vector types: for a
// given element bit width and element count, one specialization maps the
// signed type to int<_Bits>x<_Np>_t and one maps the unsigned type to
// uint<_Bits>x<_Np>_t.
#define _GLIBCXX_SIMD_ARM_INTRIN(_Bits, _Np) \
template <> \
  struct __intrinsic_type<__int_with_sizeof_t<_Bits / 8>, \
                          _Np * _Bits / 8, void> \
  { using type = int##_Bits##x##_Np##_t; }; \
template <> \
  struct __intrinsic_type<make_unsigned_t<__int_with_sizeof_t<_Bits / 8>>, \
                          _Np * _Bits / 8, void> \
  { using type = uint##_Bits##x##_Np##_t; }
_GLIBCXX_SIMD_ARM_INTRIN(8, 8);   // 64-bit D registers
_GLIBCXX_SIMD_ARM_INTRIN(8, 16);  // 128-bit Q registers
_GLIBCXX_SIMD_ARM_INTRIN(16, 4);
_GLIBCXX_SIMD_ARM_INTRIN(16, 8);
_GLIBCXX_SIMD_ARM_INTRIN(32, 2);
_GLIBCXX_SIMD_ARM_INTRIN(32, 4);
_GLIBCXX_SIMD_ARM_INTRIN(64, 1);
_GLIBCXX_SIMD_ARM_INTRIN(64, 2);
#undef _GLIBCXX_SIMD_ARM_INTRIN
2493 
// Generic NEON fallback: maps any vectorizable _Tp / byte count to one of
// the explicit specializations above by normalizing the element type (same
// size and signedness) and rounding the byte count up to 8 or 16.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  {
    static constexpr int _SVecBytes = _Bytes <= 8 ? 8 : 16;

    // Integer type with the same size as _Tp.
    using _Ip = __int_for_sizeof_t<_Tp>;

    // Normalized element type: floating-point kept as-is, integers mapped to
    // the canonical signed/unsigned integer of the same size.
    using _Up = conditional_t<
      is_floating_point_v<_Tp>, _Tp,
      conditional_t<is_unsigned_v<_Tp>, make_unsigned_t<_Ip>, _Ip>>;

    // If neither the type nor the size was normalized, one of the explicit
    // specializations should have matched instead of this fallback.
    static_assert(!is_same_v<_Tp, _Up> || _SVecBytes != _Bytes,
                  "should use explicit specialization above");

    using type = typename __intrinsic_type<_Up, _SVecBytes>::type;
  };
2510 #endif // _GLIBCXX_SIMD_HAVE_NEON
2511 
2512 // }}}
2513 // __intrinsic_type (PPC){{{
2514 #ifdef __ALTIVEC__
// Maps element types to the corresponding AltiVec `__vector` types. Types
// guarded by __VSX__ (double, 64-bit integers) are only available with VSX;
// long on 32-bit targets is vectorizable even without VSX.
template <typename _Tp>
  struct __intrinsic_type_impl;

#define _GLIBCXX_SIMD_PPC_INTRIN(_Tp) \
  template <> \
  struct __intrinsic_type_impl<_Tp> { using type = __vector _Tp; }
_GLIBCXX_SIMD_PPC_INTRIN(float);
#ifdef __VSX__
_GLIBCXX_SIMD_PPC_INTRIN(double);
#endif
_GLIBCXX_SIMD_PPC_INTRIN(signed char);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned char);
_GLIBCXX_SIMD_PPC_INTRIN(signed short);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned short);
_GLIBCXX_SIMD_PPC_INTRIN(signed int);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned int);
#if defined __VSX__ || __SIZEOF_LONG__ == 4
_GLIBCXX_SIMD_PPC_INTRIN(signed long);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned long);
#endif
#ifdef __VSX__
_GLIBCXX_SIMD_PPC_INTRIN(signed long long);
_GLIBCXX_SIMD_PPC_INTRIN(unsigned long long);
#endif
#undef _GLIBCXX_SIMD_PPC_INTRIN
2540 
// PPC __intrinsic_type: selects the __vector type via __intrinsic_type_impl.
// __element_type() returns a value whose TYPE is the canonical element type
// for _Tp (long double maps to double; integers map to the standard type of
// equal size); only decltype of its result is used.
template <typename _Tp, size_t _Bytes>
  struct __intrinsic_type<_Tp, _Bytes, enable_if_t<__is_vectorizable_v<_Tp> && _Bytes <= 16>>
  {
    static constexpr bool _S_is_ldouble = is_same_v<_Tp, long double>;

    // allow _Tp == long double with -mlong-double-64
    static_assert(!(_S_is_ldouble && sizeof(long double) > sizeof(double)),
                  "no __intrinsic_type support for 128-bit floating point on PowerPC");

#ifndef __VSX__
    static_assert(!(is_same_v<_Tp, double>
                      || (_S_is_ldouble && sizeof(long double) == sizeof(double))),
                  "no __intrinsic_type support for 64-bit floating point on PowerPC w/o VSX");
#endif

    static constexpr auto __element_type()
    {
      if constexpr (is_floating_point_v<_Tp>)
        {
          if constexpr (_S_is_ldouble)
            return double {};
          else
            return _Tp {};
        }
      else if constexpr (is_signed_v<_Tp>)
        {
          if constexpr (sizeof(_Tp) == sizeof(_SChar))
            return _SChar {};
          else if constexpr (sizeof(_Tp) == sizeof(short))
            return short {};
          else if constexpr (sizeof(_Tp) == sizeof(int))
            return int {};
          else if constexpr (sizeof(_Tp) == sizeof(_LLong))
            return _LLong {};
        }
      else
        {
          if constexpr (sizeof(_Tp) == sizeof(_UChar))
            return _UChar {};
          else if constexpr (sizeof(_Tp) == sizeof(_UShort))
            return _UShort {};
          else if constexpr (sizeof(_Tp) == sizeof(_UInt))
            return _UInt {};
          else if constexpr (sizeof(_Tp) == sizeof(_ULLong))
            return _ULLong {};
        }
    }

    using type = typename __intrinsic_type_impl<decltype(__element_type())>::type;
  };
2591 #endif // __ALTIVEC__
2592 
2593 // }}}
2594 // _SimdWrapper<bool>{{{1
2595 template <size_t _Width>
2596  struct _SimdWrapper<bool, _Width,
2597  void_t<typename __bool_storage_member_type<_Width>::type>>
2598  {
2599  using _BuiltinType = typename __bool_storage_member_type<_Width>::type;
2600  using value_type = bool;
2601 
2602  static constexpr size_t _S_full_size = sizeof(_BuiltinType) * __CHAR_BIT__;
2603 
2604  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<bool, _S_full_size>
2605  __as_full_vector() const
2606  { return _M_data; }
2607 
2608  _GLIBCXX_SIMD_INTRINSIC constexpr
2609  _SimdWrapper() = default;
2610 
2611  _GLIBCXX_SIMD_INTRINSIC constexpr
2612  _SimdWrapper(_BuiltinType __k) : _M_data(__k) {};
2613 
2614  _GLIBCXX_SIMD_INTRINSIC
2615  operator const _BuiltinType&() const
2616  { return _M_data; }
2617 
2618  _GLIBCXX_SIMD_INTRINSIC
2619  operator _BuiltinType&()
2620  { return _M_data; }
2621 
2622  _GLIBCXX_SIMD_INTRINSIC _BuiltinType
2623  __intrin() const
2624  { return _M_data; }
2625 
2626  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2627  operator[](size_t __i) const
2628  { return _M_data & (_BuiltinType(1) << __i); }
2629 
2630  template <size_t __i>
2631  _GLIBCXX_SIMD_INTRINSIC constexpr value_type
2632  operator[](_SizeConstant<__i>) const
2633  { return _M_data & (_BuiltinType(1) << __i); }
2634 
2635  _GLIBCXX_SIMD_INTRINSIC constexpr void
2636  _M_set(size_t __i, value_type __x)
2637  {
2638  if (__x)
2639  _M_data |= (_BuiltinType(1) << __i);
2640  else
2641  _M_data &= ~(_BuiltinType(1) << __i);
2642  }
2643 
2644  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2645  _M_is_constprop() const
2646  { return __builtin_constant_p(_M_data); }
2647 
2648  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2649  _M_is_constprop_none_of() const
2650  {
2651  if (__builtin_constant_p(_M_data))
2652  {
2653  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2654  constexpr _BuiltinType __active_mask
2655  = ~_BuiltinType() >> (__nbits - _Width);
2656  return (_M_data & __active_mask) == 0;
2657  }
2658  return false;
2659  }
2660 
2661  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2662  _M_is_constprop_all_of() const
2663  {
2664  if (__builtin_constant_p(_M_data))
2665  {
2666  constexpr int __nbits = sizeof(_BuiltinType) * __CHAR_BIT__;
2667  constexpr _BuiltinType __active_mask
2668  = ~_BuiltinType() >> (__nbits - _Width);
2669  return (_M_data & __active_mask) == __active_mask;
2670  }
2671  return false;
2672  }
2673 
2674  _BuiltinType _M_data;
2675  };
2676 
2677 // _SimdWrapperBase{{{1
// Storage base for _SimdWrapper. The bool parameter selects whether default
// construction must zero-initialize _M_data (to keep padding elements of
// partial wrappers from holding signaling-NaN bit patterns) or may leave it
// uninitialized for speed.
template <bool _MustZeroInitPadding, typename _BuiltinType>
  struct _SimdWrapperBase;

template <typename _BuiltinType>
  struct _SimdWrapperBase<false, _BuiltinType> // no padding or no SNaNs
  {
    // Default construction leaves _M_data uninitialized.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase() = default;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}

    _BuiltinType _M_data;
  };

template <typename _BuiltinType>
  struct _SimdWrapperBase<true, _BuiltinType> // with padding that needs to
                                              // never become SNaN
  {
    // Default construction value-initializes (zeros) _M_data.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase() : _M_data() {}

    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SimdWrapperBase(_BuiltinType __init) : _M_data(__init) {}

    _BuiltinType _M_data;
  };
2705 
2706 // }}}
2707 // _SimdWrapper{{{
2708 struct _DisabledSimdWrapper;
2709 
2710 template <typename _Tp, size_t _Width>
2711  struct _SimdWrapper<
2712  _Tp, _Width,
2713  void_t<__vector_type_t<_Tp, _Width>, __intrinsic_type_t<_Tp, _Width>>>
2714  : _SimdWrapperBase<__has_iec559_behavior<__signaling_NaN, _Tp>::value
2715  && sizeof(_Tp) * _Width
2716  == sizeof(__vector_type_t<_Tp, _Width>),
2717  __vector_type_t<_Tp, _Width>>
2718  {
2719  static constexpr bool _S_need_default_init
2720  = __has_iec559_behavior<__signaling_NaN, _Tp>::value
2721  and sizeof(_Tp) * _Width == sizeof(__vector_type_t<_Tp, _Width>);
2722 
2723  using _BuiltinType = __vector_type_t<_Tp, _Width>;
2724 
2725  using _Base = _SimdWrapperBase<_S_need_default_init, _BuiltinType>;
2726 
2727  static_assert(__is_vectorizable_v<_Tp>);
2728  static_assert(_Width >= 2); // 1 doesn't make sense, use _Tp directly then
2729 
2730  using value_type = _Tp;
2731 
2732  static inline constexpr size_t _S_full_size
2733  = sizeof(_BuiltinType) / sizeof(value_type);
2734  static inline constexpr int _S_size = _Width;
2735  static inline constexpr bool _S_is_partial = _S_full_size != _S_size;
2736 
2737  using _Base::_M_data;
2738 
2739  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper<_Tp, _S_full_size>
2740  __as_full_vector() const
2741  { return _M_data; }
2742 
2743  _GLIBCXX_SIMD_INTRINSIC constexpr
2744  _SimdWrapper(initializer_list<_Tp> __init)
2745  : _Base(__generate_from_n_evaluations<_Width, _BuiltinType>(
2746  [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
2747  return __init.begin()[__i.value];
2748  })) {}
2749 
2750  _GLIBCXX_SIMD_INTRINSIC constexpr
2751  _SimdWrapper() = default;
2752 
2753  _GLIBCXX_SIMD_INTRINSIC constexpr
2754  _SimdWrapper(const _SimdWrapper&) = default;
2755 
2756  _GLIBCXX_SIMD_INTRINSIC constexpr
2757  _SimdWrapper(_SimdWrapper&&) = default;
2758 
2759  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2760  operator=(const _SimdWrapper&) = default;
2761 
2762  _GLIBCXX_SIMD_INTRINSIC constexpr _SimdWrapper&
2763  operator=(_SimdWrapper&&) = default;
2764 
2765  // Convert from exactly matching __vector_type_t
2766  using _SimdWrapperBase<_S_need_default_init, _BuiltinType>::_SimdWrapperBase;
2767 
2768  // Convert from __intrinsic_type_t if __intrinsic_type_t and __vector_type_t differ, otherwise
2769  // this ctor should not exist. Making the argument type unusable is our next best solution.
2770  _GLIBCXX_SIMD_INTRINSIC constexpr
2771  _SimdWrapper(conditional_t<is_same_v<_BuiltinType, __intrinsic_type_t<_Tp, _Width>>,
2772  _DisabledSimdWrapper, __intrinsic_type_t<_Tp, _Width>> __x)
2773  : _Base(__vector_bitcast<_Tp, _Width>(__x)) {}
2774 
2775  // Convert from different __vector_type_t, but only if bit reinterpretation is a correct
2776  // conversion of the value_type
2777  template <typename _V, typename _TVT = _VectorTraits<_V>,
2778  typename = enable_if_t<sizeof(typename _TVT::value_type) == sizeof(_Tp)
2779  and sizeof(_V) == sizeof(_BuiltinType)
2780  and is_integral_v<_Tp>
2781  and is_integral_v<typename _TVT::value_type>>>
2782  _GLIBCXX_SIMD_INTRINSIC constexpr
2783  _SimdWrapper(_V __x)
2784  : _Base(reinterpret_cast<_BuiltinType>(__x)) {}
2785 
2786  template <typename... _As,
2787  typename = enable_if_t<((is_same_v<simd_abi::scalar, _As> && ...)
2788  && sizeof...(_As) <= _Width)>>
2789  _GLIBCXX_SIMD_INTRINSIC constexpr
2790  operator _SimdTuple<_Tp, _As...>() const
2791  {
2792  return __generate_from_n_evaluations<sizeof...(_As), _SimdTuple<_Tp, _As...>>(
2793  [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
2794  { return _M_data[int(__i)]; });
2795  }
2796 
2797  _GLIBCXX_SIMD_INTRINSIC constexpr
2798  operator const _BuiltinType&() const
2799  { return _M_data; }
2800 
2801  _GLIBCXX_SIMD_INTRINSIC constexpr
2802  operator _BuiltinType&()
2803  { return _M_data; }
2804 
2805  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2806  operator[](size_t __i) const
2807  { return _M_data[__i]; }
2808 
2809  template <size_t __i>
2810  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
2811  operator[](_SizeConstant<__i>) const
2812  { return _M_data[__i]; }
2813 
2814  _GLIBCXX_SIMD_INTRINSIC constexpr void
2815  _M_set(size_t __i, _Tp __x)
2816  {
2817  if (__builtin_is_constant_evaluated())
2818  _M_data = __generate_from_n_evaluations<_Width, _BuiltinType>([&](auto __j) {
2819  return __j == __i ? __x : _M_data[__j()];
2820  });
2821  else
2822  _M_data[__i] = __x;
2823  }
2824 
2825  _GLIBCXX_SIMD_INTRINSIC
2826  constexpr bool
2827  _M_is_constprop() const
2828  { return __builtin_constant_p(_M_data); }
2829 
2830  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2831  _M_is_constprop_none_of() const
2832  {
2833  if (__builtin_constant_p(_M_data))
2834  {
2835  bool __r = true;
2836  if constexpr (is_floating_point_v<_Tp>)
2837  {
2838  using _Ip = __int_for_sizeof_t<_Tp>;
2839  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2840  __execute_n_times<_Width>(
2841  [&](auto __i) { __r &= __intdata[__i.value] == _Ip(); });
2842  }
2843  else
2844  __execute_n_times<_Width>(
2845  [&](auto __i) { __r &= _M_data[__i.value] == _Tp(); });
2846  if (__builtin_constant_p(__r))
2847  return __r;
2848  }
2849  return false;
2850  }
2851 
2852  _GLIBCXX_SIMD_INTRINSIC constexpr bool
2853  _M_is_constprop_all_of() const
2854  {
2855  if (__builtin_constant_p(_M_data))
2856  {
2857  bool __r = true;
2858  if constexpr (is_floating_point_v<_Tp>)
2859  {
2860  using _Ip = __int_for_sizeof_t<_Tp>;
2861  const auto __intdata = __vector_bitcast<_Ip>(_M_data);
2862  __execute_n_times<_Width>(
2863  [&](auto __i) { __r &= __intdata[__i.value] == ~_Ip(); });
2864  }
2865  else
2866  __execute_n_times<_Width>(
2867  [&](auto __i) { __r &= _M_data[__i.value] == ~_Tp(); });
2868  if (__builtin_constant_p(__r))
2869  return __r;
2870  }
2871  return false;
2872  }
2873  };
2874 
2875 // }}}
2876 
2877 // __vectorized_sizeof {{{
// Returns the number of bytes the "native" vector register for _Tp should
// have on the current target (sizeof(_Tp), i.e. scalar, if _Tp is not
// vectorizable or no suitable SIMD extension is enabled). Checks ISA
// extensions from widest to narrowest per architecture.
template <typename _Tp>
  constexpr size_t
  __vectorized_sizeof()
  {
    if constexpr (!__is_vectorizable_v<_Tp>)
      return 0;

    if constexpr (sizeof(_Tp) <= 8)
      {
        // X86:
        if constexpr (__have_avx512bw)
          return 64;
        // without BW, AVX512 only covers 32- and 64-bit elements
        if constexpr (__have_avx512f && sizeof(_Tp) >= 4)
          return 64;
        if constexpr (__have_avx2)
          return 32;
        // AVX without AVX2 only provides 256-bit floating-point operations
        if constexpr (__have_avx && is_floating_point_v<_Tp>)
          return 32;
        if constexpr (__have_sse2)
          return 16;
        // SSE1 only provides 128-bit float operations
        if constexpr (__have_sse && is_same_v<_Tp, float>)
          return 16;
        /* The following is too much trouble because of mixed MMX and x87 code.
         * While nothing here explicitly calls MMX instructions or registers,
         * they are still emitted but no EMMS cleanup is done.
        if constexpr (__have_mmx && sizeof(_Tp) <= 4 && is_integral_v<_Tp>)
          return 8;
         */

        // PowerPC:
        if constexpr (__have_power8vec
                        || (__have_power_vmx && (sizeof(_Tp) < 8))
                        || (__have_power_vsx && is_floating_point_v<_Tp>) )
          return 16;

        // ARM:
        if constexpr (__have_neon_a64)
          return 16;
        if constexpr (__have_neon_a32 and (not is_floating_point_v<_Tp>
                                             or is_same_v<_Tp, float>))
          return 16;
        if constexpr (__have_neon
                        && sizeof(_Tp) < 8
                        // Only allow fp if the user allows non-IEC559 fp (e.g.
                        // via -ffast-math). ARMv7 NEON fp is not conforming to
                        // IEC559.
                        && (__support_neon_float || !is_floating_point_v<_Tp>))
          return 16;
      }

    // fall back to scalar
    return sizeof(_Tp);
  }
2930 
2931 // }}}
namespace simd_abi {
// most of simd_abi is defined in simd_detail.h

// Upper limit for simd_abi::fixed_size<N>.  With AVX512BW a 64-byte register
// holds 64 one-byte elements, hence the larger bound for sizeof(_Tp) == 1.
template <typename _Tp>
  inline constexpr int max_fixed_size
    = (__have_avx512bw && sizeof(_Tp) == 1) ? 64 : 32;

// compatible {{{
// The ABI tag intended to be compatible across translation units compiled
// with different ISA extension flags (the platform baseline).
#if defined __x86_64__ || defined __aarch64__
template <typename _Tp>
  using compatible = conditional_t<(sizeof(_Tp) <= 8), _VecBuiltin<16>, scalar>;
#elif defined __ARM_NEON
// FIXME: not sure, probably needs to be scalar (or dependent on the hard-float
// ABI?)
template <typename _Tp>
  using compatible
    = conditional_t<(sizeof(_Tp) < 8
		       && (__support_neon_float || !is_floating_point_v<_Tp>)),
		    _VecBuiltin<16>, scalar>;
#else
template <typename>
  using compatible = scalar;
#endif

// }}}
// native {{{
// Selects the widest ABI tag supported for _Tp (see __vectorized_sizeof).
// Returns a null pointer whose pointee type encodes the chosen ABI; only the
// type is used, via the `native` alias below.
template <typename _Tp>
  constexpr auto
  __determine_native_abi()
  {
    constexpr size_t __bytes = __vectorized_sizeof<_Tp>();
    if constexpr (__bytes == sizeof(_Tp))
      return static_cast<scalar*>(nullptr);
    // AVX512 with 512-bit vectors — or any width when AVX512VL is available —
    // uses the bitmask-register ABI.
    else if constexpr (__have_avx512vl || (__have_avx512f && __bytes == 64))
      return static_cast<_VecBltnBtmsk<__bytes>*>(nullptr);
    else
      return static_cast<_VecBuiltin<__bytes>*>(nullptr);
  }

template <typename _Tp, typename = enable_if_t<__is_vectorizable_v<_Tp>>>
  using native = remove_pointer_t<decltype(__determine_native_abi<_Tp>())>;

// }}}
// __default_abi {{{
// ABI used when none is given explicitly; `compatible` unless overridden via
// the _GLIBCXX_SIMD_DEFAULT_ABI macro.
#if defined _GLIBCXX_SIMD_DEFAULT_ABI
template <typename _Tp>
  using __default_abi = _GLIBCXX_SIMD_DEFAULT_ABI<_Tp>;
#else
template <typename _Tp>
  using __default_abi = compatible<_Tp>;
#endif

// }}}
} // namespace simd_abi
2985 
2986 // traits {{{1
// Trait: true for the tag types accepted as load/store flag arguments
// (element_aligned_tag, vector_aligned_tag, valid overaligned_tag<_Np>).
template <typename _Tp>
  struct is_simd_flag_type
  : false_type
  {};

template <>
  struct is_simd_flag_type<element_aligned_tag>
  : true_type
  {};

template <>
  struct is_simd_flag_type<vector_aligned_tag>
  : true_type
  {};

// overaligned_tag is valid only for non-zero power-of-two alignments.
template <size_t _Np>
  struct is_simd_flag_type<overaligned_tag<_Np>>
  : __bool_constant<(_Np > 0) and __has_single_bit(_Np)>
  {};

template <typename _Tp>
  inline constexpr bool is_simd_flag_type_v = is_simd_flag_type<_Tp>::value;

// SFINAE helper: names _Tp only when _Tp is a valid simd flag type, used to
// constrain the flag parameter of load/store functions.
template <typename _Tp, typename = enable_if_t<is_simd_flag_type_v<_Tp>>>
  using _IsSimdFlagType = _Tp;
3012 
3013 // is_abi_tag {{{2
// Trait: true when _Tp is an ABI tag.  Detected via the nested
// _IsValidAbiTag member type that all ABI tags define.
template <typename _Tp, typename = void_t<>>
  struct is_abi_tag : false_type {};

template <typename _Tp>
  struct is_abi_tag<_Tp, void_t<typename _Tp::_IsValidAbiTag>>
  : public _Tp::_IsValidAbiTag {};

template <typename _Tp>
  inline constexpr bool is_abi_tag_v = is_abi_tag<_Tp>::value;
3023 
3024 // is_simd(_mask) {{{2
// Traits: true for specializations of simd / simd_mask (the true_type
// specializations follow the class declarations further below).
template <typename _Tp>
  struct is_simd : public false_type {};

template <typename _Tp>
  inline constexpr bool is_simd_v = is_simd<_Tp>::value;

template <typename _Tp>
  struct is_simd_mask : public false_type {};

template <typename _Tp>
  inline constexpr bool is_simd_mask_v = is_simd_mask<_Tp>::value;
3036 
3037 // simd_size {{{2
// simd_size<_Tp, _Abi>: number of elements of a simd<_Tp, _Abi>.  The impl
// class is empty (SFINAE-friendly) unless _Tp is vectorizable and _Abi is a
// valid ABI tag, in which case it exposes the ABI's size for _Tp.
template <typename _Tp, typename _Abi, typename = void>
  struct __simd_size_impl {};

template <typename _Tp, typename _Abi>
  struct __simd_size_impl<
    _Tp, _Abi,
    enable_if_t<conjunction_v<__is_vectorizable<_Tp>, is_abi_tag<_Abi>>>>
  : _SizeConstant<_Abi::template _S_size<_Tp>> {};

template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  struct simd_size : __simd_size_impl<_Tp, _Abi> {};

template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  inline constexpr size_t simd_size_v = simd_size<_Tp, _Abi>::value;
3052 
3053 // simd_abi::deduce {{{2
// Primary declaration; specializations (elsewhere) pick an ABI tag for a
// requested element type and element count.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_impl;

namespace simd_abi {
/**
 * @tparam _Tp The requested `value_type` for the elements.
 * @tparam _Np The requested number of elements.
 * @tparam _Abis This parameter is ignored, since this implementation cannot
 * make any use of it. Either a good native ABI is matched and used as `type`
 * alias, or the `fixed_size<_Np>` ABI is used, which internally is built from
 * the best matching native ABIs.
 */
template <typename _Tp, size_t _Np, typename...>
  struct deduce : __deduce_impl<_Tp, _Np> {};

template <typename _Tp, size_t _Np, typename... _Abis>
  using deduce_t = typename deduce<_Tp, _Np, _Abis...>::type;
} // namespace simd_abi
3072 
3073 // }}}2
3074 // rebind_simd {{{2
// rebind_simd<_Tp, _V>: the simd/simd_mask type with the same number of
// elements as _V but element type _Tp; the ABI is re-deduced for _Tp.
// SFINAE-friendly: no `type` member when deduction fails.
template <typename _Tp, typename _V, typename = void>
  struct rebind_simd;

template <typename _Tp, typename _Up, typename _Abi>
  struct rebind_simd<_Tp, simd<_Up, _Abi>,
		     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };

template <typename _Tp, typename _Up, typename _Abi>
  struct rebind_simd<_Tp, simd_mask<_Up, _Abi>,
		     void_t<simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>>
  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, simd_size_v<_Up, _Abi>, _Abi>>; };

template <typename _Tp, typename _V>
  using rebind_simd_t = typename rebind_simd<_Tp, _V>::type;
3090 
3091 // resize_simd {{{2
// resize_simd<_Np, _V>: the simd/simd_mask type with _V's element type but
// _Np elements.  SFINAE-friendly: no `type` member when deduction fails.
template <int _Np, typename _V, typename = void>
  struct resize_simd;

template <int _Np, typename _Tp, typename _Abi>
  struct resize_simd<_Np, simd<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  { using type = simd<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };

template <int _Np, typename _Tp, typename _Abi>
  struct resize_simd<_Np, simd_mask<_Tp, _Abi>, void_t<simd_abi::deduce_t<_Tp, _Np, _Abi>>>
  { using type = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np, _Abi>>; };

template <int _Np, typename _V>
  using resize_simd_t = typename resize_simd<_Np, _V>::type;
3105 
3106 // }}}2
3107 // memory_alignment {{{2
// memory_alignment<_Tp, _Up>: the alignment (in bytes) required for
// vector_aligned loads/stores of a _Tp from/to memory of element type _Up.
template <typename _Tp, typename _Up = typename _Tp::value_type>
  struct memory_alignment
  : public _SizeConstant<vector_aligned_tag::_S_alignment<_Tp, _Up>> {};

template <typename _Tp, typename _Up = typename _Tp::value_type>
  inline constexpr size_t memory_alignment_v = memory_alignment<_Tp, _Up>::value;
3114 
3115 // class template simd [simd] {{{1
// Forward declaration of the main class template plus the is_simd
// specialization and the standard convenience aliases.
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  class simd;

template <typename _Tp, typename _Abi>
  struct is_simd<simd<_Tp, _Abi>> : public true_type {};

template <typename _Tp>
  using native_simd = simd<_Tp, simd_abi::native<_Tp>>;

template <typename _Tp, int _Np>
  using fixed_size_simd = simd<_Tp, simd_abi::fixed_size<_Np>>;

// Internal shorthand: simd with an ABI deduced for exactly _Np elements.
template <typename _Tp, size_t _Np>
  using __deduced_simd = simd<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3130 
3131 // class template simd_mask [simd_mask] {{{1
// Forward declaration of simd_mask plus the is_simd_mask specialization and
// the standard convenience aliases (mirrors the simd declarations above).
template <typename _Tp, typename _Abi = simd_abi::__default_abi<_Tp>>
  class simd_mask;

template <typename _Tp, typename _Abi>
  struct is_simd_mask<simd_mask<_Tp, _Abi>> : public true_type {};

template <typename _Tp>
  using native_simd_mask = simd_mask<_Tp, simd_abi::native<_Tp>>;

template <typename _Tp, int _Np>
  using fixed_size_simd_mask = simd_mask<_Tp, simd_abi::fixed_size<_Np>>;

// Internal shorthand: simd_mask with an ABI deduced for exactly _Np elements.
template <typename _Tp, size_t _Np>
  using __deduced_simd_mask = simd_mask<_Tp, simd_abi::deduce_t<_Tp, _Np>>;
3146 
3147 // casts [simd.casts] {{{1
3148 // static_simd_cast {{{2
// Computes the return type of static_simd_cast<_Tp>(simd<_Up, _Ap>).
// _Tp may be a simd type, a simd_mask type, or a plain element type; the
// bool parameter dispatches on is_simd_v<_Tp>.
template <typename _Tp, typename _Up, typename _Ap, bool = is_simd_v<_Tp>, typename = void>
  struct __static_simd_cast_return_type;

// _Tp is a simd_mask: delegate to the corresponding simd case.
template <typename _Tp, typename _A0, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<simd_mask<_Tp, _A0>, _Up, _Ap, false, void>
  : __static_simd_cast_return_type<simd<_Tp, _A0>, _Up, _Ap> {};

// _Tp is a simd type with matching element count: return it unchanged.
template <typename _Tp, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<
    _Tp, _Up, _Ap, true, enable_if_t<_Tp::size() == simd_size_v<_Up, _Ap>>>
  { using type = _Tp; };

// _Tp equals the source element type: keep the source ABI.
template <typename _Tp, typename _Ap>
  struct __static_simd_cast_return_type<_Tp, _Tp, _Ap, false,
#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
					enable_if_t<__is_vectorizable_v<_Tp>>
#else
					void
#endif
					>
  { using type = simd<_Tp, _Ap>; };

// make_signed that is a no-op for non-integral types (plain make_signed is
// ill-formed for them).
template <typename _Tp, typename = void>
  struct __safe_make_signed { using type = _Tp;};

template <typename _Tp>
  struct __safe_make_signed<_Tp, enable_if_t<is_integral_v<_Tp>>>
  {
    // the extra make_unsigned_t is because of PR85951
    using type = make_signed_t<make_unsigned_t<_Tp>>;
  };

template <typename _Tp>
  using safe_make_signed_t = typename __safe_make_signed<_Tp>::type;

// _Tp is a plain element type: keep the source ABI when casting between
// integral types that differ only in signedness, otherwise fall back to
// fixed_size with the source's element count.
template <typename _Tp, typename _Up, typename _Ap>
  struct __static_simd_cast_return_type<_Tp, _Up, _Ap, false,
#ifdef _GLIBCXX_SIMD_FIX_P2TS_ISSUE66
					enable_if_t<__is_vectorizable_v<_Tp>>
#else
					void
#endif
					>
  {
    using type = conditional_t<
      (is_integral_v<_Up> && is_integral_v<_Tp> &&
#ifndef _GLIBCXX_SIMD_FIX_P2TS_ISSUE65
       is_signed_v<_Up> != is_signed_v<_Tp> &&
#endif
       is_same_v<safe_make_signed_t<_Up>, safe_make_signed_t<_Tp>>),
      simd<_Tp, _Ap>, fixed_size_simd<_Tp, simd_size_v<_Up, _Ap>>>;
  };
3201 
// static_simd_cast<_Tp>(x): element-wise static_cast of a simd value to the
// type computed by __static_simd_cast_return_type.  A cast to the identical
// type is a pass-through; otherwise _SimdConverter performs the conversion.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _R
	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _R
  static_simd_cast(const simd<_Up, _Ap>& __x)
  {
    if constexpr (is_same<_R, simd<_Up, _Ap>>::value)
      return __x;
    else
      {
	_SimdConverter<_Up, _Ap, typename _R::value_type, typename _R::abi_type>
	  __c;
	return _R(__private_init, __c(__data(__x)));
      }
  }
3217 
namespace __proposed {
// static_simd_cast for masks: converts via the target ABI's mask
// implementation; the return type is the mask type of the simd cast result.
template <typename _Tp, typename _Up, typename _Ap,
	  typename _R
	  = typename __static_simd_cast_return_type<_Tp, _Up, _Ap>::type>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR typename _R::mask_type
  static_simd_cast(const simd_mask<_Up, _Ap>& __x)
  {
    using _RM = typename _R::mask_type;
    return {__private_init, _RM::abi_type::_MaskImpl::template _S_convert<
			      typename _RM::simd_type::value_type>(__x)};
  }

// simd_bit_cast<_To>(x): reinterprets the object representation of a simd
// value as _To (same size required), analogous to std::bit_cast.
template <typename _To, typename _Up, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  _To
  simd_bit_cast(const simd<_Up, _Abi>& __x)
  {
    using _Tp = typename _To::value_type;
    using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
    using _From = simd<_Up, _Abi>;
    using _FromMember = typename _SimdTraits<_Up, _Abi>::_SimdMember;
    // with concepts, the following should be constraints
    static_assert(sizeof(_To) == sizeof(_From));
    static_assert(is_trivially_copyable_v<_Tp> && is_trivially_copyable_v<_Up>);
    static_assert(is_trivially_copyable_v<_ToMember> && is_trivially_copyable_v<_FromMember>);
#if __has_builtin(__builtin_bit_cast)
    return {__private_init, __builtin_bit_cast(_ToMember, __data(__x))};
#else
    return {__private_init, __bit_cast<_ToMember>(__data(__x))};
#endif
  }

// simd_bit_cast overload for mask sources; _To may be a simd or any other
// trivially copyable type of the same size.
template <typename _To, typename _Up, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  _To
  simd_bit_cast(const simd_mask<_Up, _Abi>& __x)
  {
    using _From = simd_mask<_Up, _Abi>;
    static_assert(sizeof(_To) == sizeof(_From));
    static_assert(is_trivially_copyable_v<_From>);
    // _To can be simd<T, A>, specifically simd<T, fixed_size<N>> in which case _To is not trivially
    // copyable.
    if constexpr (is_simd_v<_To>)
      {
	// Bit-cast to the simd's internal member type, then wrap it.
	using _Tp = typename _To::value_type;
	using _ToMember = typename _SimdTraits<_Tp, typename _To::abi_type>::_SimdMember;
	static_assert(is_trivially_copyable_v<_ToMember>);
#if __has_builtin(__builtin_bit_cast)
	return {__private_init, __builtin_bit_cast(_ToMember, __x)};
#else
	return {__private_init, __bit_cast<_ToMember>(__x)};
#endif
      }
    else
      {
	static_assert(is_trivially_copyable_v<_To>);
#if __has_builtin(__builtin_bit_cast)
	return __builtin_bit_cast(_To, __x);
#else
	return __bit_cast<_To>(__x);
#endif
      }
  }
} // namespace __proposed
3282 
3283 // simd_cast {{{2
// simd_cast: like static_simd_cast, but constrained to value-preserving
// conversions (_ValuePreserving rejects narrowing element conversions).
template <typename _Tp, typename _Up, typename _Ap,
	  typename _To = __value_type_or_identity_t<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  simd_cast(const simd<_ValuePreserving<_Up, _To>, _Ap>& __x)
  -> decltype(static_simd_cast<_Tp>(__x))
  { return static_simd_cast<_Tp>(__x); }

namespace __proposed {
// Value-preserving cast for masks (delegates to the mask static_simd_cast).
template <typename _Tp, typename _Up, typename _Ap,
	  typename _To = __value_type_or_identity_t<_Tp>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR auto
  simd_cast(const simd_mask<_ValuePreserving<_Up, _To>, _Ap>& __x)
  -> decltype(static_simd_cast<_Tp>(__x))
  { return static_simd_cast<_Tp>(__x); }
} // namespace __proposed
3299 
3300 // }}}2
3301 // resizing_simd_cast {{{
namespace __proposed {
/* Proposed spec:

template <class T, class U, class Abi>
T resizing_simd_cast(const simd<U, Abi>& x)

p1 Constraints:
   - is_simd_v<T> is true and
   - T::value_type is the same type as U

p2 Returns:
   A simd object with the i^th element initialized to x[i] for all i in the
   range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
   than simd_size_v<U, Abi>, the remaining elements are value-initialized.

template <class T, class U, class Abi>
T resizing_simd_cast(const simd_mask<U, Abi>& x)

p1 Constraints: is_simd_mask_v<T> is true

p2 Returns:
   A simd_mask object with the i^th element initialized to x[i] for all i in
the range of [0, min(T::size(), simd_size_v<U, Abi>)). If T::size() is larger
   than simd_size_v<U, Abi>, the remaining elements are initialized to false.

 */

// Implementation: identical ABI is a pass-through; otherwise pick the
// cheapest path — generator for constant evaluation, scalar copies for size
// 1, a vector bitcast when object sizes match, and memcpy as the fallback.
template <typename _Tp, typename _Up, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR enable_if_t<
    conjunction_v<is_simd<_Tp>, is_same<typename _Tp::value_type, _Up>>, _Tp>
  resizing_simd_cast(const simd<_Up, _Ap>& __x)
  {
    if constexpr (is_same_v<typename _Tp::abi_type, _Ap>)
      return __x;
    else if (__builtin_is_constant_evaluated())
      return _Tp([&](auto __i) constexpr {
	       return __i < simd_size_v<_Up, _Ap> ? __x[__i] : _Up();
	     });
    else if constexpr (simd_size_v<_Up, _Ap> == 1)
      {
	_Tp __r{};
	__r[0] = __x[0];
	return __r;
      }
    else if constexpr (_Tp::size() == 1)
      return __x[0];
    else if constexpr (sizeof(_Tp) == sizeof(__x)
			 && !__is_fixed_size_abi_v<_Ap>)
      // Same object size: reinterpret, masking off any padding elements of
      // the source first.
      return {__private_init,
	      __vector_bitcast<typename _Tp::value_type, _Tp::size()>(
		_Ap::_S_masked(__data(__x))._M_data)};
    else
      {
	// Fallback: value-initialize, then copy the overlapping prefix.
	_Tp __r{};
	__builtin_memcpy(&__data(__r), &__data(__x),
			 sizeof(_Up)
			   * std::min(_Tp::size(), simd_size_v<_Up, _Ap>));
	return __r;
      }
  }

// Mask overload: convert through the target ABI's mask implementation.
template <typename _Tp, typename _Up, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  enable_if_t<is_simd_mask_v<_Tp>, _Tp>
  resizing_simd_cast(const simd_mask<_Up, _Ap>& __x)
  {
    return {__private_init, _Tp::abi_type::_MaskImpl::template _S_convert<
			      typename _Tp::simd_type::value_type>(__x)};
  }
} // namespace __proposed
3372 
3373 // }}}
3374 // to_fixed_size {{{2
// to_fixed_size: converts a simd/simd_mask to the equivalent fixed_size
// type.  Already-fixed_size arguments pass through unchanged; otherwise the
// result is built element-wise via the generator constructor.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, _Np>
  to_fixed_size(const fixed_size_simd<_Tp, _Np>& __x)
  { return __x; }

template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, _Np>
  to_fixed_size(const fixed_size_simd_mask<_Tp, _Np>& __x)
  { return __x; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>
  to_fixed_size(const simd<_Tp, _Ap>& __x)
  {
    using _Rp = fixed_size_simd<_Tp, simd_size_v<_Tp, _Ap>>;
    return _Rp([&__x](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC fixed_size_simd_mask<_Tp, simd_size_v<_Tp, _Ap>>
  to_fixed_size(const simd_mask<_Tp, _Ap>& __x)
  {
    return {__private_init,
	    [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; }};
  }
3400 
3401 // to_native {{{2
// to_native: converts a fixed_size simd/simd_mask to the native ABI type.
// Only participates in overload resolution when the sizes match.  The simd
// overload round-trips through aligned memory; the mask overload is built
// element-wise.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == native_simd<_Tp>::size()), native_simd<_Tp>>
  to_native(const fixed_size_simd<_Tp, _Np>& __x)
  {
    alignas(memory_alignment_v<native_simd<_Tp>>) _Tp __mem[_Np];
    __x.copy_to(__mem, vector_aligned);
    return {__mem, vector_aligned};
  }

template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == native_simd_mask<_Tp>::size()), native_simd_mask<_Tp>>
  to_native(const fixed_size_simd_mask<_Tp, _Np>& __x)
  {
    return native_simd_mask<_Tp>(
	     __private_init,
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }
3421 
3422 // to_compatible {{{2
// to_compatible: converts a fixed_size simd/simd_mask to the `compatible`
// (default) ABI type.  Mirrors to_native above, with the same size
// constraint and conversion strategies.
template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC enable_if_t<(_Np == simd<_Tp>::size()), simd<_Tp>>
  to_compatible(const simd<_Tp, simd_abi::fixed_size<_Np>>& __x)
  {
    alignas(memory_alignment_v<simd<_Tp>>) _Tp __mem[_Np];
    __x.copy_to(__mem, vector_aligned);
    return {__mem, vector_aligned};
  }

template <typename _Tp, int _Np>
  _GLIBCXX_SIMD_INTRINSIC
  enable_if_t<(_Np == simd_mask<_Tp>::size()), simd_mask<_Tp>>
  to_compatible(const simd_mask<_Tp, simd_abi::fixed_size<_Np>>& __x)
  {
    return simd_mask<_Tp>(
	     __private_init,
	     [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
  }
3441 
3442 // masked assignment [simd_mask.where] {{{1
3443 
3444 // where_expression {{{1
3445 // const_where_expression<M, T> {{{2
// The read-only result of where(mask, const value): supports unary minus,
// masked loads (copy_from) and masked stores (copy_to).  _M is the mask
// type, _Tp the (possibly simd) value type.  Holds references only — must
// not outlive its operands.
template <typename _M, typename _Tp>
  class const_where_expression
  {
    using _V = _Tp;
    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);

    // Lets value_type below resolve for arithmetic _V, which has no nested
    // value_type of its own.
    struct _Wrapper { using value_type = _V; };

  protected:
    using _Impl = typename _V::_Impl;

    using value_type =
      typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;

    // Hidden-friend accessors used by reduce() and the where_expression
    // machinery.
    _GLIBCXX_SIMD_INTRINSIC friend const _M&
    __get_mask(const const_where_expression& __x)
    { return __x._M_k; }

    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
    __get_lvalue(const const_where_expression& __x)
    { return __x._M_value; }

    const _M& _M_k;   // the mask
    _Tp& _M_value;    // the selected value (non-const ref for the derived
		      // where_expression; this class never writes through it)

  public:
    const_where_expression(const const_where_expression&) = delete;

    const_where_expression& operator=(const const_where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    const_where_expression(const _M& __kk, const _Tp& dd)
    : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}

    // Returns a copy with the selected elements negated.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
    operator-() const&&
    {
      return {__private_init,
	      _Impl::template _S_masked_unary<negate>(__data(_M_k),
						      __data(_M_value))};
    }

    // Masked load: returns a copy of the value with selected elements
    // replaced by values loaded from __mem.
    template <typename _Up, typename _Flags>
      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
	return {__private_init,
		_Impl::_S_masked_load(__data(_M_value), __data(_M_k),
				      _Flags::template _S_apply<_V>(__mem))};
      }

    // Masked store: writes only the selected elements to __mem.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
	_Impl::_S_masked_store(__data(_M_value),
			       _Flags::template _S_apply<_V>(__mem),
			       __data(_M_k));
      }
  };
3506 
3507 // const_where_expression<bool, T> {{{2
// Specialization for a plain bool mask (scalar where()): every masked
// operation degenerates to a simple `if (_M_k)`.
template <typename _Tp>
  class const_where_expression<bool, _Tp>
  {
    using _M = bool;
    using _V = _Tp;

    static_assert(is_same_v<_V, __remove_cvref_t<_Tp>>);

    struct _Wrapper { using value_type = _V; };

  protected:
    using value_type
      = typename conditional_t<is_arithmetic_v<_V>, _Wrapper, _V>::value_type;

    _GLIBCXX_SIMD_INTRINSIC friend const _M&
    __get_mask(const const_where_expression& __x)
    { return __x._M_k; }

    _GLIBCXX_SIMD_INTRINSIC friend const _Tp&
    __get_lvalue(const const_where_expression& __x)
    { return __x._M_value; }

    const bool _M_k;  // the (scalar) condition
    _Tp& _M_value;

  public:
    const_where_expression(const const_where_expression&) = delete;
    const_where_expression& operator=(const const_where_expression&) = delete;

    _GLIBCXX_SIMD_INTRINSIC constexpr
    const_where_expression(const bool __kk, const _Tp& dd)
    : _M_k(__kk), _M_value(const_cast<_Tp&>(dd)) {}

    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
    operator-() const&&
    { return _M_k ? -_M_value : _M_value; }

    // Conditional load of a single element.
    template <typename _Up, typename _Flags>
      [[nodiscard]] _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _V
      copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      { return _M_k ? static_cast<_V>(__mem[0]) : _M_value; }

    // Conditional store of a single element.
    template <typename _Up, typename _Flags>
      _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(_LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) const&&
      {
	if (_M_k)
	  __mem[0] = _M_value;
      }
  };
3558 
3559 // where_expression<M, T> {{{2
3560 template <typename _M, typename _Tp>
3561  class where_expression : public const_where_expression<_M, _Tp>
3562  {
3563  using _Impl = typename const_where_expression<_M, _Tp>::_Impl;
3564 
3565  static_assert(!is_const<_Tp>::value,
3566  "where_expression may only be instantiated with __a non-const "
3567  "_Tp parameter");
3568 
3569  using typename const_where_expression<_M, _Tp>::value_type;
3570  using const_where_expression<_M, _Tp>::_M_k;
3571  using const_where_expression<_M, _Tp>::_M_value;
3572 
3573  static_assert(
3574  is_same<typename _M::abi_type, typename _Tp::abi_type>::value, "");
3575  static_assert(_M::size() == _Tp::size(), "");
3576 
3577  _GLIBCXX_SIMD_INTRINSIC friend constexpr _Tp&
3578  __get_lvalue(where_expression& __x)
3579  { return __x._M_value; }
3580 
3581  public:
3582  where_expression(const where_expression&) = delete;
3583  where_expression& operator=(const where_expression&) = delete;
3584 
3585  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3586  where_expression(const _M& __kk, _Tp& dd)
3587  : const_where_expression<_M, _Tp>(__kk, dd) {}
3588 
3589  template <typename _Up>
3590  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3591  operator=(_Up&& __x) &&
3592  {
3593  _Impl::_S_masked_assign(__data(_M_k), __data(_M_value),
3594  __to_value_type_or_member_type<_Tp>(
3595  static_cast<_Up&&>(__x)));
3596  }
3597 
3598 #define _GLIBCXX_SIMD_OP_(__op, __name) \
3599  template <typename _Up> \
3600  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3601  operator __op##=(_Up&& __x)&& \
3602  { \
3603  _Impl::template _S_masked_cassign( \
3604  __data(_M_k), __data(_M_value), \
3605  __to_value_type_or_member_type<_Tp>(static_cast<_Up&&>(__x)), \
3606  [](auto __impl, auto __lhs, auto __rhs) \
3607  constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA \
3608  { return __impl.__name(__lhs, __rhs); }); \
3609  } \
3610  static_assert(true)
3611  _GLIBCXX_SIMD_OP_(+, _S_plus);
3612  _GLIBCXX_SIMD_OP_(-, _S_minus);
3613  _GLIBCXX_SIMD_OP_(*, _S_multiplies);
3614  _GLIBCXX_SIMD_OP_(/, _S_divides);
3615  _GLIBCXX_SIMD_OP_(%, _S_modulus);
3616  _GLIBCXX_SIMD_OP_(&, _S_bit_and);
3617  _GLIBCXX_SIMD_OP_(|, _S_bit_or);
3618  _GLIBCXX_SIMD_OP_(^, _S_bit_xor);
3619  _GLIBCXX_SIMD_OP_(<<, _S_shift_left);
3620  _GLIBCXX_SIMD_OP_(>>, _S_shift_right);
3621 #undef _GLIBCXX_SIMD_OP_
3622 
3623  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3624  operator++() &&
3625  {
3626  __data(_M_value)
3627  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3628  }
3629 
3630  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3631  operator++(int) &&
3632  {
3633  __data(_M_value)
3634  = _Impl::template _S_masked_unary<__increment>(__data(_M_k), __data(_M_value));
3635  }
3636 
3637  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3638  operator--() &&
3639  {
3640  __data(_M_value)
3641  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3642  }
3643 
3644  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3645  operator--(int) &&
3646  {
3647  __data(_M_value)
3648  = _Impl::template _S_masked_unary<__decrement>(__data(_M_k), __data(_M_value));
3649  }
3650 
3651  // intentionally hides const_where_expression::copy_from
3652  template <typename _Up, typename _Flags>
3653  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3654  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3655  {
3656  __data(_M_value) = _Impl::_S_masked_load(__data(_M_value), __data(_M_k),
3657  _Flags::template _S_apply<_Tp>(__mem));
3658  }
3659  };
3660 
3661 // where_expression<bool, T> {{{2
3662 template <typename _Tp>
3663  class where_expression<bool, _Tp>
3664  : public const_where_expression<bool, _Tp>
3665  {
3666  using _M = bool;
3667  using typename const_where_expression<_M, _Tp>::value_type;
3668  using const_where_expression<_M, _Tp>::_M_k;
3669  using const_where_expression<_M, _Tp>::_M_value;
3670 
3671  public:
3672  where_expression(const where_expression&) = delete;
3673  where_expression& operator=(const where_expression&) = delete;
3674 
3675  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3676  where_expression(const _M& __kk, _Tp& dd)
3677  : const_where_expression<_M, _Tp>(__kk, dd) {}
3678 
3679 #define _GLIBCXX_SIMD_OP_(__op) \
3680  template <typename _Up> \
3681  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void \
3682  operator __op(_Up&& __x)&& \
3683  { if (_M_k) _M_value __op static_cast<_Up&&>(__x); }
3684 
3685  _GLIBCXX_SIMD_OP_(=)
3686  _GLIBCXX_SIMD_OP_(+=)
3687  _GLIBCXX_SIMD_OP_(-=)
3688  _GLIBCXX_SIMD_OP_(*=)
3689  _GLIBCXX_SIMD_OP_(/=)
3690  _GLIBCXX_SIMD_OP_(%=)
3691  _GLIBCXX_SIMD_OP_(&=)
3692  _GLIBCXX_SIMD_OP_(|=)
3693  _GLIBCXX_SIMD_OP_(^=)
3694  _GLIBCXX_SIMD_OP_(<<=)
3695  _GLIBCXX_SIMD_OP_(>>=)
3696  #undef _GLIBCXX_SIMD_OP_
3697 
3698  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3699  operator++() &&
3700  { if (_M_k) ++_M_value; }
3701 
3702  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3703  operator++(int) &&
3704  { if (_M_k) ++_M_value; }
3705 
3706  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3707  operator--() &&
3708  { if (_M_k) --_M_value; }
3709 
3710  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3711  operator--(int) &&
3712  { if (_M_k) --_M_value; }
3713 
3714  // intentionally hides const_where_expression::copy_from
3715  template <typename _Up, typename _Flags>
3716  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR void
3717  copy_from(const _LoadStorePtr<_Up, value_type>* __mem, _IsSimdFlagType<_Flags>) &&
3718  { if (_M_k) _M_value = __mem[0]; }
3719  };
3720 
3721 // where {{{1
// where(mask, value): builds a (const_)where_expression tying a mask to a
// value; mutable values yield the assignable where_expression, const values
// the read-only one.  The _ExactBool overloads handle scalar conditions
// without allowing implicit conversions to bool.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  where(const typename simd<_Tp, _Ap>::mask_type& __k, simd<_Tp, _Ap>& __value)
  { return {__k, __value}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  const_where_expression<simd_mask<_Tp, _Ap>, simd<_Tp, _Ap>>
  where(const typename simd<_Tp, _Ap>::mask_type& __k, const simd<_Tp, _Ap>& __value)
  { return {__k, __value}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, simd_mask<_Tp, _Ap>& __value)
  { return {__k, __value}; }

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  const_where_expression<simd_mask<_Tp, _Ap>, simd_mask<_Tp, _Ap>>
  where(const remove_const_t<simd_mask<_Tp, _Ap>>& __k, const simd_mask<_Tp, _Ap>& __value)
  { return {__k, __value}; }

template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR where_expression<bool, _Tp>
  where(_ExactBool __k, _Tp& __value)
  { return {__k, __value}; }

template <typename _Tp>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR const_where_expression<bool, _Tp>
  where(_ExactBool __k, const _Tp& __value)
  { return {__k, __value}; }
3755 
// Deleted: a plain bool condition with a simd value is almost certainly a
// mistake (the user meant a simd_mask); reject it at compile time.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_CONSTEXPR void
  where(bool __k, simd<_Tp, _Ap>& __value) = delete;

template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_CONSTEXPR void
  where(bool __k, const simd<_Tp, _Ap>& __value) = delete;
3763 
3764 // proposed mask iterations {{{1
3765 namespace __proposed {
3766 template <size_t _Np>
3767  class where_range
3768  {
3769  const bitset<_Np> __bits;
3770 
3771  public:
3772  where_range(bitset<_Np> __b) : __bits(__b) {}
3773 
3774  class iterator
3775  {
3776  size_t __mask;
3777  size_t __bit;
3778 
3779  _GLIBCXX_SIMD_INTRINSIC void
3780  __next_bit()
3781  { __bit = __builtin_ctzl(__mask); }
3782 
3783  _GLIBCXX_SIMD_INTRINSIC void
3784  __reset_lsb()
3785  {
3786  // 01100100 - 1 = 01100011
3787  __mask &= (__mask - 1);
3788  // __asm__("btr %1,%0" : "+r"(__mask) : "r"(__bit));
3789  }
3790 
3791  public:
3792  iterator(decltype(__mask) __m) : __mask(__m) { __next_bit(); }
3793  iterator(const iterator&) = default;
3794  iterator(iterator&&) = default;
3795 
3796  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3797  operator->() const
3798  { return __bit; }
3799 
3800  _GLIBCXX_SIMD_ALWAYS_INLINE size_t
3801  operator*() const
3802  { return __bit; }
3803 
3804  _GLIBCXX_SIMD_ALWAYS_INLINE iterator&
3805  operator++()
3806  {
3807  __reset_lsb();
3808  __next_bit();
3809  return *this;
3810  }
3811 
3812  _GLIBCXX_SIMD_ALWAYS_INLINE iterator
3813  operator++(int)
3814  {
3815  iterator __tmp = *this;
3816  __reset_lsb();
3817  __next_bit();
3818  return __tmp;
3819  }
3820 
3821  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3822  operator==(const iterator& __rhs) const
3823  { return __mask == __rhs.__mask; }
3824 
3825  _GLIBCXX_SIMD_ALWAYS_INLINE bool
3826  operator!=(const iterator& __rhs) const
3827  { return __mask != __rhs.__mask; }
3828  };
3829 
3830  iterator
3831  begin() const
3832  { return __bits.to_ullong(); }
3833 
3834  iterator
3835  end() const
3836  { return 0; }
3837  };
3838 
// Proposed extension: iterate over the indices of the true elements of a
// simd_mask, e.g. `for (size_t i : __proposed::where(k))`.
template <typename _Tp, typename _Ap>
  where_range<simd_size_v<_Tp, _Ap>>
  where(const simd_mask<_Tp, _Ap>& __k)
  { return __k.__to_bitset(); }
3843 
3844 } // namespace __proposed
3845 
3846 // }}}1
3847 // reductions [simd.reductions] {{{1
// Horizontal reduction of all elements of __v with __binary_op (default:
// plus<>, i.e. a horizontal sum). Dispatches to the ABI-specific
// implementation.
template <typename _Tp, typename _Abi, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  reduce(const simd<_Tp, _Abi>& __v, _BinaryOperation __binary_op = _BinaryOperation())
  { return _Abi::_SimdImpl::_S_reduce(__v, __binary_op); }
3852 
// Masked reduction: reduces only the selected elements of the
// where-expression. Inactive lanes are filled with __identity_element
// (which must be the identity of __binary_op) before the full reduction.
template <typename _M, typename _V, typename _BinaryOperation = plus<>>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x,
         typename _V::value_type __identity_element, _BinaryOperation __binary_op)
  {
    // Fast path: no lane selected -> result is the identity element.
    if (__builtin_expect(none_of(__get_mask(__x)), false))
      return __identity_element;

    // Blend the active lanes over a vector of identity elements, then
    // perform an unmasked reduction.
    _V __tmp = __identity_element;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return reduce(__tmp, __binary_op);
  }
3866 
// Masked sum: identity element of plus<> is 0.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, plus<> __binary_op = {})
  { return reduce(__x, 0, __binary_op); }
3871 
// Masked product: identity element of multiplies<> is 1.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, multiplies<> __binary_op)
  { return reduce(__x, 1, __binary_op); }
3876 
// Masked bitwise AND: identity element is all-ones (~T()).
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_and<> __binary_op)
  { return reduce(__x, ~typename _V::value_type(), __binary_op); }
3881 
// Masked bitwise OR: identity element is 0.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_or<> __binary_op)
  { return reduce(__x, 0, __binary_op); }
3886 
// Masked bitwise XOR: identity element is 0.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC typename _V::value_type
  reduce(const const_where_expression<_M, _V>& __x, bit_xor<> __binary_op)
  { return reduce(__x, 0, __binary_op); }
3891 
// Horizontal minimum of all elements of __v.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmin(const simd<_Tp, _Abi>& __v) noexcept
  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Minimum()); }
3896 
// Horizontal maximum of all elements of __v.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR _Tp
  hmax(const simd<_Tp, _Abi>& __v) noexcept
  { return _Abi::_SimdImpl::_S_reduce(__v, __detail::_Maximum()); }
3901 
// Masked horizontal minimum: inactive lanes are filled with the largest
// representable value (+inf where it exists and finite-math-only is off)
// so they cannot influence the result.
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmin(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
#ifdef __FINITE_MATH_ONLY__
      // With -ffinite-math-only infinities must not appear; use the
      // largest finite value instead.
      __finite_max_v<_Tp>;
#else
      // Use +infinity where _Tp has one; otherwise the finite maximum.
      __value_or<__infinity, _Tp>(__finite_max_v<_Tp>);
#endif
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Minimum());
  }
3919 
// Masked horizontal maximum: inactive lanes are filled with the smallest
// representable value (-inf where it exists and finite-math-only is off).
template <typename _M, typename _V>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
  typename _V::value_type
  hmax(const const_where_expression<_M, _V>& __x) noexcept
  {
    using _Tp = typename _V::value_type;
    constexpr _Tp __id_elem =
#ifdef __FINITE_MATH_ONLY__
      // -ffinite-math-only: stick to the smallest finite value.
      __finite_min_v<_Tp>;
#else
      [] {
        if constexpr (__value_exists_v<__infinity, _Tp>)
          return -__infinity_v<_Tp>;
        else
          return __finite_min_v<_Tp>;
      }();
#endif
    _V __tmp = __id_elem;
    _V::_Impl::_S_masked_assign(__data(__get_mask(__x)), __data(__tmp),
                                __data(__get_lvalue(__x)));
    return _V::abi_type::_SimdImpl::_S_reduce(__tmp, __detail::_Maximum());
  }
3942 
3943 // }}}1
3944 // algorithms [simd.alg] {{{
// Element-wise minimum of two simd objects.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  min(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_min(__data(__a), __data(__b))}; }
3949 
// Element-wise maximum of two simd objects.
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
  max(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
  { return {__private_init, _Ap::_SimdImpl::_S_max(__data(__a), __data(__b))}; }
3954 
3955 template <typename _Tp, typename _Ap>
3956  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
3957  pair<simd<_Tp, _Ap>, simd<_Tp, _Ap>>
3958  minmax(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
3959  {
3960  const auto pair_of_members
3961  = _Ap::_SimdImpl::_S_minmax(__data(__a), __data(__b));
3962  return {simd<_Tp, _Ap>(__private_init, pair_of_members.first),
3963  simd<_Tp, _Ap>(__private_init, pair_of_members.second)};
3964  }
3965 
3966 template <typename _Tp, typename _Ap>
3967  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
3968  clamp(const simd<_Tp, _Ap>& __v, const simd<_Tp, _Ap>& __lo, const simd<_Tp, _Ap>& __hi)
3969  {
3970  using _Impl = typename _Ap::_SimdImpl;
3971  return {__private_init,
3972  _Impl::_S_min(__data(__hi),
3973  _Impl::_S_max(__data(__lo), __data(__v)))};
3974  }
3975 
3976 // }}}
3977 
// Forward declaration of split<Sizes...>: partitions a simd into a tuple of
// smaller simds; the sizes must add up exactly to the source size (SFINAE).
template <size_t... _Sizes, typename _Tp, typename _Ap,
          typename = enable_if_t<((_Sizes + ...) == simd<_Tp, _Ap>::size())>>
  inline tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>&);
3982 
3983 // __extract_part {{{
// Forward declaration: extract _Combine consecutive parts (of _Total equal
// parts), starting at part _Index, from a _SimdWrapper. Extracting
// everything yields the wrapper's value type directly.
template <int _Index, int _Total, int _Combine = 1, typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_CONST constexpr
  conditional_t<_Np == _Total and _Combine == 1, _Tp, _SimdWrapper<_Tp, _Np / _Total * _Combine>>
  __extract_part(const _SimdWrapper<_Tp, _Np> __x);
3988 
// Forward declaration: the _SimdTuple (fixed_size) overload of
// __extract_part.
template <int _Index, int _Parts, int _Combine = 1, typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto
  __extract_part(const _SimdTuple<_Tp, _A0, _As...>& __x);
3992 
3993 // }}}
3994 // _SizeList {{{
// Compile-time list of sizes used by split<Sizes...>. All queries recurse
// on the tail; the if-constexpr guards are required so the empty-pack
// instantiation is never formed.
template <size_t _V0, size_t... _Values>
  struct _SizeList
  {
    // The _I-th size in the list.
    template <size_t _I>
      static constexpr size_t
      _S_at(_SizeConstant<_I> = {})
      {
        if constexpr (_I == 0)
          return _V0;
        else
          return _SizeList<_Values...>::template _S_at<_I - 1>();
      }

    // Sum of all sizes before index _I (i.e. the element offset of part _I).
    template <size_t _I>
      static constexpr auto
      _S_before(_SizeConstant<_I> = {})
      {
        if constexpr (_I == 0)
          return _SizeConstant<0>();
        else
          return _SizeConstant<
                   _V0 + _SizeList<_Values...>::template _S_before<_I - 1>()>();
      }

    // The list with the first _Np entries removed.
    template <size_t _Np>
      static constexpr auto
      _S_pop_front(_SizeConstant<_Np> = {})
      {
        if constexpr (_Np == 0)
          return _SizeList();
        else
          return _SizeList<_Values...>::template _S_pop_front<_Np - 1>();
      }
  };
4029 
4030 // }}}
4031 // __extract_center {{{
// Extract the middle half of a vector: for elements x0 x1 x2 x3 (quarters)
// return {x1, x2}. Uses x86 shuffles where profitable, otherwise a memcpy
// from the middle of the object.
template <typename _Tp, size_t _Np>
  _GLIBCXX_SIMD_INTRINSIC _SimdWrapper<_Tp, _Np / 2>
  __extract_center(_SimdWrapper<_Tp, _Np> __x)
  {
    static_assert(_Np >= 4);
    static_assert(_Np % 4 == 0); // x0 - x1 - x2 - x3 -> return {x1, x2}
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    if constexpr (__have_avx512f && sizeof(_Tp) * _Np == 64)
      {
        // 64-byte input: shuffle 128-bit lanes 1 and 2 into the low half,
        // then truncate to 256 bits.
        const auto __intrin = __to_intrin(__x);
        if constexpr (is_integral_v<_Tp>)
          return __vector_bitcast<_Tp>(_mm512_castsi512_si256(
                   _mm512_shuffle_i32x4(__intrin, __intrin,
                                        1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
        else if constexpr (sizeof(_Tp) == 4)
          return __vector_bitcast<_Tp>(_mm512_castps512_ps256(
                   _mm512_shuffle_f32x4(__intrin, __intrin,
                                        1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
        else if constexpr (sizeof(_Tp) == 8)
          return __vector_bitcast<_Tp>(_mm512_castpd512_pd256(
                   _mm512_shuffle_f64x2(__intrin, __intrin,
                                        1 + 2 * 0x4 + 2 * 0x10 + 3 * 0x40)));
        else
          __assert_unreachable<_Tp>();
      }
    else if constexpr (sizeof(_Tp) * _Np == 32 && is_floating_point_v<_Tp>)
      // 32-byte float/double: combine high element of the low 128-bit half
      // with the low element of the high half.
      return __vector_bitcast<_Tp>(
               _mm_shuffle_pd(__lo128(__vector_bitcast<double>(__x)),
                              __hi128(__vector_bitcast<double>(__x)), 1));
    else if constexpr (sizeof(__x) == 32 && sizeof(_Tp) * _Np <= 32)
      // Integer 32-byte register: byte-wise funnel shift across the two
      // 128-bit halves picks out the middle bytes.
      return __vector_bitcast<_Tp>(
               _mm_alignr_epi8(__hi128(__vector_bitcast<_LLong>(__x)),
                               __lo128(__vector_bitcast<_LLong>(__x)),
                               sizeof(_Tp) * _Np / 4));
    else
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
      {
        // Generic fallback: copy the middle _Np/2 elements.
        __vector_type_t<_Tp, _Np / 2> __r;
        __builtin_memcpy(&__r,
                         reinterpret_cast<const char*>(&__x)
                           + sizeof(_Tp) * _Np / 4,
                         sizeof(_Tp) * _Np / 2);
        return __r;
      }
  }
4077 
// __extract_center overload for _SimdTuple (fixed_size storage): a
// single-entry tuple defers to the wrapper overload, otherwise extract the
// two middle quarters via __extract_part.
template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC
  _SimdWrapper<_Tp, _SimdTuple<_Tp, _A0, _As...>::_S_size() / 2>
  __extract_center(const _SimdTuple<_Tp, _A0, _As...>& __x)
  {
    if constexpr (sizeof...(_As) == 0)
      return __extract_center(__x.first);
    else
      return __extract_part<1, 4, 2>(__x);
  }
4088 
4089 // }}}
4090 // __split_wrapper {{{
// Helper for split<Sizes...>: re-wraps a _SimdTuple as a fixed_size_simd so
// split can be called recursively with a freshly deduced _Sizes pack.
template <size_t... _Sizes, typename _Tp, typename... _As>
  auto
  __split_wrapper(_SizeList<_Sizes...>, const _SimdTuple<_Tp, _As...>& __x)
  {
    return split<_Sizes...>(
             fixed_size_simd<_Tp, _SimdTuple<_Tp, _As...>::_S_size()>(__private_init,
                                                                      __x));
  }
4099 
4100 // }}}
4101 
4102 // split<simd>(simd) {{{
// split<V>(x): divide x into an array of _Parts simds of type _V. Enabled
// only when the source size is an exact multiple of _V::size().
template <typename _V, typename _Ap,
          size_t _Parts = simd_size_v<typename _V::value_type, _Ap> / _V::size()>
  enable_if_t<simd_size_v<typename _V::value_type, _Ap> == _Parts * _V::size()
                && is_simd_v<_V>, array<_V, _Parts>>
  split(const simd<typename _V::value_type, _Ap>& __x)
  {
    using _Tp = typename _V::value_type;
    if constexpr (_Parts == 1)
      {
        // Trivial split: a single part is just a cast.
        return {simd_cast<_V>(__x)};
      }
    else if (__x._M_is_constprop())
      {
        // Constant-propagated input: element-wise generation lets the
        // compiler fold everything at compile time.
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
                             { return __x[__i * _V::size() + __j]; });
                 });
      }
    else if constexpr (
      __is_fixed_size_abi_v<_Ap>
        && (is_same_v<typename _V::abi_type, simd_abi::scalar>
              || (__is_fixed_size_abi_v<typename _V::abi_type>
                    && sizeof(_V) == sizeof(_Tp) * _V::size() // _V doesn't have padding
                 )))
      {
        // fixed_size -> fixed_size (w/o padding) or scalar
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
        // Reinterpret the contiguous fixed_size storage as an element array
        // and load each part directly.
        const __may_alias<_Tp>* const __element_ptr
          = reinterpret_cast<const __may_alias<_Tp>*>(&__data(__x));
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
                 { return _V(__element_ptr + __i * _V::size(), vector_aligned); });
#else
        // Without aliasing loads: generate element-by-element via the
        // storage's compile-time subscript.
        const auto& __xx = __data(__x);
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                   [[maybe_unused]] constexpr size_t __offset
                     = decltype(__i)::value * _V::size();
                   return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                            constexpr _SizeConstant<__j + __offset> __k;
                            return __xx[__k];
                          });
                 });
#endif
      }
    else if constexpr (is_same_v<typename _V::abi_type, simd_abi::scalar>)
      {
        // normally memcpy should work here as well
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA { return __x[__i]; });
      }
    else
      {
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                   if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
                     return _V([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                              return __x[__i * _V::size() + __j];
                            });
                   else
                     // Native vector: slice the register directly.
                     return _V(__private_init,
                               __extract_part<decltype(__i)::value, _Parts>(__data(__x)));
                 });
      }
  }
4169 
4170 // }}}
4171 // split<simd_mask>(simd_mask) {{{
// split<M>(mask): divide a simd_mask into an array of _Parts masks of type
// _V, mirroring split<simd>(simd).
template <typename _V, typename _Ap,
          size_t _Parts = simd_size_v<typename _V::simd_type::value_type, _Ap> / _V::size()>
  enable_if_t<is_simd_mask_v<_V> && simd_size_v<typename
    _V::simd_type::value_type, _Ap> == _Parts * _V::size(), array<_V, _Parts>>
  split(const simd_mask<typename _V::simd_type::value_type, _Ap>& __x)
  {
    if constexpr (is_same_v<_Ap, typename _V::abi_type>)
      // Same ABI: the one part is the input itself.
      return {__x};
    else if constexpr (_Parts == 1)
      return {__proposed::static_simd_cast<_V>(__x)};
    else if constexpr (_Parts == 2 && __is_sse_abi<typename _V::abi_type>()
                         && __is_avx_abi<_Ap>())
      // AVX -> 2x SSE: split the register into its 128-bit halves.
      return {_V(__private_init, __lo128(__data(__x))),
              _V(__private_init, __hi128(__data(__x)))};
    else if constexpr (_V::size() <= __CHAR_BIT__ * sizeof(_ULLong))
      {
        // Small enough to go through a bitset: shift out each part's bits.
        const bitset __bits = __x.__to_bitset();
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                   constexpr size_t __offset = __i * _V::size();
                   return _V(__bitset_init, (__bits >> __offset).to_ullong());
                 });
      }
    else
      {
        // Generic fallback: element-wise construction of each part.
        return __generate_from_n_evaluations<_Parts, array<_V, _Parts>>(
                 [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                   constexpr size_t __offset = __i * _V::size();
                   return _V(__private_init,
                             [&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                               return __x[__j + __offset];
                             });
                 });
      }
  }
4207 
4208 // }}}
4209 // split<_Sizes...>(simd) {{{
// split<Sizes...>(x): divide x into a tuple of simds with the given
// (possibly differing) sizes. The enable_if on the forward declaration
// guarantees the sizes sum to x's size.
template <size_t... _Sizes, typename _Tp, typename _Ap, typename>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  tuple<simd<_Tp, simd_abi::deduce_t<_Tp, _Sizes>>...>
  split(const simd<_Tp, _Ap>& __x)
  {
    using _SL = _SizeList<_Sizes...>;
    using _Tuple = tuple<__deduced_simd<_Tp, _Sizes>...>;
    constexpr size_t _Np = simd_size_v<_Tp, _Ap>;
    constexpr size_t _N0 = _SL::template _S_at<0>();
    using _V = __deduced_simd<_Tp, _N0>;

    if (__x._M_is_constprop())
      // Constant-propagated input: element-wise generation folds away.
      return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
               [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                 using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
                 constexpr size_t __offset = _SL::_S_before(__i);
                 return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                          return __x[__offset + __j];
                        });
               });
    else if constexpr (_Np == _N0)
      {
        // First part covers everything, so it must be the only part.
        static_assert(sizeof...(_Sizes) == 1);
        return {simd_cast<_V>(__x)};
      }
    else if constexpr // split from fixed_size, such that __x::first.size == _N0
      (__is_fixed_size_abi_v<
         _Ap> && __fixed_size_storage_t<_Tp, _Np>::_S_first_size == _N0)
      {
        static_assert(
          !__is_fixed_size_abi_v<typename _V::abi_type>,
          "How can <_Tp, _Np> be __a single _SimdTuple entry but __a "
          "fixed_size_simd "
          "when deduced?");
        // extract first and recurse (__split_wrapper is needed to deduce a new
        // _Sizes pack)
        return tuple_cat(make_tuple(_V(__private_init, __data(__x).first)),
                         __split_wrapper(_SL::template _S_pop_front<1>(),
                                         __data(__x).second));
      }
    else if constexpr ((!__is_fixed_size_abi_v<simd_abi::deduce_t<_Tp, _Sizes>> && ...))
      {
        // All target ABIs are native: slice each part straight out of the
        // source register via __extract_part.
        constexpr array<size_t, sizeof...(_Sizes)> __size = {_Sizes...};
        return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
                 [&](auto __i) constexpr {
                   constexpr size_t __offset = [&]() {
                     size_t __r = 0;
                     for (unsigned __j = 0; __j < __i; ++__j)
                       __r += __size[__j];
                     return __r;
                   }();
                   return __deduced_simd<_Tp, __size[__i]>(
                            __private_init,
                            __extract_part<__offset, _Np, __size[__i]>(__data(__x)));
                 });
      }
#ifdef _GLIBCXX_SIMD_USE_ALIASING_LOADS
    // Fallback: load each part from the source object's storage with the
    // largest alignment that the part's byte offset guarantees.
    const __may_alias<_Tp>* const __element_ptr
      = reinterpret_cast<const __may_alias<_Tp>*>(&__x);
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
             [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
               using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
               constexpr size_t __offset = _SL::_S_before(__i);
               constexpr size_t __base_align = alignof(simd<_Tp, _Ap>);
               constexpr size_t __a
                 = __base_align - ((__offset * sizeof(_Tp)) % __base_align);
               constexpr size_t __b = ((__a - 1) & __a) ^ __a;
               constexpr size_t __alignment = __b == 0 ? __a : __b;
               return _Vi(__element_ptr + __offset, overaligned<__alignment>);
             });
#else
    // Fallback without aliasing loads: element-wise construction through
    // the storage's compile-time subscript.
    return __generate_from_n_evaluations<sizeof...(_Sizes), _Tuple>(
             [&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
               using _Vi = __deduced_simd<_Tp, _SL::_S_at(__i)>;
               const auto& __xx = __data(__x);
               using _Offset = decltype(_SL::_S_before(__i));
               return _Vi([&](auto __j) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                        constexpr _SizeConstant<_Offset::value + __j> __k;
                        return __xx[__k];
                      });
             });
#endif
  }
4293 
4294 // }}}
4295 
4296 // __subscript_in_pack {{{
// Treat a pack of simds as one concatenated sequence and return element _I.
// Recurses into the tail with the index reduced by the head's size.
template <size_t _I, typename _Tp, typename _Ap, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC constexpr _Tp
  __subscript_in_pack(const simd<_Tp, _Ap>& __x, const simd<_Tp, _As>&... __xs)
  {
    if constexpr (_I < simd_size_v<_Tp, _Ap>)
      return __x[_I];
    else
      return __subscript_in_pack<_I - simd_size_v<_Tp, _Ap>>(__xs...);
  }
4306 
4307 // }}}
4308 // __store_pack_of_simd {{{
// Store a pack of simds contiguously into __mem (raw byte copy of each
// simd's data member, one after the other).
template <typename _Tp, typename _A0, typename... _As>
  _GLIBCXX_SIMD_INTRINSIC void
  __store_pack_of_simd(char* __mem, const simd<_Tp, _A0>& __x0, const simd<_Tp, _As>&... __xs)
  {
    constexpr size_t __n_bytes = sizeof(_Tp) * simd_size_v<_Tp, _A0>;
    __builtin_memcpy(__mem, &__data(__x0), __n_bytes);
    if constexpr (sizeof...(__xs) > 0)
      __store_pack_of_simd(__mem + __n_bytes, __xs...);
  }
4318 
4319 // }}}
4320 // concat(simd...) {{{
// Concatenate a pack of simds into one simd whose size is the sum of the
// input sizes.
template <typename _Tp, typename... _As, typename = __detail::__odr_helper>
  inline _GLIBCXX_SIMD_CONSTEXPR
  simd<_Tp, simd_abi::deduce_t<_Tp, (simd_size_v<_Tp, _As> + ...)>>
  concat(const simd<_Tp, _As>&... __xs)
  {
    constexpr int _Np = (simd_size_v<_Tp, _As> + ...);
    using _Abi = simd_abi::deduce_t<_Tp, _Np>;
    using _Rp = simd<_Tp, _Abi>;
    using _RW = typename _SimdTraits<_Tp, _Abi>::_SimdMember;
    if constexpr (sizeof...(__xs) == 1)
      // One input: plain conversion.
      return simd_cast<_Rp>(__xs...);
    else if ((... && __xs._M_is_constprop()))
      // All inputs constant-propagated: element-wise generation folds away.
      return _Rp([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA
                 { return __subscript_in_pack<__i>(__xs...); });
    else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 2)
      {
        // Two vector-register inputs: one shuffle does the concatenation.
        // The index lambda maps result lanes onto the two (possibly padded)
        // source vectors; -1 marks don't-care lanes in the padded result.
        return {__private_init,
                __vec_shuffle(__as_vector(__xs)..., std::make_index_sequence<_RW::_S_full_size>(),
                              [](int __i) {
                                constexpr int __sizes[2] = {int(simd_size_v<_Tp, _As>)...};
                                constexpr int __vsizes[2]
                                  = {int(sizeof(__as_vector(__xs)) / sizeof(_Tp))...};
                                constexpr int __padding0 = __vsizes[0] - __sizes[0];
                                return __i >= _Np ? -1 : __i < __sizes[0] ? __i : __i + __padding0;
                              })};
      }
    else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) == 3)
      // Three inputs: reduce to nested two-input concats.
      return [](const auto& __x0, const auto& __x1, const auto& __x2)
               _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                 return concat(concat(__x0, __x1), __x2);
               }(__xs...);
    else if constexpr (__is_simd_wrapper_v<_RW> and sizeof...(__xs) > 3)
      // More inputs: divide and conquer via two-input concats.
      return [](const auto& __x0, const auto& __x1, const auto&... __rest)
               _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
                 return concat(concat(__x0, __x1), concat(__rest...));
               }(__xs...);
    else
      {
        // Fallback (e.g. fixed_size result): store all parts contiguously
        // into the result object's storage.
        _Rp __r{};
        __store_pack_of_simd(reinterpret_cast<char*>(&__data(__r)), __xs...);
        return __r;
      }
  }
4364 
4365 // }}}
4366 // concat(array<simd>) {{{
// Concatenate an array of equally-sized simds by expanding the array into a
// pack and forwarding to concat(simd...).
template <typename _Tp, typename _Abi, size_t _Np>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  _GLIBCXX_SIMD_CONSTEXPR __deduced_simd<_Tp, simd_size_v<_Tp, _Abi> * _Np>
  concat(const array<simd<_Tp, _Abi>, _Np>& __x)
  {
    return __call_with_subscripts<_Np>(
             __x, [](const auto&... __xs) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
               return concat(__xs...);
             });
  }
4377 
4378 // }}}
4379 
4380 /// @cond undocumented
4381 // _SmartReference {{{
// Proxy reference returned by simd/simd_mask operator[]. Reads go through
// _M_read(), writes through _Accessor::_S_set(); mutating operators are
// rvalue-qualified because the proxy is always a temporary.
template <typename _Up, typename _Accessor = _Up,
          typename _ValueType = typename _Up::value_type>
  class _SmartReference
  {
    friend _Accessor;
    int _M_index;  // element index within _M_obj
    _Up& _M_obj;   // the referenced storage object

    // Load the referenced element (arithmetic _Up is its own element).
    _GLIBCXX_SIMD_INTRINSIC constexpr _ValueType
    _M_read() const noexcept
    {
      if constexpr (is_arithmetic_v<_Up>)
        return _M_obj;
      else
        return _M_obj[_M_index];
    }

    // Store __x into the referenced element via the accessor.
    template <typename _Tp>
      _GLIBCXX_SIMD_INTRINSIC constexpr void
      _M_write(_Tp&& __x) const
      { _Accessor::_S_set(_M_obj, _M_index, static_cast<_Tp&&>(__x)); }

  public:
    _GLIBCXX_SIMD_INTRINSIC constexpr
    _SmartReference(_Up& __o, int __i) noexcept
    : _M_index(__i), _M_obj(__o) {}

    using value_type = _ValueType;

    // Non-copyable: the proxy must not outlive the expression it came from.
    _GLIBCXX_SIMD_INTRINSIC
    _SmartReference(const _SmartReference&) = delete;

    // Implicit read conversion.
    _GLIBCXX_SIMD_INTRINSIC constexpr
    operator value_type() const noexcept
    { return _M_read(); }

    // Assignment accepts only value-preserving conversions (or int, per
    // the TS rules for "int is okay everywhere").
    template <typename _Tp, typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, value_type>>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator=(_Tp&& __x) &&
      {
        _M_write(static_cast<_Tp&&>(__x));
        return {_M_obj, _M_index};
      }

    // Compound assignment operators, generated for all arithmetic, shift,
    // and binary operators: read-modify-write through the accessor.
#define _GLIBCXX_SIMD_OP_(__op) \
    template <typename _Tp, \
              typename _TT = decltype(declval<value_type>() __op declval<_Tp>()), \
              typename = _ValuePreservingOrInt<__remove_cvref_t<_Tp>, _TT>, \
              typename = _ValuePreservingOrInt<_TT, value_type>> \
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference \
      operator __op##=(_Tp&& __x) && \
      { \
        const value_type& __lhs = _M_read(); \
        _M_write(__lhs __op __x); \
        return {_M_obj, _M_index}; \
      }
    _GLIBCXX_SIMD_ALL_ARITHMETICS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_SHIFTS(_GLIBCXX_SIMD_OP_);
    _GLIBCXX_SIMD_ALL_BINARY(_GLIBCXX_SIMD_OP_);
#undef _GLIBCXX_SIMD_OP_

    // Increment/decrement are SFINAE-constrained on the value_type actually
    // supporting the operation (the conditional_t defers the check to
    // instantiation time).
    template <typename _Tp = void,
              typename = decltype(++declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator++() &&
      {
        value_type __x = _M_read();
        _M_write(++__x);
        return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
              typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()++)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator++(int) &&
      {
        const value_type __r = _M_read();
        value_type __x = __r;
        _M_write(++__x);
        return __r;
      }

    template <typename _Tp = void,
              typename = decltype(--declval<conditional_t<true, value_type, _Tp>&>())>
      _GLIBCXX_SIMD_INTRINSIC constexpr _SmartReference
      operator--() &&
      {
        value_type __x = _M_read();
        _M_write(--__x);
        return {_M_obj, _M_index};
      }

    template <typename _Tp = void,
              typename = decltype(declval<conditional_t<true, value_type, _Tp>&>()--)>
      _GLIBCXX_SIMD_INTRINSIC constexpr value_type
      operator--(int) &&
      {
        const value_type __r = _M_read();
        value_type __x = __r;
        _M_write(--__x);
        return __r;
      }

    // swap overloads so algorithms can exchange proxied elements with each
    // other and with plain values.
    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, _SmartReference&& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, _SmartReference&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp = static_cast<_SmartReference&&>(__a);
      static_cast<_SmartReference&&>(__a) = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(value_type& __a, _SmartReference&& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, value_type&&>,
        is_nothrow_assignable<value_type&, value_type&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(std::move(__a));
      __a = static_cast<value_type>(__b);
      static_cast<_SmartReference&&>(__b) = std::move(__tmp);
    }

    _GLIBCXX_SIMD_INTRINSIC friend void
    swap(_SmartReference&& __a, value_type& __b) noexcept(
      conjunction<
        is_nothrow_constructible<value_type, _SmartReference&&>,
        is_nothrow_assignable<value_type&, value_type&&>,
        is_nothrow_assignable<_SmartReference&&, value_type&&>>::value)
    {
      value_type __tmp(__a);
      static_cast<_SmartReference&&>(__a) = std::move(__b);
      __b = std::move(__tmp);
    }
  };
4520 
4521 // }}}
4522 // __scalar_abi_wrapper {{{
// Adapter presenting simd_abi::scalar with the template-over-bytes shape
// expected by _AbiList: valid only when sizeof(_Tp) equals _Bytes.
template <int _Bytes>
  struct __scalar_abi_wrapper
  {
    template <typename _Tp> static constexpr size_t _S_full_size = 1;
    template <typename _Tp> static constexpr size_t _S_size = 1;
    template <typename _Tp> static constexpr size_t _S_is_partial = false;

    template <typename _Tp, typename _Abi = simd_abi::scalar>
      static constexpr bool _S_is_valid_v
        = _Abi::template _IsValid<_Tp>::value && sizeof(_Tp) == _Bytes;
  };
4534 
4535 // }}}
4536 // __decay_abi metafunction {{{
// __decay_abi: identity by default; strips the wrapper below.
template <typename _Tp>
  struct __decay_abi { using type = _Tp; };
4539 
// Map the byte-parameterized wrapper back to the real scalar ABI tag.
template <int _Bytes>
  struct __decay_abi<__scalar_abi_wrapper<_Bytes>>
  { using type = simd_abi::scalar; };
4543 
4544 // }}}
4545 // __find_next_valid_abi metafunction {{{1
4546 // Given an ABI tag A<N>, find an N2 < N such that A<N2>::_S_is_valid_v<_Tp> ==
4547 // true, N2 is a power-of-2, and A<N2>::_S_is_partial<_Tp> is false. Break
4548 // recursion at 2 elements in the resulting ABI tag. In this case
4549 // type::_S_is_valid_v<_Tp> may be false.
// Search downward (halving through powers of two) for the next smaller ABI
// tag that is valid and non-partial for _Tp; see the comment block above
// for the full contract.
template <template <int> class _Abi, int _Bytes, typename _Tp>
  struct __find_next_valid_abi
  {
    static constexpr auto
    _S_choose()
    {
      // Next candidate: the next power of two below _Bytes.
      constexpr int _NextBytes = std::__bit_ceil(_Bytes) / 2;
      using _NextAbi = _Abi<_NextBytes>;
      if constexpr (_NextBytes < sizeof(_Tp) * 2) // break recursion
        return _Abi<_Bytes>();
      else if constexpr (_NextAbi::template _S_is_partial<_Tp> == false
                           && _NextAbi::template _S_is_valid_v<_Tp>)
        return _NextAbi();
      else
        return __find_next_valid_abi<_Abi, _NextBytes, _Tp>::_S_choose();
    }

    using type = decltype(_S_choose());
  };
4569 
// The scalar wrapper has nothing smaller to fall back to: always scalar.
template <int _Bytes, typename _Tp>
  struct __find_next_valid_abi<__scalar_abi_wrapper, _Bytes, _Tp>
  { using type = simd_abi::scalar; };
4573 
4574 // _AbiList {{{1
// Primary template: the empty ABI list has no valid ABI for anything.
template <template <int> class...>
  struct _AbiList
  {
    template <typename, int> static constexpr bool _S_has_valid_abi = false;
    template <typename, int> using _FirstValidAbi = void;
    template <typename, int> using _BestAbi = void;
  };
4582 
// Non-empty ABI list: queries check the head ABI first, then recurse into
// the tail. List order therefore encodes priority.
template <template <int> class _A0, template <int> class... _Rest>
  struct _AbiList<_A0, _Rest...>
  {
    // True if any listed ABI is valid for _Np elements of _Tp.
    template <typename _Tp, int _Np>
      static constexpr bool _S_has_valid_abi
        = _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<
            _Tp> || _AbiList<_Rest...>::template _S_has_valid_abi<_Tp, _Np>;

    // First (highest-priority) valid ABI tag, with the scalar wrapper
    // decayed back to simd_abi::scalar.
    template <typename _Tp, int _Np>
      using _FirstValidAbi = conditional_t<
        _A0<sizeof(_Tp) * _Np>::template _S_is_valid_v<_Tp>,
        typename __decay_abi<_A0<sizeof(_Tp) * _Np>>::type,
        typename _AbiList<_Rest...>::template _FirstValidAbi<_Tp, _Np>>;

    template <typename _Tp, int _Np>
      static constexpr auto
      _S_determine_best_abi()
      {
        static_assert(_Np >= 1);
        constexpr int _Bytes = sizeof(_Tp) * _Np;
        if constexpr (_Np == 1)
          return __make_dependent_t<_Tp, simd_abi::scalar>{};
        else
          {
            constexpr int __fullsize = _A0<_Bytes>::template _S_full_size<_Tp>;
            // _A0<_Bytes> is good if:
            // 1. The ABI tag is valid for _Tp
            // 2. The storage overhead is no more than padding to fill the next
            //    power-of-2 number of bytes
            if constexpr (_A0<_Bytes>::template _S_is_valid_v<
                            _Tp> && __fullsize / 2 < _Np)
              return typename __decay_abi<_A0<_Bytes>>::type{};
            else
              {
                // Otherwise try a smaller register size of the same family
                // before moving on to the next ABI family in the list.
                using _Bp =
                  typename __find_next_valid_abi<_A0, _Bytes, _Tp>::type;
                if constexpr (_Bp::template _S_is_valid_v<
                                _Tp> && _Bp::template _S_size<_Tp> <= _Np)
                  return _Bp{};
                else
                  return
                    typename _AbiList<_Rest...>::template _BestAbi<_Tp, _Np>{};
              }
          }
      }

    // The preferred ABI tag for _Np elements of _Tp.
    template <typename _Tp, int _Np>
      using _BestAbi = decltype(_S_determine_best_abi<_Tp, _Np>());
  };
4632 
4633 // }}}1
4634 
4635 // the following lists all native ABIs, which makes them accessible to
4636 // simd_abi::deduce and select_best_vector_type_t (for fixed_size). Order
4637 // matters: Whatever comes first has higher priority.
// All native ABIs in priority order (AVX-512-style mask registers first,
// then builtin vectors, then scalar).
using _AllNativeAbis = _AbiList<simd_abi::_VecBltnBtmsk, simd_abi::_VecBuiltin,
                                __scalar_abi_wrapper>;
4640 
4641 // valid _SimdTraits specialization {{{1
// _SimdTraits is defined (via the ABI's __traits) whenever the ABI declares
// itself valid for _Tp; otherwise the primary (empty) template is used.
template <typename _Tp, typename _Abi>
  struct _SimdTraits<_Tp, _Abi, void_t<typename _Abi::template _IsValid<_Tp>>>
  : _Abi::template __traits<_Tp> {};
4645 
4646 // __deduce_impl specializations {{{1
4647 // try all native ABIs (including scalar) first
// simd_abi::deduce: prefer the first valid native ABI (including scalar).
template <typename _Tp, size_t _Np>
  struct __deduce_impl<
    _Tp, _Np, enable_if_t<_AllNativeAbis::template _S_has_valid_abi<_Tp, _Np>>>
  { using type = _AllNativeAbis::_FirstValidAbi<_Tp, _Np>; };
4652 
4653 // fall back to fixed_size only if scalar and native ABIs don't match
// Primary template: empty (no `type`) when fixed_size<_Np> is invalid too.
template <typename _Tp, size_t _Np, typename = void>
  struct __deduce_fixed_size_fallback {};
4656 
// Provide fixed_size<_Np> where that ABI is valid for _Tp.
template <typename _Tp, size_t _Np>
  struct __deduce_fixed_size_fallback<_Tp, _Np,
    enable_if_t<simd_abi::fixed_size<_Np>::template _S_is_valid_v<_Tp>>>
  { using type = simd_abi::fixed_size<_Np>; };
4661 
// Default deduction: fall back to fixed_size when no native ABI matched.
template <typename _Tp, size_t _Np, typename>
  struct __deduce_impl : public __deduce_fixed_size_fallback<_Tp, _Np> {};
4664 
4665 //}}}1
4666 /// @endcond
4667 
4668 // simd_mask {{{
// simd_mask<_Tp, _Abi>: an element-wise mask of booleans with the same
// element count as simd<_Tp, _Abi>. All real work is delegated to the
// implementation class _Traits::_MaskImpl; this class only provides the
// public interface and holds the (ABI-specific) data member.
template <typename _Tp, typename _Abi>
  class simd_mask : public _SimdTraits<_Tp, _Abi>::_MaskBase
  {
    // types, tags, and friends {{{
    using _Traits = _SimdTraits<_Tp, _Abi>;
    using _MemberType = typename _Traits::_MaskMember;

    // We map all masks with equal element sizeof to a single integer type, the
    // one given by __int_for_sizeof_t<_Tp>. This is the approach
    // [[gnu::vector_size(N)]] types take as well and it reduces the number of
    // template specializations in the implementation classes.
    using _Ip = __int_for_sizeof_t<_Tp>;

    // Tag pointer passed to the _Impl functions; only the pointee type (_Ip)
    // carries information, the value is always nullptr.
    static constexpr _Ip* _S_type_tag = nullptr;

    friend typename _Traits::_MaskBase;
    friend class simd<_Tp, _Abi>;       // to construct masks on return
    friend typename _Traits::_SimdImpl; // to construct masks on return and
                                        // inspect data on masked operations
  public:
    using _Impl = typename _Traits::_MaskImpl;
    friend _Impl;

    // }}}
    // member types {{{
    using value_type = bool;
    using reference = _SmartReference<_MemberType, _Impl, value_type>;
    using simd_type = simd<_Tp, _Abi>;
    using abi_type = _Abi;

    // }}}
    static constexpr size_t size() // {{{
    { return __size_or_zero_v<_Tp, _Abi>; }

    // }}}
    // constructors & assignment {{{
    simd_mask() = default;
    simd_mask(const simd_mask&) = default;
    simd_mask(simd_mask&&) = default;
    simd_mask& operator=(const simd_mask&) = default;
    simd_mask& operator=(simd_mask&&) = default;

    // }}}
    // access to internal representation (optional feature) {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR explicit
    simd_mask(typename _Traits::_MaskCastType __init)
    : _M_data{__init} {}
    // conversions to internal type is done in _MaskBase

    // }}}
    // bitset interface (extension to be proposed) {{{
    // TS_FEEDBACK:
    // Conversion of simd_mask to and from bitset makes it much easier to
    // interface with other facilities. I suggest adding `static
    // simd_mask::from_bitset` and `simd_mask::to_bitset`.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR static simd_mask
    __from_bitset(bitset<size()> bs)
    { return {__bitset_init, bs}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bitset<size()>
    __to_bitset() const
    { return _Impl::_S_to_bits(_M_data)._M_to_bitset(); }

    // }}}
    // explicit broadcast constructor {{{
    // Sets all elements of the mask to __x.
    _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
    simd_mask(value_type __x)
    : _M_data(_Impl::template _S_broadcast<_Ip>(__x)) {}

    // }}}
    // implicit type conversion constructor {{{
#ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    // proposed improvement
    template <typename _Up, typename _A2,
              typename = enable_if_t<simd_size_v<_Up, _A2> == size()>>
      _GLIBCXX_SIMD_ALWAYS_INLINE explicit(sizeof(_MemberType)
          != sizeof(typename _SimdTraits<_Up, _A2>::_MaskMember))
      simd_mask(const simd_mask<_Up, _A2>& __x)
      : simd_mask(__proposed::static_simd_cast<simd_mask>(__x)) {}
#else
    // conforming to ISO/IEC 19570:2018
    // Only fixed_size masks of equal size convert implicitly. The
    // `is_same<_Up, _Up>` term keeps the condition type-dependent so that
    // SFINAE applies per instantiation.
    template <typename _Up, typename = enable_if_t<conjunction<
                is_same<abi_type, simd_abi::fixed_size<size()>>,
                is_same<_Up, _Up>>::value>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd_mask(const simd_mask<_Up, simd_abi::fixed_size<size()>>& __x)
      : _M_data(_Impl::_S_from_bitmask(__data(__x), _S_type_tag)) {}
#endif

    // }}}
    // load constructor {{{
    // Loads size() bool values from __mem; _Flags communicates alignment
    // via _S_apply.
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd_mask(const value_type* __mem, _IsSimdFlagType<_Flags>)
      : _M_data(_Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem))) {}

    // Masked load: elements where __k is false are zero-initialized.
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
      simd_mask(const value_type* __mem, simd_mask __k, _IsSimdFlagType<_Flags>)
      : _M_data{}
      {
        _M_data = _Impl::_S_masked_load(_M_data, __k._M_data,
                                        _Flags::template _S_apply<simd_mask>(__mem));
      }

    // }}}
    // loads [simd_mask.load] {{{
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
      copy_from(const value_type* __mem, _IsSimdFlagType<_Flags>)
      { _M_data = _Impl::template _S_load<_Ip>(_Flags::template _S_apply<simd_mask>(__mem)); }

    // }}}
    // stores [simd_mask.store] {{{
    template <typename _Flags>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
      copy_to(value_type* __mem, _IsSimdFlagType<_Flags>) const
      { _Impl::_S_store(_M_data, _Flags::template _S_apply<simd_mask>(__mem)); }

    // }}}
    // scalar access {{{
    // Returns a proxy reference; out-of-range subscripts are diagnosed via
    // __invoke_ub. NOTE(review): __i is size_t but the format string uses
    // %d — confirm __invoke_ub's formatting expectations.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
    operator[](size_t __i)
    {
      if (__i >= size())
        __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
      return {_M_data, int(__i)};
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
    operator[](size_t __i) const
    {
      if (__i >= size())
        __invoke_ub("Subscript %d is out of range [0, %d]", __i, size() - 1);
      if constexpr (__is_scalar_abi<_Abi>())
        return _M_data; // scalar ABI stores a single bool directly
      else
        return static_cast<bool>(_M_data[__i]);
    }

    // }}}
    // negation {{{
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd_mask
    operator!() const
    { return {__private_init, _Impl::_S_bit_not(_M_data)}; }

    // }}}
    // simd_mask binary operators [simd_mask.binary] {{{
#ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    // simd_mask<int> && simd_mask<uint> needs disambiguation
    template <typename _Up, typename _A2,
              typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
      operator&&(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
      {
        return {__private_init,
                _Impl::_S_logical_and(__x._M_data, simd_mask(__y)._M_data)};
      }

    template <typename _Up, typename _A2,
              typename = enable_if_t<is_convertible_v<simd_mask<_Up, _A2>, simd_mask>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
      operator||(const simd_mask& __x, const simd_mask<_Up, _A2>& __y)
      {
        return {__private_init,
                _Impl::_S_logical_or(__x._M_data, simd_mask(__y)._M_data)};
      }
#endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST

    // Element-wise logical and bitwise operators; for masks && and & (as
    // well as || and |) compute the same result, but are kept distinct to
    // mirror the scalar operators.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator&&(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_logical_and(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator||(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_logical_or(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator&(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_and(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator|(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_or(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator^(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
    operator&=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_and(__x._M_data, __y._M_data);
      return __x;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
    operator|=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_or(__x._M_data, __y._M_data);
      return __x;
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask&
    operator^=(simd_mask& __x, const simd_mask& __y)
    {
      __x._M_data = _Impl::_S_bit_xor(__x._M_data, __y._M_data);
      return __x;
    }

    // }}}
    // simd_mask compares [simd_mask.comparison] {{{
    // Element-wise comparison; == is expressed as the negation of != (which
    // in turn is simply xor on the mask bits).
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator==(const simd_mask& __x, const simd_mask& __y)
    { return !operator!=(__x, __y); }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator!=(const simd_mask& __x, const simd_mask& __y)
    { return {__private_init, _Impl::_S_bit_xor(__x._M_data, __y._M_data)}; }

    // }}}
    // private_init ctor {{{
    // Wraps an already-computed implementation value; "private" because
    // _PrivateInit lives in an internal namespace.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd_mask(_PrivateInit, typename _Traits::_MaskMember __init)
    : _M_data(__init) {}

    // }}}
    // private_init generator ctor {{{
    // Initializes element __i to bool(__gen(__i)) for all i in [0, size()).
    template <typename _Fp, typename = decltype(bool(declval<_Fp>()(size_t())))>
      _GLIBCXX_SIMD_INTRINSIC constexpr
      simd_mask(_PrivateInit, _Fp&& __gen)
      : _M_data()
      {
        __execute_n_times<size()>([&](auto __i) constexpr _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
          _Impl::_S_set(_M_data, __i, __gen(__i));
        });
      }

    // }}}
    // bitset_init ctor {{{
    _GLIBCXX_SIMD_INTRINSIC constexpr
    simd_mask(_BitsetInit, bitset<size()> __init)
    : _M_data(_Impl::_S_from_bitmask(_SanitizedBitMask<size()>(__init), _S_type_tag))
    {}

    // }}}
    // __cvt {{{
    // TS_FEEDBACK:
    // The conversion operator this implements should be a ctor on simd_mask.
    // Once you call .__cvt() on a simd_mask it converts conveniently.
    // A useful variation: add `explicit(sizeof(_Tp) != sizeof(_Up))`
    struct _CvtProxy
    {
      // Rvalue-only conversion to any equally-sized mask type.
      template <typename _Up, typename _A2,
                typename = enable_if_t<simd_size_v<_Up, _A2> == simd_size_v<_Tp, _Abi>>>
        _GLIBCXX_SIMD_ALWAYS_INLINE
        operator simd_mask<_Up, _A2>() &&
        {
          using namespace std::experimental::__proposed;
          return static_simd_cast<simd_mask<_Up, _A2>>(_M_data);
        }

      const simd_mask<_Tp, _Abi>& _M_data;
    };

    _GLIBCXX_SIMD_INTRINSIC _CvtProxy
    __cvt() const
    { return {*this}; }

    // }}}
    // operator?: overloads (suggested extension) {{{
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // Blend: result takes __where_true where __k is set, __where_false
    // elsewhere.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
    operator?:(const simd_mask& __k, const simd_mask& __where_true,
               const simd_mask& __where_false)
    {
      auto __ret = __where_false;
      _Impl::_S_masked_assign(__k._M_data, __ret._M_data, __where_true._M_data);
      return __ret;
    }

    template <typename _U1, typename _U2,
              typename _Rp = simd<common_type_t<_U1, _U2>, _Abi>,
              typename = enable_if_t<conjunction_v<
                is_convertible<_U1, _Rp>, is_convertible<_U2, _Rp>,
                is_convertible<simd_mask, typename _Rp::mask_type>>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend _Rp
      operator?:(const simd_mask& __k, const _U1& __where_true,
                 const _U2& __where_false)
      {
        _Rp __ret = __where_false;
        _Rp::_Impl::_S_masked_assign(
          __data(static_cast<typename _Rp::mask_type>(__k)), __data(__ret),
          __data(static_cast<_Rp>(__where_true)));
        return __ret;
      }

#ifdef _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
    template <typename _Kp, typename _Ak, typename _Up, typename _Au,
              typename = enable_if_t<
                conjunction_v<is_convertible<simd_mask<_Kp, _Ak>, simd_mask>,
                              is_convertible<simd_mask<_Up, _Au>, simd_mask>>>>
      _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd_mask
      operator?:(const simd_mask<_Kp, _Ak>& __k, const simd_mask& __where_true,
                 const simd_mask<_Up, _Au>& __where_false)
      {
        simd_mask __ret = __where_false;
        _Impl::_S_masked_assign(simd_mask(__k)._M_data, __ret._M_data,
                                __where_true._M_data);
        return __ret;
      }
#endif // _GLIBCXX_SIMD_ENABLE_IMPLICIT_MASK_CAST
#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__

    // }}}
    // _M_is_constprop {{{
    // True if the compiler can prove the mask value is a compile-time
    // constant (enables constant folding in the reduction functions below).
    _GLIBCXX_SIMD_INTRINSIC constexpr bool
    _M_is_constprop() const
    {
      if constexpr (__is_scalar_abi<_Abi>())
        return __builtin_constant_p(_M_data);
      else
        return _M_data._M_is_constprop();
    }

    // }}}

  private:
    friend const auto& __data<_Tp, abi_type>(const simd_mask&);
    friend auto& __data<_Tp, abi_type>(simd_mask&);
    alignas(_Traits::_S_mask_align) _MemberType _M_data;
  };
5000 
5001 // }}}
5002 
5003 /// @cond undocumented
5004 // __data(simd_mask) {{{
// Read-only access to simd_mask's private data member (declared friend
// inside simd_mask).
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
  __data(const simd_mask<_Tp, _Ap>& __x)
  { return __x._M_data; }
5009 
// Mutable access to simd_mask's private data member (declared friend
// inside simd_mask).
template <typename _Tp, typename _Ap>
  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
  __data(simd_mask<_Tp, _Ap>& __x)
  { return __x._M_data; }
5014 
5015 // }}}
5016 /// @endcond
5017 
5018 // simd_mask reductions [simd_mask.reductions] {{{
5019 template <typename _Tp, typename _Abi>
5020  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5021  all_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5022  {
5023  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5024  {
5025  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5026  if (!__k[__i])
5027  return false;
5028  return true;
5029  }
5030  else
5031  return _Abi::_MaskImpl::_S_all_of(__k);
5032  }
5033 
5034 template <typename _Tp, typename _Abi>
5035  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5036  any_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5037  {
5038  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5039  {
5040  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5041  if (__k[__i])
5042  return true;
5043  return false;
5044  }
5045  else
5046  return _Abi::_MaskImpl::_S_any_of(__k);
5047  }
5048 
5049 template <typename _Tp, typename _Abi>
5050  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5051  none_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5052  {
5053  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5054  {
5055  for (size_t __i = 0; __i < simd_size_v<_Tp, _Abi>; ++__i)
5056  if (__k[__i])
5057  return false;
5058  return true;
5059  }
5060  else
5061  return _Abi::_MaskImpl::_S_none_of(__k);
5062  }
5063 
5064 template <typename _Tp, typename _Abi>
5065  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
5066  some_of(const simd_mask<_Tp, _Abi>& __k) noexcept
5067  {
5068  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5069  {
5070  for (size_t __i = 1; __i < simd_size_v<_Tp, _Abi>; ++__i)
5071  if (__k[__i] != __k[__i - 1])
5072  return true;
5073  return false;
5074  }
5075  else
5076  return _Abi::_MaskImpl::_S_some_of(__k);
5077  }
5078 
// Returns the number of true elements in __k. For constant masks the count
// is computed via a fold over all elements; the result is used only if it
// actually constant-folded, otherwise the ABI's optimized reduction runs.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  popcount(const simd_mask<_Tp, _Abi>& __k) noexcept
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        // Sum of (element != 0) over all subscripts.
        const int __r = __call_with_subscripts<simd_size_v<_Tp, _Abi>>(
          __k, [](auto... __elements) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return ((__elements != 0) + ...);
          });
        // Only trust __r if it is a compile-time constant; _M_is_constprop
        // was a hint, not a guarantee, at runtime.
        if (__builtin_is_constant_evaluated() || __builtin_constant_p(__r))
          return __r;
      }
    return _Abi::_MaskImpl::_S_popcount(__k);
  }
5094 
// Returns the lowest index i with __k[i] == true. Calling this with an
// all-false mask is undefined behavior (diagnosed via __invoke_ub in the
// constant-evaluated path).
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
  find_first_set(const simd_mask<_Tp, _Abi>& __k)
  {
    if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
      {
        constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
        // Map false elements to the sentinel _Np and take the minimum; a
        // result of _Np therefore means "no element set".
        const size_t _Idx = __call_with_n_evaluations<_Np>(
          [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return std::min({__indexes...});
          }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
            return __k[__i] ? +__i : _Np;
          });
        if (_Idx >= _Np)
          __invoke_ub("find_first_set(empty mask) is UB");
        // Only return if the index actually constant-folded.
        if (__builtin_constant_p(_Idx))
          return _Idx;
      }
    return _Abi::_MaskImpl::_S_find_first_set(__k);
  }
5115 
5116 template <typename _Tp, typename _Abi>
5117  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
5118  find_last_set(const simd_mask<_Tp, _Abi>& __k)
5119  {
5120  if (__builtin_is_constant_evaluated() || __k._M_is_constprop())
5121  {
5122  constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
5123  const int _Idx = __call_with_n_evaluations<_Np>(
5124  [](auto... __indexes) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5125  return std::max({__indexes...});
5126  }, [&](auto __i) _GLIBCXX_SIMD_ALWAYS_INLINE_LAMBDA {
5127  return __k[__i] ? int(__i) : -1;
5128  });
5129  if (_Idx < 0)
5130  __invoke_ub("find_first_set(empty mask) is UB");
5131  if (__builtin_constant_p(_Idx))
5132  return _Idx;
5133  }
5134  return _Abi::_MaskImpl::_S_find_last_set(__k);
5135  }
5136 
// Reduction overloads taking _ExactBool — presumably a wrapper accepting
// exactly bool (no further implicit conversions); confirm against its
// definition in simd_detail.h. These serve single-element (scalar) masks,
// for which every reduction is trivial.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
all_of(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
any_of(_ExactBool __x) noexcept
{ return __x; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
none_of(_ExactBool __x) noexcept
{ return !__x; }

// A single element can never be "some but not all" set.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR bool
some_of(_ExactBool) noexcept
{ return false; }

// bool converts to 0 or 1, which is exactly the population count.
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
popcount(_ExactBool __x) noexcept
{ return __x; }

// With one element, the only possible set index is 0 (an all-false mask
// would be UB, matching the simd_mask overloads above).
_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_first_set(_ExactBool)
{ return 0; }

_GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR int
find_last_set(_ExactBool)
{ return 0; }
5164 
5165 // }}}
5166 
5167 /// @cond undocumented
5168 // _SimdIntOperators{{{1
// Primary template: empty base for non-integral element types (the bool
// parameter is false), contributing no operators to simd.
template <typename _V, typename _Tp, typename _Abi, bool>
  class _SimdIntOperators {};
5171 
// CRTP base for simd<_Tp, _Abi> (== _V) that supplies the operators only
// valid for integral element types: %, bitwise &, |, ^, ~ and shifts,
// plus their compound-assignment forms.
template <typename _V, typename _Tp, typename _Abi>
  class _SimdIntOperators<_V, _Tp, _Abi, true>
  {
    using _Impl = typename _SimdTraits<_Tp, _Abi>::_SimdImpl;

    // CRTP downcast to the derived simd type.
    _GLIBCXX_SIMD_INTRINSIC constexpr const _V&
    __derived() const
    { return *static_cast<const _V*>(this); }

    // Wraps an implementation value in the derived simd type.
    template <typename _Up>
      _GLIBCXX_SIMD_INTRINSIC static _GLIBCXX_SIMD_CONSTEXPR _V
      _S_make_derived(_Up&& __d)
      { return {__private_init, static_cast<_Up&&>(__d)}; }

  public:
    // Compound assignments are defined in terms of the binary operators
    // below.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator%=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs % __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator&=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs & __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator|=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs | __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator^=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs ^ __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator<<=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs << __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator>>=(_V& __lhs, const _V& __x)
    { return __lhs = __lhs >> __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator<<=(_V& __lhs, int __x)
    { return __lhs = __lhs << __x; }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V&
    operator>>=(_V& __lhs, int __x)
    { return __lhs = __lhs >> __x; }

    // Element-wise binary operators, forwarded to the implementation class.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator%(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_modulus(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator&(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_and(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator|(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_or(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator^(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_xor(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator<<(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_shift_left(__data(__x), __data(__y)));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator>>(const _V& __x, const _V& __y)
    {
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_shift_right(__data(__x), __data(__y)));
    }

    // Scalar shift count: the [expr.shift] preconditions (non-negative,
    // less than the width of the promoted element type) are checked
    // explicitly and violations reported via __invoke_ub.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator<<(const _V& __x, int __y)
    {
      if (__y < 0)
        __invoke_ub("The behavior is undefined if the right operand of a "
                    "shift operation is negative. [expr.shift]\nA shift by "
                    "%d was requested",
                    __y);
      // sizeof(declval<_Tp>() << __y) is the size of the *promoted* type.
      if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
        __invoke_ub(
          "The behavior is undefined if the right operand of a "
          "shift operation is greater than or equal to the width of the "
          "promoted left operand. [expr.shift]\nA shift by %d was requested",
          __y);
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_shift_left(__data(__x), __y));
    }

    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend
    _V
    operator>>(const _V& __x, int __y)
    {
      if (__y < 0)
        __invoke_ub(
          "The behavior is undefined if the right operand of a shift "
          "operation is negative. [expr.shift]\nA shift by %d was requested",
          __y);
      // Using << here is fine: only the size of the promoted type matters.
      if (size_t(__y) >= sizeof(declval<_Tp>() << __y) * __CHAR_BIT__)
        __invoke_ub(
          "The behavior is undefined if the right operand of a shift "
          "operation is greater than or equal to the width of the promoted "
          "left operand. [expr.shift]\nA shift by %d was requested",
          __y);
      return _SimdIntOperators::_S_make_derived(
        _Impl::_S_bit_shift_right(__data(__x), __y));
    }

    // unary operators (for integral _Tp)
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
    _V
    operator~() const
    { return {__private_init, _Impl::_S_complement(__derived()._M_data)}; }
  };
5319 
5320 //}}}1
5321 /// @endcond
5322 
5323 // simd {{{
5324 template <typename _Tp, typename _Abi>
5325  class simd : public _SimdIntOperators<
5326  simd<_Tp, _Abi>, _Tp, _Abi,
5327  conjunction<is_integral<_Tp>,
5328  typename _SimdTraits<_Tp, _Abi>::_IsValid>::value>,
5329  public _SimdTraits<_Tp, _Abi>::_SimdBase
5330  {
5331  using _Traits = _SimdTraits<_Tp, _Abi>;
5332  using _MemberType = typename _Traits::_SimdMember;
5333  using _CastType = typename _Traits::_SimdCastType;
5334  static constexpr _Tp* _S_type_tag = nullptr;
5335  friend typename _Traits::_SimdBase;
5336 
5337  public:
5338  using _Impl = typename _Traits::_SimdImpl;
5339  friend _Impl;
5340  friend _SimdIntOperators<simd, _Tp, _Abi, true>;
5341 
5342  using value_type = _Tp;
5343  using reference = _SmartReference<_MemberType, _Impl, value_type>;
5344  using mask_type = simd_mask<_Tp, _Abi>;
5345  using abi_type = _Abi;
5346 
5347  static constexpr size_t size()
5348  { return __size_or_zero_v<_Tp, _Abi>; }
5349 
5350  _GLIBCXX_SIMD_CONSTEXPR simd() = default;
5351  _GLIBCXX_SIMD_CONSTEXPR simd(const simd&) = default;
5352  _GLIBCXX_SIMD_CONSTEXPR simd(simd&&) noexcept = default;
5353  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(const simd&) = default;
5354  _GLIBCXX_SIMD_CONSTEXPR simd& operator=(simd&&) noexcept = default;
5355 
5356  // implicit broadcast constructor
5357  template <typename _Up,
5358  typename = enable_if_t<!is_same_v<__remove_cvref_t<_Up>, bool>>>
5359  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5360  simd(_ValuePreservingOrInt<_Up, value_type>&& __x)
5361  : _M_data(
5362  _Impl::_S_broadcast(static_cast<value_type>(static_cast<_Up&&>(__x))))
5363  {}
5364 
5365  // implicit type conversion constructor (convert from fixed_size to
5366  // fixed_size)
5367  template <typename _Up>
5368  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5369  simd(const simd<_Up, simd_abi::fixed_size<size()>>& __x,
5370  enable_if_t<
5371  conjunction<
5372  is_same<simd_abi::fixed_size<size()>, abi_type>,
5373  negation<__is_narrowing_conversion<_Up, value_type>>,
5374  __converts_to_higher_integer_rank<_Up, value_type>>::value,
5375  void*> = nullptr)
5376  : simd{static_cast<array<_Up, size()>>(__x).data(), vector_aligned} {}
5377 
5378  // explicit type conversion constructor
5379 #ifdef _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5380  template <typename _Up, typename _A2,
5381  typename = decltype(static_simd_cast<simd>(
5382  declval<const simd<_Up, _A2>&>()))>
5383  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5384  simd(const simd<_Up, _A2>& __x)
5385  : simd(static_simd_cast<simd>(__x)) {}
5386 #endif // _GLIBCXX_SIMD_ENABLE_STATIC_CAST
5387 
5388  // generator constructor
5389  template <typename _Fp>
5390  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5391  simd(_Fp&& __gen, _ValuePreservingOrInt<decltype(declval<_Fp>()(
5392  declval<_SizeConstant<0>&>())),
5393  value_type>* = nullptr)
5394  : _M_data(_Impl::_S_generator(static_cast<_Fp&&>(__gen), _S_type_tag)) {}
5395 
5396  // load constructor
5397  template <typename _Up, typename _Flags>
5398  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR
5399  simd(const _Up* __mem, _IsSimdFlagType<_Flags>)
5400  : _M_data(
5401  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag))
5402  {}
5403 
5404  // loads [simd.load]
5405  template <typename _Up, typename _Flags>
5406  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5407  copy_from(const _Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>)
5408  {
5409  _M_data = static_cast<decltype(_M_data)>(
5410  _Impl::_S_load(_Flags::template _S_apply<simd>(__mem), _S_type_tag));
5411  }
5412 
5413  // stores [simd.store]
5414  template <typename _Up, typename _Flags>
5415  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR void
5416  copy_to(_Vectorizable<_Up>* __mem, _IsSimdFlagType<_Flags>) const
5417  {
5418  _Impl::_S_store(_M_data, _Flags::template _S_apply<simd>(__mem),
5419  _S_type_tag);
5420  }
5421 
5422  // scalar access
5423  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR reference
5424  operator[](size_t __i)
5425  { return {_M_data, int(__i)}; }
5426 
5427  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR value_type
5428  operator[]([[maybe_unused]] size_t __i) const
5429  {
5430  if constexpr (__is_scalar_abi<_Abi>())
5431  {
5432  _GLIBCXX_DEBUG_ASSERT(__i == 0);
5433  return _M_data;
5434  }
5435  else
5436  return _M_data[__i];
5437  }
5438 
5439  // increment and decrement:
5440  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5441  operator++()
5442  {
5443  _Impl::_S_increment(_M_data);
5444  return *this;
5445  }
5446 
5447  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5448  operator++(int)
5449  {
5450  simd __r = *this;
5451  _Impl::_S_increment(_M_data);
5452  return __r;
5453  }
5454 
5455  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd&
5456  operator--()
5457  {
5458  _Impl::_S_decrement(_M_data);
5459  return *this;
5460  }
5461 
5462  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5463  operator--(int)
5464  {
5465  simd __r = *this;
5466  _Impl::_S_decrement(_M_data);
5467  return __r;
5468  }
5469 
5470  // unary operators (for any _Tp)
5471  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR mask_type
5472  operator!() const
5473  { return {__private_init, _Impl::_S_negate(_M_data)}; }
5474 
5475  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5476  operator+() const
5477  { return *this; }
5478 
5479  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR simd
5480  operator-() const
5481  { return {__private_init, _Impl::_S_unary_minus(_M_data)}; }
5482 
5483  // access to internal representation (suggested extension)
5484  _GLIBCXX_SIMD_ALWAYS_INLINE explicit _GLIBCXX_SIMD_CONSTEXPR
5485  simd(_CastType __init) : _M_data(__init) {}
5486 
5487  // compound assignment [simd.cassign]
5488  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5489  operator+=(simd& __lhs, const simd& __x)
5490  { return __lhs = __lhs + __x; }
5491 
5492  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5493  operator-=(simd& __lhs, const simd& __x)
5494  { return __lhs = __lhs - __x; }
5495 
5496  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5497  operator*=(simd& __lhs, const simd& __x)
5498  { return __lhs = __lhs * __x; }
5499 
5500  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd&
5501  operator/=(simd& __lhs, const simd& __x)
5502  { return __lhs = __lhs / __x; }
5503 
5504  // binary operators [simd.binary]
5505  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5506  operator+(const simd& __x, const simd& __y)
5507  { return {__private_init, _Impl::_S_plus(__x._M_data, __y._M_data)}; }
5508 
5509  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5510  operator-(const simd& __x, const simd& __y)
5511  { return {__private_init, _Impl::_S_minus(__x._M_data, __y._M_data)}; }
5512 
5513  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5514  operator*(const simd& __x, const simd& __y)
5515  { return {__private_init, _Impl::_S_multiplies(__x._M_data, __y._M_data)}; }
5516 
5517  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
5518  operator/(const simd& __x, const simd& __y)
5519  { return {__private_init, _Impl::_S_divides(__x._M_data, __y._M_data)}; }
5520 
5521  // compares [simd.comparison]
5522  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5523  operator==(const simd& __x, const simd& __y)
5524  { return simd::_S_make_mask(_Impl::_S_equal_to(__x._M_data, __y._M_data)); }
5525 
5526  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5527  operator!=(const simd& __x, const simd& __y)
5528  {
5529  return simd::_S_make_mask(
5530  _Impl::_S_not_equal_to(__x._M_data, __y._M_data));
5531  }
5532 
5533  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5534  operator<(const simd& __x, const simd& __y)
5535  { return simd::_S_make_mask(_Impl::_S_less(__x._M_data, __y._M_data)); }
5536 
5537  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5538  operator<=(const simd& __x, const simd& __y)
5539  {
5540  return simd::_S_make_mask(_Impl::_S_less_equal(__x._M_data, __y._M_data));
5541  }
5542 
5543  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5544  operator>(const simd& __x, const simd& __y)
5545  { return simd::_S_make_mask(_Impl::_S_less(__y._M_data, __x._M_data)); }
5546 
5547  _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend mask_type
5548  operator>=(const simd& __x, const simd& __y)
5549  {
5550  return simd::_S_make_mask(_Impl::_S_less_equal(__y._M_data, __x._M_data));
5551  }
5552 
5553  // operator?: overloads (suggested extension) {{{
#ifdef __GXX_CONDITIONAL_IS_OVERLOADABLE__
    // Overloaded conditional operator (non-standard GCC extension):
    // per element, yields __where_true where __k is set and
    // __where_false elsewhere.
    _GLIBCXX_SIMD_ALWAYS_INLINE _GLIBCXX_SIMD_CONSTEXPR friend simd
    operator?:(const mask_type& __k, const simd& __where_true,
               const simd& __where_false)
    {
      // Start from the false values, then overwrite the selected lanes.
      simd __blended = __where_false;
      _Impl::_S_masked_assign(__data(__k), __data(__blended),
                              __data(__where_true));
      return __blended;
    }
#endif // __GXX_CONDITIONAL_IS_OVERLOADABLE__
5565  // }}}
5566 
    // "private" because of the first argument's namespace: _PrivateInit is an
    // implementation-detail tag type, so only library code can name it.
    // Directly wraps an already-computed _MemberType value without conversion.
    _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
    simd(_PrivateInit, const _MemberType& __init)
    : _M_data(__init) {}
5571 
    // "private" because of the first argument's namespace (_BitsetInit is an
    // implementation-detail tag type).
    // _M_data() value-initializes to all-zero bits; the masked assignment then
    // writes ~*this (complement of zero, i.e. all-one bits) into exactly the
    // elements whose corresponding bit in __init is set.
    // NOTE(review): relies on value-init producing an all-zero bit pattern in
    // every element — presumably only instantiated for element types where the
    // all-ones pattern is the intended "set" value; confirm for FP elements.
    _GLIBCXX_SIMD_INTRINSIC
    simd(_BitsetInit, bitset<size()> __init) : _M_data()
    { where(mask_type(__bitset_init, __init), *this) = ~*this; }
5576 
5577  _GLIBCXX_SIMD_INTRINSIC constexpr bool
5578  _M_is_constprop() const
5579  {
5580  if constexpr (__is_scalar_abi<_Abi>())
5581  return __builtin_constant_p(_M_data);
5582  else
5583  return _M_data._M_is_constprop();
5584  }
5585 
5586  private:
5587  _GLIBCXX_SIMD_INTRINSIC static constexpr mask_type
5588  _S_make_mask(typename mask_type::_MemberType __k)
5589  { return {__private_init, __k}; }
5590 
    // Befriend the free __data() accessors (defined after the class) so that
    // implementation code can reach the private _M_data member.
    friend const auto& __data<value_type, abi_type>(const simd&);
    friend auto& __data<value_type, abi_type>(simd&);
    // The sole data member: the ABI-specific storage, over-aligned per the
    // traits' required simd alignment.
    alignas(_Traits::_S_simd_align) _MemberType _M_data;
  };
5595 
5596 // }}}
5597 /// @cond undocumented
5598 // __data {{{
5599 template <typename _Tp, typename _Ap>
5600  _GLIBCXX_SIMD_INTRINSIC constexpr const auto&
5601  __data(const simd<_Tp, _Ap>& __x)
5602  { return __x._M_data; }
5603 
5604 template <typename _Tp, typename _Ap>
5605  _GLIBCXX_SIMD_INTRINSIC constexpr auto&
5606  __data(simd<_Tp, _Ap>& __x)
5607  { return __x._M_data; }
5608 
5609 // }}}
5610 namespace __float_bitwise_operators { //{{{
5611 template <typename _Tp, typename _Ap>
5612  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5613  operator^(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5614  { return {__private_init, _Ap::_SimdImpl::_S_bit_xor(__data(__a), __data(__b))}; }
5615 
5616 template <typename _Tp, typename _Ap>
5617  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5618  operator|(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5619  { return {__private_init, _Ap::_SimdImpl::_S_bit_or(__data(__a), __data(__b))}; }
5620 
5621 template <typename _Tp, typename _Ap>
5622  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR simd<_Tp, _Ap>
5623  operator&(const simd<_Tp, _Ap>& __a, const simd<_Tp, _Ap>& __b)
5624  { return {__private_init, _Ap::_SimdImpl::_S_bit_and(__data(__a), __data(__b))}; }
5625 
5626 template <typename _Tp, typename _Ap>
5627  _GLIBCXX_SIMD_INTRINSIC _GLIBCXX_SIMD_CONSTEXPR
5628  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Ap>>
5629  operator~(const simd<_Tp, _Ap>& __a)
5630  { return {__private_init, _Ap::_SimdImpl::_S_complement(__data(__a))}; }
5631 } // namespace __float_bitwise_operators }}}
5632 /// @endcond
5633 
5634 /// @}
5635 _GLIBCXX_SIMD_END_NAMESPACE
5636 
5637 #endif // __cplusplus >= 201703L
5638 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_H
5639 
5640 // vim: foldmethod=marker foldmarker={{{,}}}
constexpr _If_is_unsigned_integer< _Tp, int > popcount(_Tp __x) noexcept
The number of bits set in x.
Definition: bit:426
constexpr duration< __common_rep_t< _Rep1, __disable_if_is_duration< _Rep2 > >, _Period > operator%(const duration< _Rep1, _Period > &__d, const _Rep2 &__s)
Definition: chrono.h:779
constexpr complex< _Tp > operator*(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x times y.
Definition: complex:395
constexpr complex< _Tp > operator/(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x divided by y.
Definition: complex:425
constexpr complex< _Tp > operator-(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x minus y.
Definition: complex:365
constexpr complex< _Tp > operator+(const complex< _Tp > &__x, const complex< _Tp > &__y)
Return new complex value x plus y.
Definition: complex:335
typename remove_reference< _Tp >::type remove_reference_t
Alias template for remove_reference.
Definition: type_traits:1640
typename make_unsigned< _Tp >::type make_unsigned_t
Alias template for make_unsigned.
Definition: type_traits:1983
void void_t
A metafunction that always yields void, used for detecting valid types.
Definition: type_traits:2636
integral_constant< bool, true > true_type
The type used as a compile-time boolean with true value.
Definition: type_traits:82
typename conditional< _Cond, _Iftrue, _Iffalse >::type conditional_t
Alias template for conditional.
Definition: type_traits:2618
typename remove_pointer< _Tp >::type remove_pointer_t
Alias template for remove_pointer.
Definition: type_traits:2065
integral_constant< bool, false > false_type
The type used as a compile-time boolean with false value.
Definition: type_traits:85
typename remove_const< _Tp >::type remove_const_t
Alias template for remove_const.
Definition: type_traits:1583
typename enable_if< _Cond, _Tp >::type enable_if_t
Alias template for enable_if.
Definition: type_traits:2614
constexpr auto tuple_cat(_Tpls &&... __tpls) -> typename __tuple_cat_result< _Tpls... >::__type
Create a tuple containing all elements from multiple tuple-like objects.
Definition: tuple:2155
auto declval() noexcept -> decltype(__declval< _Tp >(0))
Definition: type_traits:2386
constexpr std::remove_reference< _Tp >::type && move(_Tp &&__t) noexcept
Convert a value to an rvalue.
Definition: move.h:97
constexpr tuple< typename __decay_and_strip< _Elements >::__type... > make_tuple(_Elements &&... __args)
Create a tuple containing copies of the arguments.
Definition: tuple:2001
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:257
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:233
constexpr _Tp reduce(_InputIterator __first, _InputIterator __last, _Tp __init, _BinaryOperation __binary_op)
Calculate reduction of values in a range.
Definition: numeric:291
std::basic_istream< _CharT, _Traits > & operator>>(std::basic_istream< _CharT, _Traits > &__is, bitset< _Nb > &__x)
Global I/O operators for bitsets.
Definition: bitset:1593
constexpr bitset< _Nb > operator&(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1553
std::basic_ostream< _CharT, _Traits > & operator<<(std::basic_ostream< _CharT, _Traits > &__os, const bitset< _Nb > &__x)
Global I/O operators for bitsets.
Definition: bitset:1683
make_integer_sequence< size_t, _Num > make_index_sequence
Alias template make_index_sequence.
Definition: utility.h:188
constexpr bitset< _Nb > operator|(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1563
constexpr bitset< _Nb > operator^(const bitset< _Nb > &__x, const bitset< _Nb > &__y) noexcept
Global bitwise operations on bitsets.
Definition: bitset:1573