X7ROOT File Manager
Current Path:
/lib64/llvm17/lib/clang/17/include
lib64
/
llvm17
/
lib
/
clang
/
17
/
include
/
ðŸ“
..
📄
__clang_cuda_builtin_vars.h
(4.78 KB)
📄
__clang_cuda_cmath.h
(18.06 KB)
📄
__clang_cuda_complex_builtins.h
(9.36 KB)
📄
__clang_cuda_device_functions.h
(56.68 KB)
📄
__clang_cuda_intrinsics.h
(29.93 KB)
📄
__clang_cuda_libdevice_declares.h
(21.87 KB)
📄
__clang_cuda_math.h
(15.99 KB)
📄
__clang_cuda_math_forward_declares.h
(8.27 KB)
📄
__clang_cuda_runtime_wrapper.h
(17.61 KB)
📄
__clang_cuda_texture_intrinsics.h
(31.86 KB)
📄
__clang_hip_cmath.h
(26.34 KB)
📄
__clang_hip_libdevice_declares.h
(19.87 KB)
📄
__clang_hip_math.h
(31.96 KB)
📄
__clang_hip_runtime_wrapper.h
(4.65 KB)
📄
__clang_hip_stdlib.h
(1.19 KB)
📄
__stddef_max_align_t.h
(857 B)
📄
__wmmintrin_aes.h
(5.15 KB)
📄
__wmmintrin_pclmul.h
(1.99 KB)
📄
adxintrin.h
(7.37 KB)
📄
altivec.h
(697.32 KB)
📄
ammintrin.h
(7.54 KB)
📄
amxcomplexintrin.h
(6.81 KB)
📄
amxfp16intrin.h
(1.82 KB)
📄
amxintrin.h
(21.12 KB)
📄
arm64intr.h
(993 B)
📄
arm_acle.h
(25.66 KB)
📄
arm_bf16.h
(548 B)
📄
arm_cde.h
(32.67 KB)
📄
arm_cmse.h
(6.21 KB)
📄
arm_fp16.h
(16.92 KB)
📄
arm_mve.h
(1.48 MB)
📄
arm_neon.h
(2.45 MB)
📄
arm_neon_sve_bridge.h
(9.48 KB)
📄
arm_sme_draft_spec_subject_to_change.h
(60.2 KB)
📄
arm_sve.h
(1.51 MB)
📄
armintr.h
(843 B)
📄
avx2intrin.h
(186.96 KB)
📄
avx512bf16intrin.h
(10.51 KB)
📄
avx512bitalgintrin.h
(2.41 KB)
📄
avx512bwintrin.h
(75.33 KB)
📄
avx512cdintrin.h
(4.12 KB)
📄
avx512dqintrin.h
(58.75 KB)
📄
avx512erintrin.h
(11.83 KB)
📄
avx512fintrin.h
(382.64 KB)
📄
avx512fp16intrin.h
(156.63 KB)
📄
avx512ifmaintrin.h
(2.49 KB)
📄
avx512ifmavlintrin.h
(4.31 KB)
📄
avx512pfintrin.h
(4.53 KB)
📄
avx512vbmi2intrin.h
(13.17 KB)
📄
avx512vbmiintrin.h
(3.72 KB)
📄
avx512vbmivlintrin.h
(6.94 KB)
📄
avx512vlbf16intrin.h
(19.21 KB)
📄
avx512vlbitalgintrin.h
(4.23 KB)
📄
avx512vlbwintrin.h
(121.26 KB)
📄
avx512vlcdintrin.h
(7.66 KB)
📄
avx512vldqintrin.h
(46.41 KB)
📄
avx512vlfp16intrin.h
(85.51 KB)
📄
avx512vlintrin.h
(322.29 KB)
📄
avx512vlvbmi2intrin.h
(25.72 KB)
📄
avx512vlvnniintrin.h
(13.13 KB)
📄
avx512vlvp2intersectintrin.h
(4.44 KB)
📄
avx512vnniintrin.h
(4.21 KB)
📄
avx512vp2intersectintrin.h
(2.9 KB)
📄
avx512vpopcntdqintrin.h
(2 KB)
📄
avx512vpopcntdqvlintrin.h
(3.31 KB)
📄
avxifmaintrin.h
(5.75 KB)
📄
avxintrin.h
(195.41 KB)
📄
avxneconvertintrin.h
(14.09 KB)
📄
avxvnniint16intrin.h
(17.41 KB)
📄
avxvnniint8intrin.h
(18.67 KB)
📄
avxvnniintrin.h
(10.44 KB)
📄
bmi2intrin.h
(7.09 KB)
📄
bmiintrin.h
(14.12 KB)
📄
builtins.h
(741 B)
📄
cet.h
(1.49 KB)
📄
cetintrin.h
(3.27 KB)
📄
cldemoteintrin.h
(1.18 KB)
📄
clflushoptintrin.h
(1.17 KB)
📄
clwbintrin.h
(1.2 KB)
📄
clzerointrin.h
(1.19 KB)
📄
cmpccxaddintrin.h
(2.33 KB)
📄
cpuid.h
(11.01 KB)
📄
crc32intrin.h
(3.27 KB)
ðŸ“
cuda_wrappers
📄
emmintrin.h
(192.64 KB)
📄
enqcmdintrin.h
(2.12 KB)
📄
f16cintrin.h
(5.39 KB)
📄
float.h
(5.63 KB)
📄
fma4intrin.h
(6.82 KB)
📄
fmaintrin.h
(28.4 KB)
📄
fxsrintrin.h
(2.82 KB)
📄
gfniintrin.h
(7.57 KB)
📄
hexagon_circ_brev_intrinsics.h
(15.59 KB)
📄
hexagon_protos.h
(374.42 KB)
📄
hexagon_types.h
(130.33 KB)
📄
hresetintrin.h
(1.36 KB)
📄
htmintrin.h
(6.14 KB)
📄
htmxlintrin.h
(9.01 KB)
📄
hvx_hexagon_protos.h
(254.26 KB)
📄
ia32intrin.h
(12.72 KB)
📄
immintrin.h
(23.57 KB)
📄
intrin.h
(28.22 KB)
📄
inttypes.h
(2.26 KB)
📄
invpcidintrin.h
(764 B)
📄
iso646.h
(656 B)
📄
keylockerintrin.h
(17.98 KB)
📄
larchintrin.h
(7.8 KB)
📄
limits.h
(3.61 KB)
ðŸ“
llvm_libc_wrappers
📄
lwpintrin.h
(5 KB)
📄
lzcntintrin.h
(3.18 KB)
📄
mm3dnow.h
(4.5 KB)
📄
mm_malloc.h
(1.88 KB)
📄
mmintrin.h
(55.98 KB)
📄
module.modulemap
(3.33 KB)
📄
movdirintrin.h
(1.57 KB)
📄
msa.h
(25.01 KB)
📄
mwaitxintrin.h
(2.19 KB)
📄
nmmintrin.h
(709 B)
📄
opencl-c-base.h
(30.38 KB)
📄
opencl-c.h
(874.39 KB)
ðŸ“
openmp_wrappers
📄
pconfigintrin.h
(1.19 KB)
📄
pkuintrin.h
(934 B)
📄
pmmintrin.h
(10.5 KB)
📄
popcntintrin.h
(1.82 KB)
ðŸ“
ppc_wrappers
📄
prfchiintrin.h
(2.02 KB)
📄
prfchwintrin.h
(2.06 KB)
📄
ptwriteintrin.h
(1.05 KB)
📄
raointintrin.h
(6.59 KB)
📄
rdpruintrin.h
(1.59 KB)
📄
rdseedintrin.h
(2.85 KB)
📄
riscv_ntlh.h
(855 B)
📄
rtmintrin.h
(1.25 KB)
📄
s390intrin.h
(604 B)
📄
serializeintrin.h
(881 B)
📄
sgxintrin.h
(1.77 KB)
📄
sha512intrin.h
(5.95 KB)
📄
shaintrin.h
(7.37 KB)
📄
sifive_vector.h
(522 B)
📄
sm3intrin.h
(7.29 KB)
📄
sm4intrin.h
(8.2 KB)
📄
smmintrin.h
(99.32 KB)
📄
stdalign.h
(911 B)
📄
stdarg.h
(1.66 KB)
📄
stdatomic.h
(8.3 KB)
📄
stdbool.h
(1.04 KB)
📄
stddef.h
(4.16 KB)
📄
stdint.h
(32.49 KB)
📄
stdnoreturn.h
(1.17 KB)
📄
tbmintrin.h
(3.15 KB)
📄
tgmath.h
(29.68 KB)
📄
tmmintrin.h
(29.51 KB)
📄
tsxldtrkintrin.h
(1.97 KB)
📄
uintrintrin.h
(4.96 KB)
📄
unwind.h
(11.21 KB)
📄
vadefs.h
(1.39 KB)
📄
vaesintrin.h
(2.46 KB)
📄
varargs.h
(477 B)
📄
vecintrin.h
(360.82 KB)
📄
velintrin.h
(2.1 KB)
📄
velintrin_approx.h
(3.54 KB)
📄
velintrin_gen.h
(69.06 KB)
📄
vpclmulqdqintrin.h
(1.06 KB)
📄
waitpkgintrin.h
(1.33 KB)
📄
wasm_simd128.h
(76.25 KB)
📄
wbnoinvdintrin.h
(749 B)
📄
wmmintrin.h
(659 B)
📄
x86gprintrin.h
(2.32 KB)
📄
x86intrin.h
(1.81 KB)
📄
xmmintrin.h
(106.73 KB)
📄
xopintrin.h
(19.96 KB)
📄
xsavecintrin.h
(2.51 KB)
📄
xsaveintrin.h
(1.64 KB)
📄
xsaveoptintrin.h
(1 KB)
📄
xsavesintrin.h
(1.24 KB)
📄
xtestintrin.h
(873 B)
Editing: avx512vldqintrin.h
/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ #ifndef __IMMINTRIN_H #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." #endif #ifndef __AVX512VLDQINTRIN_H #define __AVX512VLDQINTRIN_H /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256))) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi64 (__m256i __A, __m256i __B) { return (__m256i) ((__v4du) __A * (__v4du) __B); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mullo_epi64 (__m128i __A, __m128i __B) { return (__m128i) ((__v2du) __A * (__v2du) __B); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)_mm_setzero_pd()); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)__W); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepi64_pd (__m128i __A) { return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_pd (__m256i __A) { return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ps (__m256i __A) { return (__m128)__builtin_convertvector((__v4di)__A, __v4sf); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepi64_ps(__A), (__v4sf)_mm_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu64_pd (__m128i __A) { return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)__W); } static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)_mm_setzero_pd()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_pd (__m256i __A) { return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)__W); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ps (__m256i __A) { return (__m128)__builtin_convertvector((__v4du)__A, __v4sf); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtepu64_ps(__A), (__v4sf)_mm_setzero_ps()); } #define _mm_range_pd(A, B, C) \ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm_mask_range_pd(W, U, A, B, C) \ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm_maskz_range_pd(U, A, B, C) \ ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ (__v2df)(__m128d)(B), (int)(C), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm256_range_pd(A, B, C) \ ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)-1)) #define _mm256_mask_range_pd(W, U, A, B, C) \ ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)(__m256d)(W), \ (__mmask8)(U))) #define _mm256_maskz_range_pd(U, A, B, C) \ ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ (__v4df)(__m256d)(B), (int)(C), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) #define _mm_range_ps(A, B, C) \ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1)) #define _mm_mask_range_ps(W, U, A, B, C) \ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)(__m128)(W), (__mmask8)(U))) #define _mm_maskz_range_ps(U, A, B, C) \ ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ (__v4sf)(__m128)(B), (int)(C), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) #define _mm256_range_ps(A, B, C) \ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1)) #define _mm256_mask_range_ps(W, U, A, B, C) \ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)(__m256)(W), (__mmask8)(U))) #define _mm256_maskz_range_ps(U, A, B, C) \ ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ (__v8sf)(__m256)(B), (int)(C), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) #define _mm_reduce_pd(A, B) \ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1)) #define _mm_mask_reduce_pd(W, U, A, B) \ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm_maskz_reduce_pd(U, A, B) \ ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm256_reduce_pd(A, B) \ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)-1)) #define _mm256_mask_reduce_pd(W, U, A, B) \ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)(__m256d)(W), \ (__mmask8)(U))) #define _mm256_maskz_reduce_pd(U, A, B) \ ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ (__v4df)_mm256_setzero_pd(), \ (__mmask8)(U))) #define _mm_reduce_ps(A, B) \ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1)) #define _mm_mask_reduce_ps(W, U, A, B) \ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)(__m128)(W), \ (__mmask8)(U))) #define _mm_maskz_reduce_ps(U, A, B) \ ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U))) #define _mm256_reduce_ps(A, B) \ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)-1)) #define _mm256_mask_reduce_ps(W, U, A, B) \ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)(__m256)(W), \ (__mmask8)(U))) #define _mm256_maskz_reduce_ps(U, A, B) \ ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U))) static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi32_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi32 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2d128 (__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi32 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2d256 (__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi64 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2q128 (__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi64 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2q256 (__A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi64_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); } static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi64_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x2 (__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)__O); } static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, (__v8sf)_mm256_broadcast_f32x2(__A), (__v8sf)_mm256_setzero_ps()); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcast_f64x2(__m128d __A) { return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)__O); } static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, (__v4df)_mm256_broadcast_f64x2(__A), (__v4df)_mm256_setzero_pd()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcast_i32x2 (__m128i __A) { return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)__O); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_broadcast_i32x2(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x2 (__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_broadcast_i32x2(__A), (__v8si)_mm256_setzero_si256()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i64x2(__m128i __A) { return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)__O); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_broadcast_i64x2(__A), (__v4di)_mm256_setzero_si256()); } #define _mm256_extractf64x2_pd(A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v2df)_mm_undefined_pd(), \ (__mmask8)-1)) #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v2df)(__m128d)(W), \ (__mmask8)(U))) #define _mm256_maskz_extractf64x2_pd(U, A, imm) \ ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ (int)(imm), \ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U))) #define _mm256_extracti64x2_epi64(A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ (int)(imm), \ (__v2di)_mm_undefined_si128(), \ (__mmask8)-1)) #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ (int)(imm), \ (__v2di)(__m128i)(W), \ (__mmask8)(U))) #define _mm256_maskz_extracti64x2_epi64(U, A, imm) \ ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ (int)(imm), \ (__v2di)_mm_setzero_si128(), \ (__mmask8)(U))) #define _mm256_insertf64x2(A, B, imm) \ ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \ (__v2df)(__m128d)(B), (int)(imm))) #define _mm256_mask_insertf64x2(W, U, A, B, imm) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ (__v4df)(__m256d)(W))) #define _mm256_maskz_insertf64x2(U, A, B, imm) \ ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ (__v4df)_mm256_setzero_pd())) #define _mm256_inserti64x2(A, B, imm) \ ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \ (__v2di)(__m128i)(B), (int)(imm))) #define _mm256_mask_inserti64x2(W, U, A, B, imm) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ (__v4di)(__m256i)(W))) #define _mm256_maskz_inserti64x2(U, A, B, imm) \ ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256())) #define _mm_mask_fpclass_pd_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ (__mmask8)(U))) #define _mm_fpclass_pd_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ (__mmask8)-1)) #define _mm256_mask_fpclass_pd_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ (__mmask8)(U))) #define _mm256_fpclass_pd_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ (__mmask8)-1)) #define _mm_mask_fpclass_ps_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__mmask8)(U))) #define _mm_fpclass_ps_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ (__mmask8)-1)) #define _mm256_mask_fpclass_ps_mask(U, A, imm) \ ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__mmask8)(U))) #define _mm256_fpclass_ps_mask(A, imm) \ ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__mmask8)-1)) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 #endif
Upload File
Create Folder