Buckets:
arudradey/ml-cpu-storage / emsdk /upstream /emscripten /cache /sysroot /include /compat /xmmintrin.h
| /* | |
| * Copyright 2020 The Emscripten Authors. All rights reserved. | |
| * Emscripten is available under two separate licenses, the MIT license and the | |
| * University of Illinois/NCSA Open Source License. Both these licenses can be | |
| * found in the LICENSE file. | |
| */ | |
| // Emscripten SIMD support doesn't support MMX/float32x2/__m64. | |
| // However, we support loading and storing 2-vectors, so | |
| // recognize the type at least. | |
| typedef float __m64 __attribute__((__vector_size__(8), __aligned__(8))); | |
| typedef __f32x4 __m128; | |
| typedef v128_t __m128i; | |
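// This is defined as a macro because __builtin_shufflevector requires its
// mask argument to be a compile-time constant.
// NOTE(review): no macro follows this comment in the visible text; it
// presumably describes the _mm_shuffle_ps macro used further below -- confirm
// and keep this comment next to that macro.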
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_set_ps(float __z, float __y, float __x, float __w) | |
| { | |
| return (__m128)wasm_f32x4_make(__w, __x, __y, __z); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_setr_ps(float __z, float __y, float __x, float __w) | |
| { | |
| return (__m128)wasm_f32x4_make(__z, __y, __x, __w); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_set_ss(float __w) | |
| { | |
| return (__m128)wasm_f32x4_make(__w, 0, 0, 0); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_set_ps1(float __w) | |
| { | |
| return (__m128)wasm_f32x4_splat(__w); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_setzero_ps(void) | |
| { | |
| return (__m128)wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_load_ps(const float *__p) | |
| { | |
| return *(__m128*)__p; | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_loadl_pi(__m128 __a, const void /*__m64*/ *__p) | |
| { | |
| return (__m128)wasm_v128_load64_lane(__p, (v128_t)__a, 0); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_loadh_pi(__m128 __a, const void /*__m64*/ *__p) | |
| { | |
| return (__m128)wasm_v128_load64_lane(__p, (v128_t)__a, 1); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_loadr_ps(const float *__p) | |
| { | |
| __m128 __v = _mm_load_ps(__p); | |
| return (__m128)__f32x4_shuffle(__v, __v, 3, 2, 1, 0); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_loadu_ps(const float *__p) | |
| { | |
| return (__m128)wasm_v128_load(__p); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_load_ps1(const float *__p) | |
| { | |
| return (__m128)wasm_v32x4_load_splat(__p); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_load_ss(const float *__p) | |
| { | |
| return (__m128)wasm_v128_load32_zero(__p); | |
| } | |
| static __inline__ void __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_storel_pi(__m64 *__p, __m128 __a) | |
| { | |
| wasm_v128_store64_lane((void*)__p, (v128_t)__a, 0); | |
| } | |
| static __inline__ void __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_storeh_pi(__m64 *__p, __m128 __a) | |
| { | |
| wasm_v128_store64_lane((void*)__p, (v128_t)__a, 1); | |
| } | |
| static __inline__ void __attribute__((__always_inline__, __nodebug__)) | |
| _mm_store_ps(float *__p, __m128 __a) | |
| { | |
| *(__m128 *)__p = __a; | |
| } | |
| // No NTA cache hint available. | |
// Prefetching is not available, so this is a stub: both arguments are
// accepted and ignored.
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_prefetch(const void *__p, int __i)
{
  // Void casts only silence unused-parameter warnings.
  (void)__p;
  (void)__i;
}
// Store fence, implemented as a full barrier via __sync_synchronize().
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_sfence(void)
{
  // The Wasm/SharedArrayBuffer memory model is sequentially consistent;
  // a future version of the spec may provide a more specific fence.
  __sync_synchronize();
}
| static __inline__ void __attribute__((__always_inline__, __nodebug__)) | |
| _mm_storer_ps(float *__p, __m128 __a) | |
| { | |
| _mm_store_ps(__p, _mm_shuffle_ps(__a, __a, _MM_SHUFFLE(0, 1, 2, 3))); | |
| } | |
| static __inline__ void __attribute__((__always_inline__, __nodebug__)) | |
| _mm_store_ps1(float *__p, __m128 __a) | |
| { | |
| _mm_store_ps(__p, _mm_shuffle_ps(__a, __a, _MM_SHUFFLE(0, 0, 0, 0))); | |
| } | |
| static __inline__ void __attribute__((__always_inline__, __nodebug__)) | |
| _mm_store_ss(float *__p, __m128 __a) | |
| { | |
| wasm_v128_store32_lane((void*)__p, (v128_t)__a, 0); | |
| } | |
| static __inline__ void __attribute__((__always_inline__, __nodebug__)) | |
| _mm_storeu_ps(float *__p, __m128 __a) | |
| { | |
| struct __unaligned { | |
| __m128 __v; | |
| } __attribute__((__packed__, __may_alias__)); | |
| ((struct __unaligned *)__p)->__v = __a; | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__)) | |
| _mm_movemask_ps(__m128 __a) | |
| { | |
| return (int)wasm_i32x4_bitmask((v128_t)__a); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_move_ss(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)__f32x4_shuffle(__a, __b, 4, 1, 2, 3); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_add_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_add((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_add_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_add_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_sub_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_sub((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_sub_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_sub_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_mul_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_mul((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_mul_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_mul_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_div_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_div((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_div_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_div_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_min_ps(__m128 __a, __m128 __b) | |
| { | |
| // return (__m128)wasm_f32x4_pmin((v128_t)__a, (v128_t)__b); // TODO: Migrate to this, once it works in VMs | |
| return (__m128)wasm_v128_bitselect((v128_t)__a, (v128_t)__b, (v128_t)wasm_f32x4_lt((v128_t)__a, (v128_t)__b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_min_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_min_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_max_ps(__m128 __a, __m128 __b) | |
| { | |
| // return (__m128)wasm_f32x4_pmax((v128_t)__a, (v128_t)__b); // TODO: Migrate to this, once it works in VMs | |
| return (__m128)wasm_v128_bitselect((v128_t)__a, (v128_t)__b, (v128_t)wasm_f32x4_gt((v128_t)__a, (v128_t)__b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_max_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_max_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_rcp_ps(__m128 __a) | |
| { | |
| return (__m128)wasm_f32x4_div((v128_t)_mm_set1_ps(1.0f), (v128_t)__a); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_rcp_ss(__m128 __a) | |
| { | |
| return _mm_move_ss(__a, _mm_rcp_ps(__a)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_sqrt_ps(__m128 __a) | |
| { | |
| return (__m128)wasm_f32x4_sqrt((v128_t)__a); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_sqrt_ss(__m128 __a) | |
| { | |
| return _mm_move_ss(__a, _mm_sqrt_ps(__a)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_rsqrt_ss(__m128 __a) | |
| { | |
| return _mm_move_ss(__a, _mm_rsqrt_ps(__a)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_unpackhi_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)__f32x4_shuffle(__a, __b, 2, 6, 3, 7); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_unpacklo_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)__f32x4_shuffle(__a, __b, 0, 4, 1, 5); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_movehl_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)__f32x4_shuffle(__a, __b, 6, 7, 2, 3); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_movelh_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)__f32x4_shuffle(__a, __b, 0, 1, 4, 5); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmplt_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_lt((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmplt_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmplt_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmple_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_le((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmple_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmple_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpeq_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_eq((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpeq_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpeq_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpge_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_ge((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpge_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpge_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpgt_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_gt((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpgt_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpgt_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cmpord_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_and(wasm_f32x4_eq((v128_t)__a, (v128_t)__a), | |
| wasm_f32x4_eq((v128_t)__b, (v128_t)__b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cmpord_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpord_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cmpunord_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_or(wasm_f32x4_ne((v128_t)__a, (v128_t)__a), | |
| wasm_f32x4_ne((v128_t)__b, (v128_t)__b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cmpunord_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpunord_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_and_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_and((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_andnot_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_andnot((v128_t)__b, (v128_t)__a); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_or_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_or((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_xor_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_xor((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpneq_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_f32x4_ne((v128_t)__a, (v128_t)__b); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpneq_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpneq_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpnge_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_not((v128_t)_mm_cmpge_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpnge_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpnge_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpngt_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_not((v128_t)_mm_cmpgt_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpngt_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpngt_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpnle_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_not((v128_t)_mm_cmple_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpnle_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpnle_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpnlt_ps(__m128 __a, __m128 __b) | |
| { | |
| return (__m128)wasm_v128_not((v128_t)_mm_cmplt_ps(__a, __b)); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cmpnlt_ss(__m128 __a, __m128 __b) | |
| { | |
| return _mm_move_ss(__a, _mm_cmpnlt_ps(__a, __b)); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_comieq_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) == wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_comige_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) >= wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_comigt_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) > wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_comile_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) <= wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_comilt_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) < wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_comineq_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) != wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_ucomieq_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) == wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_ucomige_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) >= wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_ucomigt_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) > wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_ucomile_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) <= wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_ucomilt_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) < wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_ucomineq_ss(__m128 __a, __m128 __b) | |
| { | |
| return wasm_f32x4_extract_lane((v128_t)__a, 0) != wasm_f32x4_extract_lane((v128_t)__b, 0); | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_cvtsi32_ss(__m128 __a, int __b) | |
| { | |
| __f32x4 __v = (__f32x4)__a; | |
| __v[0] = (float)__b; | |
| return (__m128)__v; | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvtss_si32(__m128 __a) | |
| { | |
| float e = ((__f32x4)__a)[0]; | |
| if (e < 2147483648.0f && e >= -2147483648.0f && (lrint(e) != 0 || fabsf(e) < 2.f)) | |
| return lrint(e); | |
| else | |
| return (int)0x80000000; | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) _mm_cvttss_si32(__m128 __a) | |
| { | |
| float e = ((__f32x4)__a)[0]; | |
| if (e < 2147483648.0f && e >= -2147483648.0f && (lrint(e) != 0 || fabsf(e) < 2.f)) | |
| return (int)e; | |
| else | |
| return (int)0x80000000; | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_cvtsi64_ss(__m128 __a, long long __b) | |
| { | |
| __f32x4 __v = (__f32x4)__a; | |
| __v[0] = (float)__b; | |
| return (__m128)__v; | |
| } | |
| static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_cvtss_si64(__m128 __a) | |
| { | |
| float e = ((__f32x4)__a)[0]; | |
| long long x = llrintf(e); | |
| if (e <= LLONG_MAX && e >= LLONG_MIN && (x != 0 || fabsf(e) < 2.f)) | |
| return x; | |
| else | |
| return 0x8000000000000000LL; | |
| } | |
| static __inline__ long long __attribute__((__always_inline__, __nodebug__, DIAGNOSE_SLOW)) | |
| _mm_cvttss_si64(__m128 __a) | |
| { | |
| float e = ((__f32x4)__a)[0]; | |
| long long x = llrintf(e); | |
| if (e <= LLONG_MAX && e >= LLONG_MIN && (x != 0 || fabsf(e) < 2.f)) | |
| return (long long)e; | |
| else | |
| return 0x8000000000000000LL; | |
| } | |
| static __inline__ float __attribute__((__always_inline__, __nodebug__)) | |
| _mm_cvtss_f32(__m128 __a) | |
| { | |
| return (float)((__f32x4)__a)[0]; | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_undefined() | |
| { | |
| __m128 val; | |
| return val; | |
| } | |
| static __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) | |
| _mm_undefined_ps() | |
| { | |
| __m128 val; | |
| return val; | |
| } | |
| static __inline__ int __attribute__((__always_inline__, __nodebug__)) | |
| _mm_getcsr() | |
| { | |
| return _MM_MASK_INEXACT | _MM_MASK_DENORM | _MM_MASK_DIV_ZERO | _MM_MASK_OVERFLOW | _MM_MASK_UNDERFLOW | _MM_MASK_INVALID | |
| | _MM_ROUND_NEAREST | _MM_FLUSH_ZERO_OFF; | |
| } | |
| // Unavailable functions: | |
| // void _MM_SET_EXCEPTION_STATE(unsigned int __a); | |
| // void _MM_SET_EXCEPTION_MASK(unsigned int __a); | |
| // void _MM_GET_ROUNDING_MODE(unsigned int __a); | |
| // void _MM_GET_FLUSH_ZERO_MODE(unsigned int __a); | |
Xet Storage Details
- Size:
- 22 kB
- Xet hash:
- 1430c25e75eea84c1786cb2b81d06034379b58f6481eae85fe9d5f70ebc4bcf0
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.