| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | #include "generic.h" |
| |
|
| | #undef T /* reset every template parameter; presumably so this file can be re-included with a different FLT */ |
| | #undef SFX /* scalar function-name suffix */ |
| | #undef VSIZE /* SSE2: elements per vector */ |
| | #undef VSFX /* SSE2: intrinsic name suffix */ |
| | #undef VTYPE /* SSE2: vector type */ |
| | #undef VSIZEavx /* AVX: elements per vector */ |
| | #undef VSFXavx /* AVX: intrinsic name suffix */ |
| | #undef VTYPEavx /* AVX: vector type */ |
| |
|
| | #if (FLT == VL_TYPE_FLOAT) /* pick the element type T and suffix SFX from FLT; first matching branch wins */ |
| | # define T float |
| | # define SFX f |
| | #elif (FLT == VL_TYPE_DOUBLE) |
| | # define T double |
| | # define SFX d |
| | #elif (FLT == VL_TYPE_UINT32) |
| | # define T vl_uint32 |
| | # define SFX ui32 |
| | #elif (FLT == VL_TYPE_INT32) |
| | # define T vl_int32 |
| | # define SFX i32 |
| | #endif /* no #else branch: for any other FLT value neither T nor SFX is defined here */ |
| |
|
| | /* ---------------------------------------------------------------- */ |
| | /* AVX: 256-bit vector parameters and intrinsic aliases, keyed on FLT */ |
| | /* ---------------------------------------------------------------- */ |
| |
|
| | #ifdef __AVX__ |
| |
|
| | #if (FLT == VL_TYPE_FLOAT) /* vector parameters exist only for float and double */ |
| | # define VSIZEavx 8 /* elements per vector */ |
| | # define VSFXavx s /* suffix pasted onto intrinsic names (..._ps) */ |
| | # define VTYPEavx __m256 |
| | #elif (FLT == VL_TYPE_DOUBLE) |
| | # define VSIZEavx 4 /* elements per vector */ |
| | # define VSFXavx d /* suffix pasted onto intrinsic names (..._pd) */ |
| | # define VTYPEavx __m256d |
| | #endif |
| |
|
| | #define VALIGNEDavx(x) (! (((vl_uintptr)(x)) & 0x1F)) /* non-zero when the low five address bits of x are clear (32-byte boundary) */ |
| |
|
| | #define VMULavx VL_XCAT(_mm256_mul_p, VSFXavx) /* every alias pastes VSFXavx, defined in this section, so none depends on the SSE2 section's VSFX */ |
| | #define VDIVavx VL_XCAT(_mm256_div_p, VSFXavx) |
| | #define VADDavx VL_XCAT(_mm256_add_p, VSFXavx) |
| | #define VHADDavx VL_XCAT(_mm_hadd_p, VSFXavx) /* note the _mm_ prefix: this names the 128-bit horizontal add */ |
| | #define VHADD2avx VL_XCAT(_mm256_hadd_p, VSFXavx) /* 256-bit horizontal add */ |
| | #define VSUBavx VL_XCAT(_mm256_sub_p, VSFXavx) |
| | #define VSTZavx VL_XCAT(_mm256_setzero_p, VSFXavx) |
| | #define VLD1avx VL_XCAT(_mm256_broadcast_s, VSFXavx) |
| | #define VLDUavx VL_XCAT(_mm256_loadu_p, VSFXavx) |
| | #define VST1avx VL_XCAT(_mm256_store_s, VSFXavx) /* NOTE(review): pastes to _mm256_store_ss / _mm256_store_sd; confirm the target headers provide these before use */ |
| | #define VST2avx VL_XCAT(_mm256_store_p, VSFXavx) |
| | #define VST2Uavx VL_XCAT(_mm256_storeu_p, VSFXavx) |
| | #define VPERMavx VL_XCAT(_mm256_permute2f128_p, VSFXavx) |
| | /* cast alias: pastes to _mm256_castps256_ps128 (float) or _mm256_castpd256_pd128 (double) */ |
| | #define VCSTavx VL_XCAT5(_mm256_castp,VSFXavx,256_p,VSFXavx,128) |
| |
|
| | /* __AVX__ */ |
| | #endif |
| |
|
| | /* ---------------------------------------------------------------- */ |
| | /* SSE2: 128-bit vector parameters and intrinsic aliases, keyed on FLT */ |
| | /* ---------------------------------------------------------------- */ |
| |
|
| | #ifdef __SSE2__ |
| |
|
| | #if (FLT == VL_TYPE_FLOAT) /* vector parameters exist only for float and double */ |
| | # define VSIZE 4 /* elements per vector */ |
| | # define VSFX s /* suffix pasted onto intrinsic names (..._ps) */ |
| | # define VTYPE __m128 |
| | #elif (FLT == VL_TYPE_DOUBLE) |
| | # define VSIZE 2 /* elements per vector */ |
| | # define VSFX d /* suffix pasted onto intrinsic names (..._pd) */ |
| | # define VTYPE __m128d |
| | #endif |
| |
|
| | #define VALIGNED(x) (! (((vl_uintptr)(x)) & 0xF)) /* non-zero when the low four address bits of x are clear (16-byte boundary) */ |
| |
|
| | #define VMAX VL_XCAT(_mm_max_p, VSFX) /* each alias pastes VSFX onto an intrinsic stem, e.g. _mm_max_ps or _mm_max_pd */ |
| | #define VMUL VL_XCAT(_mm_mul_p, VSFX) |
| | #define VDIV VL_XCAT(_mm_div_p, VSFX) |
| | #define VADD VL_XCAT(_mm_add_p, VSFX) |
| | #define VSUB VL_XCAT(_mm_sub_p, VSFX) |
| | #define VSTZ VL_XCAT(_mm_setzero_p, VSFX) |
| | #define VLD1 VL_XCAT(_mm_load1_p, VSFX) |
| | #define VLDU VL_XCAT(_mm_loadu_p, VSFX) |
| | #define VST1 VL_XCAT(_mm_store_s, VSFX) /* scalar form: pastes to _mm_store_ss / _mm_store_sd */ |
| | #define VSET1 VL_XCAT(_mm_set_s, VSFX) /* scalar form: pastes to _mm_set_ss / _mm_set_sd */ |
| | #define VSHU VL_XCAT(_mm_shuffle_p, VSFX) |
| | #define VNEQ VL_XCAT(_mm_cmpneq_p, VSFX) |
| | #define VAND VL_XCAT(_mm_and_p, VSFX) |
| | #define VANDN VL_XCAT(_mm_andnot_p, VSFX) |
| | #define VST2 VL_XCAT(_mm_store_p, VSFX) |
| | #define VST2U VL_XCAT(_mm_storeu_p, VSFX) |
| |
|
| | /* __SSE2__ */ |
| | #endif |
| |
|
| |
|