/* ---------------------------------------------------------------------------- Copyright (c) 2019-2024 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ /* ---------------------------------------------------------------------------- Bit operation, and platform dependent definition (MI_INTPTR_SIZE etc) ---------------------------------------------------------------------------- */ #pragma once #ifndef MI_BITS_H #define MI_BITS_H #include // size_t #include // int64_t etc #include // bool // ------------------------------------------------------ // Size of a pointer. // We assume that `sizeof(void*)==sizeof(intptr_t)` // and it holds for all platforms we know of. // // However, the C standard only requires that: // p == (void*)((intptr_t)p)) // but we also need: // i == (intptr_t)((void*)i) // or otherwise one might define an intptr_t type that is larger than a pointer... // ------------------------------------------------------ #if INTPTR_MAX > INT64_MAX # define MI_INTPTR_SHIFT (4) // assume 128-bit (as on arm CHERI for example) #elif INTPTR_MAX == INT64_MAX # define MI_INTPTR_SHIFT (3) #elif INTPTR_MAX == INT32_MAX # define MI_INTPTR_SHIFT (2) #else #error platform pointers must be 32, 64, or 128 bits #endif #if (INTPTR_MAX) > LONG_MAX # define MI_PU(x) x##ULL #else # define MI_PU(x) x##UL #endif #if SIZE_MAX == UINT64_MAX # define MI_SIZE_SHIFT (3) typedef int64_t mi_ssize_t; #elif SIZE_MAX == UINT32_MAX # define MI_SIZE_SHIFT (2) typedef int32_t mi_ssize_t; #else #error platform objects must be 32 or 64 bits in size #endif #if (SIZE_MAX/2) > LONG_MAX # define MI_ZU(x) x##ULL #else # define MI_ZU(x) x##UL #endif #define MI_INTPTR_SIZE (1< #elif MI_ARCH_ARM64 && MI_OPT_SIMD #include #endif #if defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) #include #endif #if MI_ARCH_X64 && defined(__AVX2__) && !defined(__BMI2__) // msvc #define __BMI2__ 1 #endif #if MI_ARCH_X64 && (defined(__AVX2__) || defined(__BMI2__)) && !defined(__BMI1__) // msvc #define __BMI1__ 1 #endif #if MI_ARCH_X64 && defined(__AVX2__) && !defined(__LZCNT__) // msvc #define __LZCNT__ 1 #endif // Define big endian if needed // #define MI_BIG_ENDIAN 1 // maximum virtual address bits in a user-space pointer #if MI_DEFAULT_VIRTUAL_ADDRESS_BITS > 0 #define MI_MAX_VABITS MI_DEFAULT_VIRTUAL_ADDRESS_BITS #elif MI_ARCH_X64 #define MI_MAX_VABITS (47) #elif MI_INTPTR_SIZE > 4 #define MI_MAX_VABITS (48) #else #define MI_MAX_VABITS (32) #endif // use a flat page-map or a 2-level one #ifndef MI_PAGE_MAP_FLAT #if MI_MAX_VABITS <= 40 && !defined(__APPLE__) && MI_SECURE==0 && !MI_PAGE_META_IS_SEPARATED #define MI_PAGE_MAP_FLAT 1 #else #define MI_PAGE_MAP_FLAT 0 #endif #endif /* -------------------------------------------------------------------------------- Builtin's -------------------------------------------------------------------------------- */ #ifndef __has_builtin #define __has_builtin(x) 0 #endif #define mi_builtin(name) __builtin_##name #define mi_has_builtin(name) __has_builtin(__builtin_##name) #if (LONG_MAX == INT32_MAX) #define mi_builtin32(name) mi_builtin(name##l) #define mi_has_builtin32(name) mi_has_builtin(name##l) #else #define mi_builtin32(name) mi_builtin(name) #define mi_has_builtin32(name) mi_has_builtin(name) #endif #if (LONG_MAX == INT64_MAX) #define mi_builtin64(name) mi_builtin(name##l) #define mi_has_builtin64(name) mi_has_builtin(name##l) #else #define mi_builtin64(name) mi_builtin(name##ll) #define mi_has_builtin64(name) mi_has_builtin(name##ll) #endif #if (MI_SIZE_BITS == 32) #define mi_builtinz(name) mi_builtin32(name) #define mi_has_builtinz(name) mi_has_builtin32(name) #define mi_msc_builtinz(name) name #elif (MI_SIZE_BITS == 64) #define mi_builtinz(name) mi_builtin64(name) #define mi_has_builtinz(name) mi_has_builtin64(name) #define mi_msc_builtinz(name) name##64 #endif /* -------------------------------------------------------------------------------- Popcount and count trailing/leading zero's -------------------------------------------------------------------------------- */ size_t _mi_popcount_generic(size_t x); static inline size_t mi_popcount(size_t x) { #if mi_has_builtinz(popcount) return mi_builtinz(popcount)(x); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) return mi_msc_builtinz(__popcnt)(x); #elif MI_ARCH_X64 && defined(__BMI1__) return (size_t)_mm_popcnt_u64(x); #else #define MI_HAS_FAST_POPCOUNT 0 return (x<=1 ? x : _mi_popcount_generic(x)); #endif } #ifndef MI_HAS_FAST_POPCOUNT #define MI_HAS_FAST_POPCOUNT 1 #endif size_t _mi_clz_generic(size_t x); size_t _mi_ctz_generic(size_t x); static inline size_t mi_ctz(size_t x) { #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) // on x64 tzcnt is defined for 0 size_t r; __asm ("tzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); return r; #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__BMI1__) return _tzcnt_u64(x); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) unsigned long idx; return (mi_msc_builtinz(_BitScanForward)(&idx, x) ? (size_t)idx : MI_SIZE_BITS); #elif mi_has_builtinz(ctz) return (x!=0 ? (size_t)mi_builtinz(ctz)(x) : MI_SIZE_BITS); #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86) size_t r = MI_SIZE_BITS; // bsf leaves destination unmodified if the argument is 0 (see ) __asm ("bsf\t%1, %0" : "+r"(r) : "r"(x) : "cc"); return r; #elif MI_HAS_FAST_POPCOUNT return (x!=0 ? (mi_popcount(x^(x-1))-1) : MI_SIZE_BITS); #else #define MI_HAS_FAST_BITSCAN 0 return (x!=0 ? _mi_ctz_generic(x) : MI_SIZE_BITS); #endif } static inline size_t mi_clz(size_t x) { #if defined(__GNUC__) && MI_ARCH_X64 && defined(__LZCNT__) // on x64 lzcnt is defined for 0 size_t r; __asm ("lzcnt\t%1, %0" : "=r"(r) : "r"(x) : "cc"); return r; #elif defined(_MSC_VER) && MI_ARCH_X64 && defined(__LZCNT__) return _lzcnt_u64(x); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) unsigned long idx; return (mi_msc_builtinz(_BitScanReverse)(&idx, x) ? MI_SIZE_BITS - 1 - (size_t)idx : MI_SIZE_BITS); #elif mi_has_builtinz(clz) return (x!=0 ? (size_t)mi_builtinz(clz)(x) : MI_SIZE_BITS); #elif defined(__GNUC__) && (MI_ARCH_X64 || MI_ARCH_X86) if (x==0) return MI_SIZE_BITS; size_t r; __asm ("bsr\t%1, %0" : "=r"(r) : "r"(x) : "cc"); return (MI_SIZE_BITS - 1 - r); #else #define MI_HAS_FAST_BITSCAN 0 return (x!=0 ? _mi_clz_generic(x) : MI_SIZE_BITS); #endif } #ifndef MI_HAS_FAST_BITSCAN #define MI_HAS_FAST_BITSCAN 1 #endif /* -------------------------------------------------------------------------------- find trailing/leading zero (bit scan forward/reverse) -------------------------------------------------------------------------------- */ // Bit scan forward: find the least significant bit that is set (i.e. count trailing zero's) // return false if `x==0` (with `*idx` undefined) and true otherwise, // with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). static inline bool mi_bsf(size_t x, size_t* idx) { #if defined(__GNUC__) && MI_ARCH_X64 && defined(__BMI1__) && (!defined(__clang_major__) || __clang_major__ >= 9) // on x64 the carry flag is set on zero which gives better codegen bool is_zero; __asm ( "tzcnt\t%2, %1" : "=@ccc"(is_zero), "=r"(*idx) : "r"(x) : "cc" ); return !is_zero; #elif 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) unsigned long i; return (mi_msc_builtinz(_BitScanForward)(&i, x) ? (*idx = (size_t)i, true) : false); #else return (x!=0 ? (*idx = mi_ctz(x), true) : false); #endif } // Bit scan reverse: find the most significant bit that is set // return false if `x==0` (with `*idx` undefined) and true otherwise, // with the `idx` is set to the bit index (`0 <= *idx < MI_BFIELD_BITS`). static inline bool mi_bsr(size_t x, size_t* idx) { #if 0 && defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) unsigned long i; return (mi_msc_builtinz(_BitScanReverse)(&i, x) ? (*idx = (size_t)i, true) : false); #else return (x!=0 ? (*idx = MI_SIZE_BITS - 1 - mi_clz(x), true) : false); #endif } /* -------------------------------------------------------------------------------- rotate -------------------------------------------------------------------------------- */ static inline size_t mi_rotr(size_t x, size_t r) { #if (mi_has_builtin(rotateright64) && MI_SIZE_BITS==64) return mi_builtin(rotateright64)(x,r); #elif (mi_has_builtin(rotateright32) && MI_SIZE_BITS==32) return mi_builtin(rotateright32)(x,r); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64) return _rotr64(x, (int)r); #elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32) return _lrotr(x,(int)r); #else // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to // avoid UB when `rshift==0`. See const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1); return ((x >> rshift) | (x << ((-rshift) & (MI_SIZE_BITS-1)))); #endif } static inline size_t mi_rotl(size_t x, size_t r) { #if (mi_has_builtin(rotateleft64) && MI_SIZE_BITS==64) return mi_builtin(rotateleft64)(x,r); #elif (mi_has_builtin(rotateleft32) && MI_SIZE_BITS==32) return mi_builtin(rotateleft32)(x,r); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_ARM64) return _rotl64(x, (int)r); #elif defined(_MSC_VER) && (MI_ARCH_X86 || MI_ARCH_ARM32) return _lrotl(x, (int)r); #else // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to // avoid UB when `rshift==0`. See const unsigned int rshift = (unsigned int)(r) & (MI_SIZE_BITS-1); return ((x << rshift) | (x >> ((-rshift) & (MI_SIZE_BITS-1)))); #endif } static inline uint32_t mi_rotl32(uint32_t x, uint32_t r) { #if mi_has_builtin(rotateleft32) return mi_builtin(rotateleft32)(x,r); #elif defined(_MSC_VER) && (MI_ARCH_X64 || MI_ARCH_X86 || MI_ARCH_ARM64 || MI_ARCH_ARM32) return _lrotl(x, (int)r); #else // The term `(-rshift)&(BITS-1)` is written instead of `BITS - rshift` to // avoid UB when `rshift==0`. See const unsigned int rshift = (unsigned int)(r) & 31; return ((x << rshift) | (x >> ((-rshift) & 31))); #endif } #endif // MI_BITS_H