| |
| |
|
|
| #pragma once |
|
|
| #include "ck/ck.hpp" |
| #include "integral_constant.hpp" |
| #include "number.hpp" |
| #include "type.hpp" |
| #include "tuple.hpp" |
|
|
| namespace ck { |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| struct MagicDivision |
| { |
| |
| __host__ __device__ static constexpr auto CalculateMagicNumbers(uint32_t divisor) |
| { |
| |
| |
| |
| if(divisor >= 1 && divisor <= INT32_MAX) |
| { |
| uint32_t shift = 0; |
| for(shift = 0; shift < 32; ++shift) |
| { |
| if((1U << shift) >= divisor) |
| { |
| break; |
| } |
| } |
|
|
| uint64_t one = 1; |
| uint64_t multiplier = ((one << 32) * ((one << shift) - divisor)) / divisor + 1; |
| |
|
|
| return make_tuple(uint32_t(multiplier), shift); |
| } |
| else |
| { |
| return make_tuple(uint32_t(0), uint32_t(0)); |
| } |
| } |
|
|
| __host__ __device__ static constexpr uint32_t CalculateMagicMultiplier(uint32_t divisor) |
| { |
| auto tmp = CalculateMagicNumbers(divisor); |
|
|
| return tmp[Number<0>{}]; |
| } |
|
|
| __host__ __device__ static constexpr uint32_t CalculateMagicShift(uint32_t divisor) |
| { |
| auto tmp = CalculateMagicNumbers(divisor); |
|
|
| return tmp[Number<1>{}]; |
| } |
|
|
| |
| template <uint32_t Divisor> |
| __host__ __device__ static constexpr auto |
| CalculateMagicNumbers(integral_constant<uint32_t, Divisor>) |
| { |
| constexpr auto tmp = CalculateMagicNumbers(uint32_t{Divisor}); |
|
|
| constexpr uint32_t multiplier = tmp[Number<0>{}]; |
| constexpr uint32_t shift = tmp[Number<1>{}]; |
|
|
| return make_tuple(integral_constant<uint32_t, multiplier>{}, |
| integral_constant<uint32_t, shift>{}); |
| } |
|
|
| template <uint32_t Divisor> |
| __host__ __device__ static constexpr auto |
| CalculateMagicMultiplier(integral_constant<uint32_t, Divisor>) |
| { |
| constexpr uint32_t multiplier = CalculateMagicMultiplier(uint32_t{Divisor}); |
|
|
| return integral_constant<uint32_t, multiplier>{}; |
| } |
|
|
| template <uint32_t Divisor> |
| __host__ __device__ static constexpr auto |
| CalculateMagicShift(integral_constant<uint32_t, Divisor>) |
| { |
| constexpr uint32_t shift = CalculateMagicShift(uint32_t{Divisor}); |
|
|
| return integral_constant<uint32_t, shift>{}; |
| } |
|
|
| |
| template <int32_t Divisor> |
| __host__ __device__ static constexpr auto |
| CalculateMagicNumbers(integral_constant<int32_t, Divisor>) |
| { |
| return CalculateMagicNumbers(integral_constant<uint32_t, Divisor>{}); |
| } |
|
|
| template <int32_t Divisor> |
| __host__ __device__ static constexpr auto |
| CalculateMagicMultiplier(integral_constant<int32_t, Divisor>) |
| { |
| return CalculateMagicMultiplier(integral_constant<uint32_t, Divisor>{}); |
| } |
|
|
| template <int32_t Divisor> |
| __host__ __device__ static constexpr auto |
| CalculateMagicShift(integral_constant<int32_t, Divisor>) |
| { |
| return CalculateMagicShift(integral_constant<uint32_t, Divisor>{}); |
| } |
|
|
| |
| __device__ static constexpr uint32_t |
| DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift) |
| { |
| uint32_t tmp = __umulhi(dividend, multiplier); |
| return (tmp + dividend) >> shift; |
| } |
|
|
| __host__ static constexpr uint32_t |
| DoMagicDivision(uint32_t dividend, uint32_t multiplier, uint32_t shift) |
| { |
| uint32_t tmp = static_cast<uint64_t>(dividend) * multiplier >> 32; |
| return (tmp + dividend) >> shift; |
| } |
|
|
| |
| |
| |
| |
| __device__ static constexpr int32_t |
| DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift) |
| { |
| uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32); |
| uint32_t tmp = __umulhi(dividend_u32, multiplier); |
| return (tmp + dividend_u32) >> shift; |
| } |
|
|
| __host__ static constexpr int32_t |
| DoMagicDivision(int32_t dividend_i32, uint32_t multiplier, uint32_t shift) |
| { |
| uint32_t dividend_u32 = bit_cast<uint32_t>(dividend_i32); |
| uint32_t tmp = static_cast<uint64_t>(dividend_u32) * multiplier >> 32; |
| return (tmp + dividend_u32) >> shift; |
| } |
| }; |
|
|
| struct MDiv |
| { |
| |
| uint32_t divisor; |
| uint32_t multiplier; |
| uint32_t shift; |
|
|
| |
| __host__ __device__ MDiv(uint32_t divisor_) : divisor(divisor_) |
| { |
| auto tmp = MagicDivision::CalculateMagicNumbers(divisor_); |
|
|
| multiplier = tmp[Number<0>{}]; |
| shift = tmp[Number<1>{}]; |
| } |
|
|
| __host__ __device__ MDiv() : divisor(0), multiplier(0), shift(0) {} |
|
|
| __host__ __device__ void update(uint32_t divisor_) |
| { |
| divisor = divisor_; |
| auto tmp = MagicDivision::CalculateMagicNumbers(divisor_); |
|
|
| multiplier = tmp[Number<0>{}]; |
| shift = tmp[Number<1>{}]; |
| } |
|
|
| __host__ __device__ uint32_t div(uint32_t dividend_) const |
| { |
| return MagicDivision::DoMagicDivision(dividend_, multiplier, shift); |
| } |
|
|
| __host__ __device__ void |
| divmod(uint32_t dividend_, uint32_t& quotient_, uint32_t& remainder_) const |
| { |
| quotient_ = div(dividend_); |
| remainder_ = dividend_ - (quotient_ * divisor); |
| } |
|
|
| __host__ __device__ uint32_t get() const { return divisor; } |
| }; |
|
|
| struct MDiv2 |
| { |
| |
| uint32_t multiplier; |
| uint32_t shift; |
|
|
| |
| __host__ __device__ MDiv2(uint32_t divisor_) |
| { |
| auto tmp = MagicDivision::CalculateMagicNumbers(divisor_); |
|
|
| multiplier = tmp[Number<0>{}]; |
| shift = tmp[Number<1>{}]; |
| } |
|
|
| __host__ __device__ MDiv2() : multiplier(0), shift(0) {} |
|
|
| __host__ __device__ uint32_t div(uint32_t dividend_) const |
| { |
| return MagicDivision::DoMagicDivision(dividend_, multiplier, shift); |
| } |
|
|
| __host__ __device__ void |
| divmod(uint32_t dividend_, uint32_t divisor_, uint32_t& quotient_, uint32_t& remainder_) const |
| { |
| quotient_ = div(dividend_); |
| remainder_ = dividend_ - (quotient_ * divisor_); |
| } |
| }; |
|
|
| } |
|
|