| |
| |
| |
| |
| |
| |
| |
|
|
| #include "src/math/cbrt.h" |
| #include "hdr/fenv_macros.h" |
| #include "src/__support/FPUtil/FEnvImpl.h" |
| #include "src/__support/FPUtil/FPBits.h" |
| #include "src/__support/FPUtil/PolyEval.h" |
| #include "src/__support/FPUtil/double_double.h" |
| #include "src/__support/FPUtil/dyadic_float.h" |
| #include "src/__support/FPUtil/multiply_add.h" |
| #include "src/__support/common.h" |
| #include "src/__support/integer_literals.h" |
| #include "src/__support/macros/config.h" |
| #include "src/__support/macros/optimization.h" |
|
|
| #if ((LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) != 0) |
| #define LIBC_MATH_CBRT_SKIP_ACCURATE_PASS |
| #endif |
|
|
| namespace LIBC_NAMESPACE_DECL { |
|
|
| using DoubleDouble = fputil::DoubleDouble; |
| using Float128 = fputil::DyadicFloat<128>; |
|
|
| namespace { |
|
|
| |
| |
| |
| |
| |
| double intial_approximation(double x) { |
| constexpr double COEFFS[8] = { |
| 0x1.bc52aedead5c6p1, -0x1.b52bfebf110b3p2, 0x1.1d8d71d53d126p3, |
| -0x1.de2db9e81cf87p2, 0x1.0154ca06153bdp2, -0x1.5973c66ee6da7p0, |
| 0x1.07bf6ac832552p-2, -0x1.5e53d9ce41cb8p-6, |
| }; |
|
|
| double x_sq = x * x; |
|
|
| double c0 = fputil::multiply_add(x, COEFFS[1], COEFFS[0]); |
| double c1 = fputil::multiply_add(x, COEFFS[3], COEFFS[2]); |
| double c2 = fputil::multiply_add(x, COEFFS[5], COEFFS[4]); |
| double c3 = fputil::multiply_add(x, COEFFS[7], COEFFS[6]); |
|
|
| double x_4 = x_sq * x_sq; |
| double d0 = fputil::multiply_add(x_sq, c1, c0); |
| double d1 = fputil::multiply_add(x_sq, c3, c2); |
|
|
| return fputil::multiply_add(x_4, d1, d0); |
| } |
|
|
| |
| |
| #ifdef LIBC_TARGET_CPU_HAS_FMA_DOUBLE |
| double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { |
| return fputil::multiply_add(x_3.hi, a_sq.hi, -1.0) + |
| fputil::multiply_add(x_3.lo, a_sq.hi, x_3.hi * a_sq.lo); |
| } |
| #else |
| double get_error(const DoubleDouble &x_3, const DoubleDouble &a_sq) { |
| DoubleDouble x_3_a_sq = fputil::quick_mult(a_sq, x_3); |
| return (x_3_a_sq.hi - 1.0) + x_3_a_sq.lo; |
| } |
| #endif |
|
|
| } |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| LLVM_LIBC_FUNCTION(double, cbrt, (double x)) { |
| using FPBits = fputil::FPBits<double>; |
|
|
| uint64_t x_abs = FPBits(x).abs().uintval(); |
|
|
| unsigned exp_bias_correction = 682; |
|
|
| if (LIBC_UNLIKELY(x_abs < FPBits::min_normal().uintval() || |
| x_abs >= FPBits::inf().uintval())) { |
| if (x == 0.0 || x_abs >= FPBits::inf().uintval()) |
| |
| |
| return static_cast<double>(x + x); |
|
|
| |
| |
| x *= 0x1.0p60; |
| exp_bias_correction -= 20; |
| } |
|
|
| FPBits x_bits(x); |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| unsigned x_e = static_cast<unsigned>(x_bits.get_biased_exponent()); |
| unsigned out_e = (x_e / 3 + exp_bias_correction); |
| unsigned shift_e = x_e % 3; |
|
|
| |
| double x_r = |
| FPBits(x_bits.get_mantissa() | |
| (static_cast<uint64_t>(FPBits::EXP_BIAS) << FPBits::FRACTION_LEN)) |
| .get_val(); |
|
|
| |
| uint64_t a_bits = x_bits.uintval() & 0x800F'FFFF'FFFF'FFFF; |
| a_bits |= |
| (static_cast<uint64_t>(shift_e + static_cast<unsigned>(FPBits::EXP_BIAS)) |
| << FPBits::FRACTION_LEN); |
| double a = FPBits(a_bits).get_val(); |
|
|
| |
| double p = intial_approximation(x_r); |
|
|
| |
| constexpr double EXP2_M2_OVER_3[3] = {1.0, 0x1.428a2f98d728bp-1, |
| 0x1.965fea53d6e3dp-2}; |
|
|
| |
| |
| double x0 = static_cast<double>(EXP2_M2_OVER_3[shift_e] * p); |
|
|
| |
| DoubleDouble a_sq = fputil::exact_mult(a, a); |
|
|
| |
| DoubleDouble x0_sq = fputil::exact_mult(x0, x0); |
| DoubleDouble x0_3 = fputil::quick_mult(x0, x0_sq); |
|
|
| double h0 = get_error(x0_3, a_sq); |
|
|
| #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS |
| constexpr double REL_ERROR = 0; |
| #else |
| constexpr double REL_ERROR = 0x1.0p-51; |
| #endif |
|
|
| |
| |
| constexpr double ERR_COEFFS[3] = { |
| -0x1.5555555555555p-2 - REL_ERROR, |
| 0x1.c71c71c71c71cp-3, |
| -0x1.61f9add3c0ca4p-3, |
| }; |
| |
| double e0 = fputil::polyeval(h0, ERR_COEFFS[0], ERR_COEFFS[1], ERR_COEFFS[2]); |
| double x0_h0 = x0 * h0; |
|
|
| |
| |
| |
| DoubleDouble x1_dd{x0_h0 * e0, x0}; |
|
|
| |
| DoubleDouble r1 = fputil::quick_mult(a, x1_dd); |
|
|
| |
| auto update_exponent = [=](double r) -> double { |
| uint64_t r_m = FPBits(r).uintval() - 0x3FF0'0000'0000'0000; |
| |
| uint64_t r_bits = |
| r_m + (static_cast<uint64_t>(out_e) << FPBits::FRACTION_LEN); |
| return FPBits(r_bits).get_val(); |
| }; |
|
|
| #ifdef LIBC_MATH_CBRT_SKIP_ACCURATE_PASS |
| |
| |
| return update_exponent(r1.hi + r1.lo); |
| #else |
| |
| double r1_lower = r1.hi + r1.lo; |
| double r1_upper = |
| r1.hi + fputil::multiply_add(x0_h0, 2.0 * REL_ERROR * a, r1.lo); |
|
|
| |
| if (LIBC_LIKELY(r1_upper == r1_lower)) { |
| |
| |
| if (LIBC_UNLIKELY((FPBits(r1_lower).uintval() & 0x0000'0007'FFFF'FFFF) == |
| 0)) { |
| double r1_err = (r1_lower - r1.hi) - r1.lo; |
| if (FPBits(r1_err).abs().get_val() < 0x1.0p69) |
| fputil::clear_except_if_required(FE_INEXACT); |
| } |
|
|
| return update_exponent(r1_lower); |
| } |
|
|
| |
| double x1 = x1_dd.hi + (e0 + REL_ERROR) * x0_h0; |
|
|
| |
| |
| DoubleDouble x1_sq = fputil::exact_mult(x1, x1); |
| DoubleDouble x1_3 = fputil::quick_mult(x1, x1_sq); |
| double h1 = get_error(x1_3, a_sq); |
|
|
| |
| double e1 = h1 * (x1 * -0x1.5555555555555p-2); |
| |
| DoubleDouble x2 = fputil::exact_add(x1, e1); |
| |
| DoubleDouble r2 = fputil::quick_mult(a, x2); |
|
|
| double r2_upper = r2.hi + fputil::multiply_add(a, 0x1.0p-102, r2.lo); |
| double r2_lower = r2.hi + fputil::multiply_add(a, -0x1.0p-102, r2.lo); |
|
|
| |
| if (LIBC_LIKELY(r2_upper == r2_lower)) |
| return update_exponent(r2_upper); |
|
|
| |
| |
| Float128 x2_f128 = fputil::quick_add(Float128(x2.hi), Float128(x2.lo)); |
| |
| Float128 x2_3 = |
| fputil::quick_mul(fputil::quick_mul(x2_f128, x2_f128), x2_f128); |
| |
| Float128 a_sq_f128 = fputil::quick_mul(Float128(a), Float128(a)); |
| |
| Float128 x2_3_a_sq = fputil::quick_mul(x2_3, a_sq_f128); |
| |
| Float128 h2_f128 = fputil::quick_add(x2_3_a_sq, Float128(-1.0)); |
| double h2 = static_cast<double>(h2_f128); |
| |
| Float128 t2 = |
| fputil::quick_add(Float128(1.0), Float128(h2 * (-0x1.5555555555555p-2))); |
| |
| Float128 x3 = fputil::quick_mul(x2_f128, t2); |
| |
| Float128 r3 = fputil::quick_mul(Float128(a), x3); |
|
|
| |
| Float128::MantissaType rounding_bits = |
| r3.mantissa & 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFFF_u128; |
|
|
| double result = static_cast<double>(r3); |
| if ((rounding_bits < 0x0000'0000'0000'0000'0000'0000'0000'000F_u128) || |
| (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128)) { |
| |
| r3.mantissa &= 0xFFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFFF'FFF0_u128; |
|
|
| if (rounding_bits >= 0x0000'0000'0000'03FF'FFFF'FFFF'FFFF'FFF0_u128) { |
| Float128 tmp{r3.sign, r3.exponent - 123, |
| 0x8000'0000'0000'0000'0000'0000'0000'0000_u128}; |
| Float128 r4 = fputil::quick_add(r3, tmp); |
| result = static_cast<double>(r4); |
| } else { |
| result = static_cast<double>(r3); |
| } |
|
|
| fputil::clear_except_if_required(FE_INEXACT); |
| } |
|
|
| return update_exponent(result); |
| #endif |
| } |
|
|
| } |
|
|