| // Copyright 2014 The Go Authors. All rights reserved. | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| // This file implements multi-precision floating-point numbers. | |
| // Like in the GNU MPFR library (https://www.mpfr.org/), operands | |
| // can be of mixed precision. Unlike MPFR, the rounding mode is | |
| // not specified with each operation, but with each operand. The | |
| // rounding mode of the result operand determines the rounding | |
| // mode of an operation. This is a from-scratch implementation. | |
| package big | |
| import ( | |
| "fmt" | |
| "math" | |
| "math/bits" | |
| ) | |
// debugFloat guards the internal consistency checks (the validate calls)
// in this file. Set it to true only while debugging.
const debugFloat = false
| // A nonzero finite Float represents a multi-precision floating point number | |
| // | |
| // sign × mantissa × 2**exponent | |
| // | |
| // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. | |
| // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). | |
| // All Floats are ordered, and the ordering of two Floats x and y | |
| // is defined by x.Cmp(y). | |
| // | |
| // Each Float value also has a precision, rounding mode, and accuracy. | |
| // The precision is the maximum number of mantissa bits available to | |
| // represent the value. The rounding mode specifies how a result should | |
| // be rounded to fit into the mantissa bits, and accuracy describes the | |
| // rounding error with respect to the exact result. | |
| // | |
| // Unless specified otherwise, all operations (including setters) that | |
| // specify a *Float variable for the result (usually via the receiver | |
| // with the exception of [Float.MantExp]), round the numeric result according | |
| // to the precision and rounding mode of the result variable. | |
| // | |
| // If the provided result precision is 0 (see below), it is set to the | |
| // precision of the argument with the largest precision value before any | |
| // rounding takes place, and the rounding mode remains unchanged. Thus, | |
| // uninitialized Floats provided as result arguments will have their | |
| // precision set to a reasonable value determined by the operands, and | |
| // their mode is the zero value for RoundingMode (ToNearestEven). | |
| // | |
| // By setting the desired precision to 24 or 53 and using matching rounding | |
| // mode (typically [ToNearestEven]), Float operations produce the same results | |
| // as the corresponding float32 or float64 IEEE 754 arithmetic for operands | |
| // that correspond to normal (i.e., not denormal) float32 or float64 numbers. | |
| // Exponent underflow and overflow lead to a 0 or an Infinity for different | |
| // values than IEEE 754 because Float exponents have a much larger range. | |
| // | |
| // The zero (uninitialized) value for a Float is ready to use and represents | |
| // the number +0.0 exactly, with precision 0 and rounding mode [ToNearestEven]. | |
| // | |
| // Operations always take pointer arguments (*Float) rather | |
| // than Float values, and each unique Float value requires | |
| // its own unique *Float pointer. To "copy" a Float value, | |
| // an existing (or newly allocated) Float must be set to | |
| // a new value using the [Float.Set] method; shallow copies | |
| // of Floats are not supported and may lead to errors. | |
| type Float struct { | |
| prec uint32 | |
| mode RoundingMode | |
| acc Accuracy | |
| form form | |
| neg bool | |
| mant nat | |
| exp int32 | |
| } | |
// ErrNaN is the value of the panic raised by a [Float] operation whose
// result would be a NaN under IEEE 754 rules. ErrNaN implements the
// error interface.
type ErrNaN struct {
	msg string // text returned by Error
}

// Error returns the message stored in the ErrNaN.
func (e ErrNaN) Error() string { return e.msg }
| // NewFloat allocates and returns a new [Float] set to x, | |
| // with precision 53 and rounding mode [ToNearestEven]. | |
| // NewFloat panics with [ErrNaN] if x is a NaN. | |
| func NewFloat(x float64) *Float { | |
| if math.IsNaN(x) { | |
| panic(ErrNaN{"NewFloat(NaN)"}) | |
| } | |
| return new(Float).SetFloat64(x) | |
| } | |
// Limits on the exponent and the precision of a [Float].
const (
	// MaxExp is the largest supported exponent.
	MaxExp = math.MaxInt32
	// MinExp is the smallest supported exponent.
	MinExp = math.MinInt32
	// MaxPrec is the largest (theoretically) supported precision;
	// in practice it is likely memory-limited.
	MaxPrec = math.MaxUint32
)
// Internal representation: The mantissa bits x.mant of a nonzero finite
// Float x are stored in a nat slice long enough to hold up to x.prec bits;
// the slice may (but doesn't have to) be shorter if the mantissa contains
// trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
// the msb is shifted all the way "to the left"). Thus, if the mantissa has
// trailing 0 bits or x.prec is not a multiple of the Word size _W,
// x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
// to the value 0.5; the exponent x.exp shifts the binary point as needed.
//
// A zero or non-finite Float x ignores x.mant and x.exp.
//
//	x                 form      neg      mant         exp
//	----------------------------------------------------------
//	±0                zero      sign     -            -
//	0 < |x| < +Inf    finite    sign     mantissa     exponent
//	±Inf              inf       sign     -            -

// form tells which internal representation a Float currently uses.
type form byte

// The form value order is relevant - do not change!
const (
	zero   form = 0 // ±0
	finite form = 1 // 0 < |x| < +Inf
	inf    form = 2 // ±Inf
)
// A RoundingMode selects how a [Float] value is rounded to the desired
// precision. Rounding may change the [Float] value; the resulting
// rounding error is described by the [Float]'s [Accuracy].
type RoundingMode byte

// The supported rounding modes.
const (
	ToNearestEven RoundingMode = 0 // == IEEE 754-2008 roundTiesToEven
	ToNearestAway RoundingMode = 1 // == IEEE 754-2008 roundTiesToAway
	ToZero        RoundingMode = 2 // == IEEE 754-2008 roundTowardZero
	AwayFromZero  RoundingMode = 3 // no IEEE 754-2008 equivalent
	ToNegativeInf RoundingMode = 4 // == IEEE 754-2008 roundTowardNegative
	ToPositiveInf RoundingMode = 5 // == IEEE 754-2008 roundTowardPositive
)
| //go:generate stringer -type=RoundingMode | |
// An Accuracy reports, relative to the exact value, the rounding error
// produced by the most recent operation that generated a [Float] value.
type Accuracy int8

// The possible [Accuracy] values of a [Float].
const (
	Below Accuracy = iota - 1 // -1: the Float is below the exact value
	Exact                     //  0: the Float equals the exact value
	Above                     // +1: the Float is above the exact value
)
| //go:generate stringer -type=Accuracy | |
| // SetPrec sets z's precision to prec and returns the (possibly) rounded | |
| // value of z. Rounding occurs according to z's rounding mode if the mantissa | |
| // cannot be represented in prec bits without loss of precision. | |
| // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. | |
| // If prec > [MaxPrec], it is set to [MaxPrec]. | |
| func (z *Float) SetPrec(prec uint) *Float { | |
| z.acc = Exact // optimistically assume no rounding is needed | |
| // special case | |
| if prec == 0 { | |
| z.prec = 0 | |
| if z.form == finite { | |
| // truncate z to 0 | |
| z.acc = makeAcc(z.neg) | |
| z.form = zero | |
| } | |
| return z | |
| } | |
| // general case | |
| if prec > MaxPrec { | |
| prec = MaxPrec | |
| } | |
| old := z.prec | |
| z.prec = uint32(prec) | |
| if z.prec < old { | |
| z.round(0) | |
| } | |
| return z | |
| } | |
| func makeAcc(above bool) Accuracy { | |
| if above { | |
| return Above | |
| } | |
| return Below | |
| } | |
| // SetMode sets z's rounding mode to mode and returns an exact z. | |
| // z remains unchanged otherwise. | |
| // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to [Exact]. | |
| func (z *Float) SetMode(mode RoundingMode) *Float { | |
| z.mode = mode | |
| z.acc = Exact | |
| return z | |
| } | |
| // Prec returns the mantissa precision of x in bits. | |
| // The result may be 0 for |x| == 0 and |x| == Inf. | |
| func (x *Float) Prec() uint { | |
| return uint(x.prec) | |
| } | |
| // MinPrec returns the minimum precision required to represent x exactly | |
| // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). | |
| // The result is 0 for |x| == 0 and |x| == Inf. | |
| func (x *Float) MinPrec() uint { | |
| if x.form != finite { | |
| return 0 | |
| } | |
| return uint(len(x.mant))*_W - x.mant.trailingZeroBits() | |
| } | |
| // Mode returns the rounding mode of x. | |
| func (x *Float) Mode() RoundingMode { | |
| return x.mode | |
| } | |
| // Acc returns the accuracy of x produced by the most recent | |
| // operation, unless explicitly documented otherwise by that | |
| // operation. | |
| func (x *Float) Acc() Accuracy { | |
| return x.acc | |
| } | |
| // Sign returns: | |
| // - -1 if x < 0; | |
| // - 0 if x is ±0; | |
| // - +1 if x > 0. | |
| func (x *Float) Sign() int { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| if x.form == zero { | |
| return 0 | |
| } | |
| if x.neg { | |
| return -1 | |
| } | |
| return 1 | |
| } | |
| // MantExp breaks x into its mantissa and exponent components | |
| // and returns the exponent. If a non-nil mant argument is | |
| // provided its value is set to the mantissa of x, with the | |
| // same precision and rounding mode as x. The components | |
| // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. | |
| // Calling MantExp with a nil argument is an efficient way to | |
| // get the exponent of the receiver. | |
| // | |
| // Special cases are: | |
| // | |
| // ( ±0).MantExp(mant) = 0, with mant set to ±0 | |
| // (±Inf).MantExp(mant) = 0, with mant set to ±Inf | |
| // | |
| // x and mant may be the same in which case x is set to its | |
| // mantissa value. | |
| func (x *Float) MantExp(mant *Float) (exp int) { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| if x.form == finite { | |
| exp = int(x.exp) | |
| } | |
| if mant != nil { | |
| mant.Copy(x) | |
| if mant.form == finite { | |
| mant.exp = 0 | |
| } | |
| } | |
| return | |
| } | |
| func (z *Float) setExpAndRound(exp int64, sbit uint) { | |
| if exp < MinExp { | |
| // underflow | |
| z.acc = makeAcc(z.neg) | |
| z.form = zero | |
| return | |
| } | |
| if exp > MaxExp { | |
| // overflow | |
| z.acc = makeAcc(!z.neg) | |
| z.form = inf | |
| return | |
| } | |
| z.form = finite | |
| z.exp = int32(exp) | |
| z.round(sbit) | |
| } | |
| // SetMantExp sets z to mant × 2**exp and returns z. | |
| // The result z has the same precision and rounding mode | |
| // as mant. SetMantExp is an inverse of [Float.MantExp] but does | |
| // not require 0.5 <= |mant| < 1.0. Specifically, for a | |
| // given x of type *[Float], SetMantExp relates to [Float.MantExp] | |
| // as follows: | |
| // | |
| // mant := new(Float) | |
| // new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 | |
| // | |
| // Special cases are: | |
| // | |
| // z.SetMantExp( ±0, exp) = ±0 | |
| // z.SetMantExp(±Inf, exp) = ±Inf | |
| // | |
| // z and mant may be the same in which case z's exponent | |
| // is set to exp. | |
| func (z *Float) SetMantExp(mant *Float, exp int) *Float { | |
| if debugFloat { | |
| z.validate() | |
| mant.validate() | |
| } | |
| z.Copy(mant) | |
| if z.form == finite { | |
| // 0 < |mant| < +Inf | |
| z.setExpAndRound(int64(z.exp)+int64(exp), 0) | |
| } | |
| return z | |
| } | |
| // Signbit reports whether x is negative or negative zero. | |
| func (x *Float) Signbit() bool { | |
| return x.neg | |
| } | |
| // IsInf reports whether x is +Inf or -Inf. | |
| func (x *Float) IsInf() bool { | |
| return x.form == inf | |
| } | |
| // IsInt reports whether x is an integer. | |
| // ±Inf values are not integers. | |
| func (x *Float) IsInt() bool { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| // special cases | |
| if x.form != finite { | |
| return x.form == zero | |
| } | |
| // x.form == finite | |
| if x.exp <= 0 { | |
| return false | |
| } | |
| // x.exp > 0 | |
| return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa | |
| } | |
| // debugging support | |
| func (x *Float) validate() { | |
| if !debugFloat { | |
| // avoid performance bugs | |
| panic("validate called but debugFloat is not set") | |
| } | |
| if msg := x.validate0(); msg != "" { | |
| panic(msg) | |
| } | |
| } | |
| func (x *Float) validate0() string { | |
| if x.form != finite { | |
| return "" | |
| } | |
| m := len(x.mant) | |
| if m == 0 { | |
| return "nonzero finite number with empty mantissa" | |
| } | |
| const msb = 1 << (_W - 1) | |
| if x.mant[m-1]&msb == 0 { | |
| return fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)) | |
| } | |
| if x.prec == 0 { | |
| return "zero precision finite number" | |
| } | |
| return "" | |
| } | |
| // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. | |
| // sbit must be 0 or 1 and summarizes any "sticky bit" information one might | |
| // have before calling round. z's mantissa must be normalized (with the msb set) | |
| // or empty. | |
| // | |
| // CAUTION: The rounding modes [ToNegativeInf], [ToPositiveInf] are affected by the | |
| // sign of z. For correct rounding, the sign of z must be set correctly before | |
| // calling round. | |
| func (z *Float) round(sbit uint) { | |
| if debugFloat { | |
| z.validate() | |
| } | |
| z.acc = Exact | |
| if z.form != finite { | |
| // ±0 or ±Inf => nothing left to do | |
| return | |
| } | |
| // z.form == finite && len(z.mant) > 0 | |
| // m > 0 implies z.prec > 0 (checked by validate) | |
| m := uint32(len(z.mant)) // present mantissa length in words | |
| bits := m * _W // present mantissa bits; bits > 0 | |
| if bits <= z.prec { | |
| // mantissa fits => nothing to do | |
| return | |
| } | |
| // bits > z.prec | |
| // Rounding is based on two bits: the rounding bit (rbit) and the | |
| // sticky bit (sbit). The rbit is the bit immediately before the | |
| // z.prec leading mantissa bits (the "0.5"). The sbit is set if any | |
| // of the bits before the rbit are set (the "0.25", "0.125", etc.): | |
| // | |
| // rbit sbit => "fractional part" | |
| // | |
| // 0 0 == 0 | |
| // 0 1 > 0 , < 0.5 | |
| // 1 0 == 0.5 | |
| // 1 1 > 0.5, < 1.0 | |
| // bits > z.prec: mantissa too large => round | |
| r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 | |
| rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit | |
| // The sticky bit is only needed for rounding ToNearestEven | |
| // or when the rounding bit is zero. Avoid computation otherwise. | |
| if sbit == 0 && (rbit == 0 || z.mode == ToNearestEven) { | |
| sbit = z.mant.sticky(r) | |
| } | |
| sbit &= 1 // be safe and ensure it's a single bit | |
| // cut off extra words | |
| n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision | |
| if m > n { | |
| copy(z.mant, z.mant[m-n:]) // move n last words to front | |
| z.mant = z.mant[:n] | |
| } | |
| // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word | |
| ntz := n*_W - z.prec // 0 <= ntz < _W | |
| lsb := Word(1) << ntz | |
| // round if result is inexact | |
| if rbit|sbit != 0 { | |
| // Make rounding decision: The result mantissa is truncated ("rounded down") | |
| // by default. Decide if we need to increment, or "round up", the (unsigned) | |
| // mantissa. | |
| inc := false | |
| switch z.mode { | |
| case ToNegativeInf: | |
| inc = z.neg | |
| case ToZero: | |
| // nothing to do | |
| case ToNearestEven: | |
| inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) | |
| case ToNearestAway: | |
| inc = rbit != 0 | |
| case AwayFromZero: | |
| inc = true | |
| case ToPositiveInf: | |
| inc = !z.neg | |
| default: | |
| panic("unreachable") | |
| } | |
| // A positive result (!z.neg) is Above the exact result if we increment, | |
| // and it's Below if we truncate (Exact results require no rounding). | |
| // For a negative result (z.neg) it is exactly the opposite. | |
| z.acc = makeAcc(inc != z.neg) | |
| if inc { | |
| // add 1 to mantissa | |
| if addVW(z.mant, z.mant, lsb) != 0 { | |
| // mantissa overflow => adjust exponent | |
| if z.exp >= MaxExp { | |
| // exponent overflow | |
| z.form = inf | |
| return | |
| } | |
| z.exp++ | |
| // adjust mantissa: divide by 2 to compensate for exponent adjustment | |
| rshVU(z.mant, z.mant, 1) | |
| // set msb == carry == 1 from the mantissa overflow above | |
| const msb = 1 << (_W - 1) | |
| z.mant[n-1] |= msb | |
| } | |
| } | |
| } | |
| // zero out trailing bits in least-significant word | |
| z.mant[0] &^= lsb - 1 | |
| if debugFloat { | |
| z.validate() | |
| } | |
| } | |
| func (z *Float) setBits64(neg bool, x uint64) *Float { | |
| if z.prec == 0 { | |
| z.prec = 64 | |
| } | |
| z.acc = Exact | |
| z.neg = neg | |
| if x == 0 { | |
| z.form = zero | |
| return z | |
| } | |
| // x != 0 | |
| z.form = finite | |
| s := bits.LeadingZeros64(x) | |
| z.mant = z.mant.setUint64(x << uint(s)) | |
| z.exp = int32(64 - s) // always fits | |
| if z.prec < 64 { | |
| z.round(0) | |
| } | |
| return z | |
| } | |
| // SetUint64 sets z to the (possibly rounded) value of x and returns z. | |
| // If z's precision is 0, it is changed to 64 (and rounding will have | |
| // no effect). | |
| func (z *Float) SetUint64(x uint64) *Float { | |
| return z.setBits64(false, x) | |
| } | |
| // SetInt64 sets z to the (possibly rounded) value of x and returns z. | |
| // If z's precision is 0, it is changed to 64 (and rounding will have | |
| // no effect). | |
| func (z *Float) SetInt64(x int64) *Float { | |
| u := x | |
| if u < 0 { | |
| u = -u | |
| } | |
| // We cannot simply call z.SetUint64(uint64(u)) and change | |
| // the sign afterwards because the sign affects rounding. | |
| return z.setBits64(x < 0, uint64(u)) | |
| } | |
| // SetFloat64 sets z to the (possibly rounded) value of x and returns z. | |
| // If z's precision is 0, it is changed to 53 (and rounding will have | |
| // no effect). SetFloat64 panics with [ErrNaN] if x is a NaN. | |
| func (z *Float) SetFloat64(x float64) *Float { | |
| if z.prec == 0 { | |
| z.prec = 53 | |
| } | |
| if math.IsNaN(x) { | |
| panic(ErrNaN{"Float.SetFloat64(NaN)"}) | |
| } | |
| z.acc = Exact | |
| z.neg = math.Signbit(x) // handle -0, -Inf correctly | |
| if x == 0 { | |
| z.form = zero | |
| return z | |
| } | |
| if math.IsInf(x, 0) { | |
| z.form = inf | |
| return z | |
| } | |
| // normalized x != 0 | |
| z.form = finite | |
| fmant, exp := math.Frexp(x) // get normalized mantissa | |
| z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) | |
| z.exp = int32(exp) // always fits | |
| if z.prec < 53 { | |
| z.round(0) | |
| } | |
| return z | |
| } | |
| // fnorm normalizes mantissa m by shifting it to the left | |
| // such that the msb of the most-significant word (msw) is 1. | |
| // It returns the shift amount. It assumes that len(m) != 0. | |
| func fnorm(m nat) int64 { | |
| if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { | |
| panic("msw of mantissa is 0") | |
| } | |
| s := nlz(m[len(m)-1]) | |
| if s > 0 { | |
| c := lshVU(m, m, s) | |
| if debugFloat && c != 0 { | |
| panic("nlz or lshVU incorrect") | |
| } | |
| } | |
| return int64(s) | |
| } | |
| // SetInt sets z to the (possibly rounded) value of x and returns z. | |
| // If z's precision is 0, it is changed to the larger of x.BitLen() | |
| // or 64 (and rounding will have no effect). | |
| func (z *Float) SetInt(x *Int) *Float { | |
| // TODO(gri) can be more efficient if z.prec > 0 | |
| // but small compared to the size of x, or if there | |
| // are many trailing 0's. | |
| bits := uint32(x.BitLen()) | |
| if z.prec == 0 { | |
| z.prec = max(bits, 64) | |
| } | |
| z.acc = Exact | |
| z.neg = x.neg | |
| if len(x.abs) == 0 { | |
| z.form = zero | |
| return z | |
| } | |
| // x != 0 | |
| z.mant = z.mant.set(x.abs) | |
| fnorm(z.mant) | |
| z.setExpAndRound(int64(bits), 0) | |
| return z | |
| } | |
| // SetRat sets z to the (possibly rounded) value of x and returns z. | |
| // If z's precision is 0, it is changed to the largest of a.BitLen(), | |
| // b.BitLen(), or 64; with x = a/b. | |
| func (z *Float) SetRat(x *Rat) *Float { | |
| if x.IsInt() { | |
| return z.SetInt(x.Num()) | |
| } | |
| var a, b Float | |
| a.SetInt(x.Num()) | |
| b.SetInt(x.Denom()) | |
| if z.prec == 0 { | |
| z.prec = max(a.prec, b.prec) | |
| } | |
| return z.Quo(&a, &b) | |
| } | |
| // SetInf sets z to the infinite Float -Inf if signbit is | |
| // set, or +Inf if signbit is not set, and returns z. The | |
| // precision of z is unchanged and the result is always | |
| // [Exact]. | |
| func (z *Float) SetInf(signbit bool) *Float { | |
| z.acc = Exact | |
| z.form = inf | |
| z.neg = signbit | |
| return z | |
| } | |
| // Set sets z to the (possibly rounded) value of x and returns z. | |
| // If z's precision is 0, it is changed to the precision of x | |
| // before setting z (and rounding will have no effect). | |
| // Rounding is performed according to z's precision and rounding | |
| // mode; and z's accuracy reports the result error relative to the | |
| // exact (not rounded) result. | |
| func (z *Float) Set(x *Float) *Float { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| z.acc = Exact | |
| if z != x { | |
| z.form = x.form | |
| z.neg = x.neg | |
| if x.form == finite { | |
| z.exp = x.exp | |
| z.mant = z.mant.set(x.mant) | |
| } | |
| if z.prec == 0 { | |
| z.prec = x.prec | |
| } else if z.prec < x.prec { | |
| z.round(0) | |
| } | |
| } | |
| return z | |
| } | |
| // Copy sets z to x, with the same precision, rounding mode, and accuracy as x. | |
| // Copy returns z. If x and z are identical, Copy is a no-op. | |
| func (z *Float) Copy(x *Float) *Float { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| if z != x { | |
| z.prec = x.prec | |
| z.mode = x.mode | |
| z.acc = x.acc | |
| z.form = x.form | |
| z.neg = x.neg | |
| if z.form == finite { | |
| z.mant = z.mant.set(x.mant) | |
| z.exp = x.exp | |
| } | |
| } | |
| return z | |
| } | |
| // msb32 returns the 32 most significant bits of x. | |
| func msb32(x nat) uint32 { | |
| i := len(x) - 1 | |
| if i < 0 { | |
| return 0 | |
| } | |
| if debugFloat && x[i]&(1<<(_W-1)) == 0 { | |
| panic("x not normalized") | |
| } | |
| switch _W { | |
| case 32: | |
| return uint32(x[i]) | |
| case 64: | |
| return uint32(x[i] >> 32) | |
| } | |
| panic("unreachable") | |
| } | |
| // msb64 returns the 64 most significant bits of x. | |
| func msb64(x nat) uint64 { | |
| i := len(x) - 1 | |
| if i < 0 { | |
| return 0 | |
| } | |
| if debugFloat && x[i]&(1<<(_W-1)) == 0 { | |
| panic("x not normalized") | |
| } | |
| switch _W { | |
| case 32: | |
| v := uint64(x[i]) << 32 | |
| if i > 0 { | |
| v |= uint64(x[i-1]) | |
| } | |
| return v | |
| case 64: | |
| return uint64(x[i]) | |
| } | |
| panic("unreachable") | |
| } | |
| // Uint64 returns the unsigned integer resulting from truncating x | |
| // towards zero. If 0 <= x <= [math.MaxUint64], the result is [Exact] | |
| // if x is an integer and [Below] otherwise. | |
| // The result is (0, [Above]) for x < 0, and ([math.MaxUint64], [Below]) | |
| // for x > [math.MaxUint64]. | |
| func (x *Float) Uint64() (uint64, Accuracy) { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| switch x.form { | |
| case finite: | |
| if x.neg { | |
| return 0, Above | |
| } | |
| // 0 < x < +Inf | |
| if x.exp <= 0 { | |
| // 0 < x < 1 | |
| return 0, Below | |
| } | |
| // 1 <= x < Inf | |
| if x.exp <= 64 { | |
| // u = trunc(x) fits into a uint64 | |
| u := msb64(x.mant) >> (64 - uint32(x.exp)) | |
| if x.MinPrec() <= 64 { | |
| return u, Exact | |
| } | |
| return u, Below // x truncated | |
| } | |
| // x too large | |
| return math.MaxUint64, Below | |
| case zero: | |
| return 0, Exact | |
| case inf: | |
| if x.neg { | |
| return 0, Above | |
| } | |
| return math.MaxUint64, Below | |
| } | |
| panic("unreachable") | |
| } | |
| // Int64 returns the integer resulting from truncating x towards zero. | |
| // If [math.MinInt64] <= x <= [math.MaxInt64], the result is [Exact] if x is | |
| // an integer, and [Above] (x < 0) or [Below] (x > 0) otherwise. | |
| // The result is ([math.MinInt64], [Above]) for x < [math.MinInt64], | |
| // and ([math.MaxInt64], [Below]) for x > [math.MaxInt64]. | |
| func (x *Float) Int64() (int64, Accuracy) { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| switch x.form { | |
| case finite: | |
| // 0 < |x| < +Inf | |
| acc := makeAcc(x.neg) | |
| if x.exp <= 0 { | |
| // 0 < |x| < 1 | |
| return 0, acc | |
| } | |
| // x.exp > 0 | |
| // 1 <= |x| < +Inf | |
| if x.exp <= 63 { | |
| // i = trunc(x) fits into an int64 (excluding math.MinInt64) | |
| i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) | |
| if x.neg { | |
| i = -i | |
| } | |
| if x.MinPrec() <= uint(x.exp) { | |
| return i, Exact | |
| } | |
| return i, acc // x truncated | |
| } | |
| if x.neg { | |
| // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) | |
| if x.exp == 64 && x.MinPrec() == 1 { | |
| acc = Exact | |
| } | |
| return math.MinInt64, acc | |
| } | |
| // x too large | |
| return math.MaxInt64, Below | |
| case zero: | |
| return 0, Exact | |
| case inf: | |
| if x.neg { | |
| return math.MinInt64, Above | |
| } | |
| return math.MaxInt64, Below | |
| } | |
| panic("unreachable") | |
| } | |
| // Float32 returns the float32 value nearest to x. If x is too small to be | |
| // represented by a float32 (|x| < [math.SmallestNonzeroFloat32]), the result | |
| // is (0, [Below]) or (-0, [Above]), respectively, depending on the sign of x. | |
| // If x is too large to be represented by a float32 (|x| > [math.MaxFloat32]), | |
| // the result is (+Inf, [Above]) or (-Inf, [Below]), depending on the sign of x. | |
| func (x *Float) Float32() (float32, Accuracy) { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| switch x.form { | |
| case finite: | |
| // 0 < |x| < +Inf | |
| const ( | |
| fbits = 32 // float size | |
| mbits = 23 // mantissa size (excluding implicit msb) | |
| ebits = fbits - mbits - 1 // 8 exponent size | |
| bias = 1<<(ebits-1) - 1 // 127 exponent bias | |
| dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) | |
| emin = 1 - bias // -126 smallest unbiased exponent (normal) | |
| emax = bias // 127 largest unbiased exponent (normal) | |
| ) | |
| // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. | |
| e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 | |
| // Compute precision p for float32 mantissa. | |
| // If the exponent is too small, we have a denormal number before | |
| // rounding and fewer than p mantissa bits of precision available | |
| // (the exponent remains fixed but the mantissa gets shifted right). | |
| p := mbits + 1 // precision of normal float | |
| if e < emin { | |
| // recompute precision | |
| p = mbits + 1 - emin + int(e) | |
| // If p == 0, the mantissa of x is shifted so much to the right | |
| // that its msb falls immediately to the right of the float32 | |
| // mantissa space. In other words, if the smallest denormal is | |
| // considered "1.0", for p == 0, the mantissa value m is >= 0.5. | |
| // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. | |
| // If m == 0.5, it is rounded down to even, i.e., 0.0. | |
| // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. | |
| if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { | |
| // underflow to ±0 | |
| if x.neg { | |
| var z float32 | |
| return -z, Above | |
| } | |
| return 0.0, Below | |
| } | |
| // otherwise, round up | |
| // We handle p == 0 explicitly because it's easy and because | |
| // Float.round doesn't support rounding to 0 bits of precision. | |
| if p == 0 { | |
| if x.neg { | |
| return -math.SmallestNonzeroFloat32, Below | |
| } | |
| return math.SmallestNonzeroFloat32, Above | |
| } | |
| } | |
| // p > 0 | |
| // round | |
| var r Float | |
| r.prec = uint32(p) | |
| r.Set(x) | |
| e = r.exp - 1 | |
| // Rounding may have caused r to overflow to ±Inf | |
| // (rounding never causes underflows to 0). | |
| // If the exponent is too large, also overflow to ±Inf. | |
| if r.form == inf || e > emax { | |
| // overflow | |
| if x.neg { | |
| return float32(math.Inf(-1)), Below | |
| } | |
| return float32(math.Inf(+1)), Above | |
| } | |
| // e <= emax | |
| // Determine sign, biased exponent, and mantissa. | |
| var sign, bexp, mant uint32 | |
| if x.neg { | |
| sign = 1 << (fbits - 1) | |
| } | |
| // Rounding may have caused a denormal number to | |
| // become normal. Check again. | |
| if e < emin { | |
| // denormal number: recompute precision | |
| // Since rounding may have at best increased precision | |
| // and we have eliminated p <= 0 early, we know p > 0. | |
| // bexp == 0 for denormals | |
| p = mbits + 1 - emin + int(e) | |
| mant = msb32(r.mant) >> uint(fbits-p) | |
| } else { | |
| // normal number: emin <= e <= emax | |
| bexp = uint32(e+bias) << mbits | |
| mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) | |
| } | |
| return math.Float32frombits(sign | bexp | mant), r.acc | |
| case zero: | |
| if x.neg { | |
| var z float32 | |
| return -z, Exact | |
| } | |
| return 0.0, Exact | |
| case inf: | |
| if x.neg { | |
| return float32(math.Inf(-1)), Exact | |
| } | |
| return float32(math.Inf(+1)), Exact | |
| } | |
| panic("unreachable") | |
| } | |
| // Float64 returns the float64 value nearest to x. If x is too small to be | |
| // represented by a float64 (|x| < [math.SmallestNonzeroFloat64]), the result | |
| // is (0, [Below]) or (-0, [Above]), respectively, depending on the sign of x. | |
| // If x is too large to be represented by a float64 (|x| > [math.MaxFloat64]), | |
| // the result is (+Inf, [Above]) or (-Inf, [Below]), depending on the sign of x. | |
| func (x *Float) Float64() (float64, Accuracy) { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| switch x.form { | |
| case finite: | |
| // 0 < |x| < +Inf | |
| const ( | |
| fbits = 64 // float size | |
| mbits = 52 // mantissa size (excluding implicit msb) | |
| ebits = fbits - mbits - 1 // 11 exponent size | |
| bias = 1<<(ebits-1) - 1 // 1023 exponent bias | |
| dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) | |
| emin = 1 - bias // -1022 smallest unbiased exponent (normal) | |
| emax = bias // 1023 largest unbiased exponent (normal) | |
| ) | |
| // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. | |
| e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 | |
| // Compute precision p for float64 mantissa. | |
| // If the exponent is too small, we have a denormal number before | |
| // rounding and fewer than p mantissa bits of precision available | |
| // (the exponent remains fixed but the mantissa gets shifted right). | |
| p := mbits + 1 // precision of normal float | |
| if e < emin { | |
| // recompute precision | |
| p = mbits + 1 - emin + int(e) | |
| // If p == 0, the mantissa of x is shifted so much to the right | |
| // that its msb falls immediately to the right of the float64 | |
| // mantissa space. In other words, if the smallest denormal is | |
| // considered "1.0", for p == 0, the mantissa value m is >= 0.5. | |
| // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. | |
| // If m == 0.5, it is rounded down to even, i.e., 0.0. | |
| // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. | |
| if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { | |
| // underflow to ±0 | |
| if x.neg { | |
| var z float64 | |
| return -z, Above | |
| } | |
| return 0.0, Below | |
| } | |
| // otherwise, round up | |
| // We handle p == 0 explicitly because it's easy and because | |
| // Float.round doesn't support rounding to 0 bits of precision. | |
| if p == 0 { | |
| if x.neg { | |
| return -math.SmallestNonzeroFloat64, Below | |
| } | |
| return math.SmallestNonzeroFloat64, Above | |
| } | |
| } | |
| // p > 0 | |
| // round | |
| var r Float | |
| r.prec = uint32(p) | |
| r.Set(x) | |
| e = r.exp - 1 | |
| // Rounding may have caused r to overflow to ±Inf | |
| // (rounding never causes underflows to 0). | |
| // If the exponent is too large, also overflow to ±Inf. | |
| if r.form == inf || e > emax { | |
| // overflow | |
| if x.neg { | |
| return math.Inf(-1), Below | |
| } | |
| return math.Inf(+1), Above | |
| } | |
| // e <= emax | |
| // Determine sign, biased exponent, and mantissa. | |
| var sign, bexp, mant uint64 | |
| if x.neg { | |
| sign = 1 << (fbits - 1) | |
| } | |
| // Rounding may have caused a denormal number to | |
| // become normal. Check again. | |
| if e < emin { | |
| // denormal number: recompute precision | |
| // Since rounding may have at best increased precision | |
| // and we have eliminated p <= 0 early, we know p > 0. | |
| // bexp == 0 for denormals | |
| p = mbits + 1 - emin + int(e) | |
| mant = msb64(r.mant) >> uint(fbits-p) | |
| } else { | |
| // normal number: emin <= e <= emax | |
| bexp = uint64(e+bias) << mbits | |
| mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) | |
| } | |
| return math.Float64frombits(sign | bexp | mant), r.acc | |
| case zero: | |
| if x.neg { | |
| var z float64 | |
| return -z, Exact | |
| } | |
| return 0.0, Exact | |
| case inf: | |
| if x.neg { | |
| return math.Inf(-1), Exact | |
| } | |
| return math.Inf(+1), Exact | |
| } | |
| panic("unreachable") | |
| } | |
| // Int returns the result of truncating x towards zero; | |
| // or nil if x is an infinity. | |
| // The result is [Exact] if x.IsInt(); otherwise it is [Below] | |
| // for x > 0, and [Above] for x < 0. | |
| // If a non-nil *[Int] argument z is provided, [Int] stores | |
| // the result in z instead of allocating a new [Int]. | |
| func (x *Float) Int(z *Int) (*Int, Accuracy) { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| if z == nil && x.form <= finite { | |
| z = new(Int) | |
| } | |
| switch x.form { | |
| case finite: | |
| // 0 < |x| < +Inf | |
| acc := makeAcc(x.neg) | |
| if x.exp <= 0 { | |
| // 0 < |x| < 1 | |
| return z.SetInt64(0), acc | |
| } | |
| // x.exp > 0 | |
| // 1 <= |x| < +Inf | |
| // determine minimum required precision for x | |
| allBits := uint(len(x.mant)) * _W | |
| exp := uint(x.exp) | |
| if x.MinPrec() <= exp { | |
| acc = Exact | |
| } | |
| // shift mantissa as needed | |
| if z == nil { | |
| z = new(Int) | |
| } | |
| z.neg = x.neg | |
| switch { | |
| case exp > allBits: | |
| z.abs = z.abs.lsh(x.mant, exp-allBits) | |
| default: | |
| z.abs = z.abs.set(x.mant) | |
| case exp < allBits: | |
| z.abs = z.abs.rsh(x.mant, allBits-exp) | |
| } | |
| return z, acc | |
| case zero: | |
| return z.SetInt64(0), Exact | |
| case inf: | |
| return nil, makeAcc(x.neg) | |
| } | |
| panic("unreachable") | |
| } | |
| // Rat returns the rational number corresponding to x; | |
| // or nil if x is an infinity. | |
| // The result is [Exact] if x is not an Inf. | |
| // If a non-nil *[Rat] argument z is provided, [Rat] stores | |
| // the result in z instead of allocating a new [Rat]. | |
| func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { | |
| if debugFloat { | |
| x.validate() | |
| } | |
| if z == nil && x.form <= finite { | |
| z = new(Rat) | |
| } | |
| switch x.form { | |
| case finite: | |
| // 0 < |x| < +Inf | |
| allBits := int32(len(x.mant)) * _W | |
| // build up numerator and denominator | |
| z.a.neg = x.neg | |
| switch { | |
| case x.exp > allBits: | |
| z.a.abs = z.a.abs.lsh(x.mant, uint(x.exp-allBits)) | |
| z.b.abs = z.b.abs[:0] // == 1 (see Rat) | |
| // z already in normal form | |
| default: | |
| z.a.abs = z.a.abs.set(x.mant) | |
| z.b.abs = z.b.abs[:0] // == 1 (see Rat) | |
| // z already in normal form | |
| case x.exp < allBits: | |
| z.a.abs = z.a.abs.set(x.mant) | |
| t := z.b.abs.setUint64(1) | |
| z.b.abs = t.lsh(t, uint(allBits-x.exp)) | |
| z.norm() | |
| } | |
| return z, Exact | |
| case zero: | |
| return z.SetInt64(0), Exact | |
| case inf: | |
| return nil, makeAcc(x.neg) | |
| } | |
| panic("unreachable") | |
| } | |
| // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) | |
| // and returns z. | |
| func (z *Float) Abs(x *Float) *Float { | |
| z.Set(x) | |
| z.neg = false | |
| return z | |
| } | |
| // Neg sets z to the (possibly rounded) value of x with its sign negated, | |
| // and returns z. | |
| func (z *Float) Neg(x *Float) *Float { | |
| z.Set(x) | |
| z.neg = !z.neg | |
| return z | |
| } | |
| func validateBinaryOperands(x, y *Float) { | |
| if !debugFloat { | |
| // avoid performance bugs | |
| panic("validateBinaryOperands called but debugFloat is not set") | |
| } | |
| if len(x.mant) == 0 { | |
| panic("empty mantissa for x") | |
| } | |
| if len(y.mant) == 0 { | |
| panic("empty mantissa for y") | |
| } | |
| } | |
| // z = x + y, ignoring signs of x and y for the addition | |
| // but using the sign of z for rounding the result. | |
| // x and y must have a non-empty mantissa and valid exponent. | |
| func (z *Float) uadd(x, y *Float) { | |
| // Note: This implementation requires 2 shifts most of the | |
| // time. It is also inefficient if exponents or precisions | |
| // differ by wide margins. The following article describes | |
| // an efficient (but much more complicated) implementation | |
| // compatible with the internal representation used here: | |
| // | |
| // Vincent Lefèvre: "The Generic Multiple-Precision Floating- | |
| // Point Addition With Exact Rounding (as in the MPFR Library)" | |
| // http://www.vinc17.net/research/papers/rnc6.pdf | |
| if debugFloat { | |
| validateBinaryOperands(x, y) | |
| } | |
| // compute exponents ex, ey for mantissa with "binary point" | |
| // on the right (mantissa.0) - use int64 to avoid overflow | |
| ex := int64(x.exp) - int64(len(x.mant))*_W | |
| ey := int64(y.exp) - int64(len(y.mant))*_W | |
| al := alias(z.mant, x.mant) || alias(z.mant, y.mant) | |
| // TODO(gri) having a combined add-and-shift primitive | |
| // could make this code significantly faster | |
| switch { | |
| case ex < ey: | |
| if al { | |
| t := nat(nil).lsh(y.mant, uint(ey-ex)) | |
| z.mant = z.mant.add(x.mant, t) | |
| } else { | |
| z.mant = z.mant.lsh(y.mant, uint(ey-ex)) | |
| z.mant = z.mant.add(x.mant, z.mant) | |
| } | |
| default: | |
| // ex == ey, no shift needed | |
| z.mant = z.mant.add(x.mant, y.mant) | |
| case ex > ey: | |
| if al { | |
| t := nat(nil).lsh(x.mant, uint(ex-ey)) | |
| z.mant = z.mant.add(t, y.mant) | |
| } else { | |
| z.mant = z.mant.lsh(x.mant, uint(ex-ey)) | |
| z.mant = z.mant.add(z.mant, y.mant) | |
| } | |
| ex = ey | |
| } | |
| // len(z.mant) > 0 | |
| z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) | |
| } | |
| // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction | |
| // but using the sign of z for rounding the result. | |
| // x and y must have a non-empty mantissa and valid exponent. | |
| func (z *Float) usub(x, y *Float) { | |
| // This code is symmetric to uadd. | |
| // We have not factored the common code out because | |
| // eventually uadd (and usub) should be optimized | |
| // by special-casing, and the code will diverge. | |
| if debugFloat { | |
| validateBinaryOperands(x, y) | |
| } | |
| ex := int64(x.exp) - int64(len(x.mant))*_W | |
| ey := int64(y.exp) - int64(len(y.mant))*_W | |
| al := alias(z.mant, x.mant) || alias(z.mant, y.mant) | |
| switch { | |
| case ex < ey: | |
| if al { | |
| t := nat(nil).lsh(y.mant, uint(ey-ex)) | |
| z.mant = t.sub(x.mant, t) | |
| } else { | |
| z.mant = z.mant.lsh(y.mant, uint(ey-ex)) | |
| z.mant = z.mant.sub(x.mant, z.mant) | |
| } | |
| default: | |
| // ex == ey, no shift needed | |
| z.mant = z.mant.sub(x.mant, y.mant) | |
| case ex > ey: | |
| if al { | |
| t := nat(nil).lsh(x.mant, uint(ex-ey)) | |
| z.mant = t.sub(t, y.mant) | |
| } else { | |
| z.mant = z.mant.lsh(x.mant, uint(ex-ey)) | |
| z.mant = z.mant.sub(z.mant, y.mant) | |
| } | |
| ex = ey | |
| } | |
| // operands may have canceled each other out | |
| if len(z.mant) == 0 { | |
| z.acc = Exact | |
| z.form = zero | |
| z.neg = false | |
| return | |
| } | |
| // len(z.mant) > 0 | |
| z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) | |
| } | |
| // z = x * y, ignoring signs of x and y for the multiplication | |
| // but using the sign of z for rounding the result. | |
| // x and y must have a non-empty mantissa and valid exponent. | |
| func (z *Float) umul(x, y *Float) { | |
| if debugFloat { | |
| validateBinaryOperands(x, y) | |
| } | |
| // Note: This is doing too much work if the precision | |
| // of z is less than the sum of the precisions of x | |
| // and y which is often the case (e.g., if all floats | |
| // have the same precision). | |
| // TODO(gri) Optimize this for the common case. | |
| e := int64(x.exp) + int64(y.exp) | |
| if x == y { | |
| z.mant = z.mant.sqr(nil, x.mant) | |
| } else { | |
| z.mant = z.mant.mul(nil, x.mant, y.mant) | |
| } | |
| z.setExpAndRound(e-fnorm(z.mant), 0) | |
| } | |
| // z = x / y, ignoring signs of x and y for the division | |
| // but using the sign of z for rounding the result. | |
| // x and y must have a non-empty mantissa and valid exponent. | |
| func (z *Float) uquo(x, y *Float) { | |
| if debugFloat { | |
| validateBinaryOperands(x, y) | |
| } | |
| // mantissa length in words for desired result precision + 1 | |
| // (at least one extra bit so we get the rounding bit after | |
| // the division) | |
| n := int(z.prec/_W) + 1 | |
| // compute adjusted x.mant such that we get enough result precision | |
| xadj := x.mant | |
| if d := n - len(x.mant) + len(y.mant); d > 0 { | |
| // d extra words needed => add d "0 digits" to x | |
| xadj = make(nat, len(x.mant)+d) | |
| copy(xadj[d:], x.mant) | |
| } | |
| // TODO(gri): If we have too many digits (d < 0), we should be able | |
| // to shorten x for faster division. But we must be extra careful | |
| // with rounding in that case. | |
| // Compute d before division since there may be aliasing of x.mant | |
| // (via xadj) or y.mant with z.mant. | |
| d := len(xadj) - len(y.mant) | |
| // divide | |
| stk := getStack() | |
| defer stk.free() | |
| var r nat | |
| z.mant, r = z.mant.div(stk, nil, xadj, y.mant) | |
| e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W | |
| // The result is long enough to include (at least) the rounding bit. | |
| // If there's a non-zero remainder, the corresponding fractional part | |
| // (if it were computed), would have a non-zero sticky bit (if it were | |
| // zero, it couldn't have a non-zero remainder). | |
| var sbit uint | |
| if len(r) > 0 { | |
| sbit = 1 | |
| } | |
| z.setExpAndRound(e-fnorm(z.mant), sbit) | |
| } | |
| // ucmp returns -1, 0, or +1, depending on whether | |
| // |x| < |y|, |x| == |y|, or |x| > |y|. | |
| // x and y must have a non-empty mantissa and valid exponent. | |
| func (x *Float) ucmp(y *Float) int { | |
| if debugFloat { | |
| validateBinaryOperands(x, y) | |
| } | |
| switch { | |
| case x.exp < y.exp: | |
| return -1 | |
| case x.exp > y.exp: | |
| return +1 | |
| } | |
| // x.exp == y.exp | |
| // compare mantissas | |
| i := len(x.mant) | |
| j := len(y.mant) | |
| for i > 0 || j > 0 { | |
| var xm, ym Word | |
| if i > 0 { | |
| i-- | |
| xm = x.mant[i] | |
| } | |
| if j > 0 { | |
| j-- | |
| ym = y.mant[j] | |
| } | |
| switch { | |
| case xm < ym: | |
| return -1 | |
| case xm > ym: | |
| return +1 | |
| } | |
| } | |
| return 0 | |
| } | |
| // Handling of sign bit as defined by IEEE 754-2008, section 6.3: | |
| // | |
| // When neither the inputs nor result are NaN, the sign of a product or | |
| // quotient is the exclusive OR of the operands’ signs; the sign of a sum, | |
| // or of a difference x−y regarded as a sum x+(−y), differs from at most | |
| // one of the addends’ signs; and the sign of the result of conversions, | |
| // the quantize operation, the roundToIntegral operations, and the | |
| // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. | |
| // These rules shall apply even when operands or results are zero or infinite. | |
| // | |
| // When the sum of two operands with opposite signs (or the difference of | |
| // two operands with like signs) is exactly zero, the sign of that sum (or | |
| // difference) shall be +0 in all rounding-direction attributes except | |
| // roundTowardNegative; under that attribute, the sign of an exact zero | |
| // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same | |
| // sign as x even when x is zero. | |
| // | |
| // See also: https://play.golang.org/p/RtH3UCt5IH | |
| // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, | |
| // it is changed to the larger of x's or y's precision before the operation. | |
| // Rounding is performed according to z's precision and rounding mode; and | |
| // z's accuracy reports the result error relative to the exact (not rounded) | |
| // result. Add panics with [ErrNaN] if x and y are infinities with opposite | |
| // signs. The value of z is undefined in that case. | |
| func (z *Float) Add(x, y *Float) *Float { | |
| if debugFloat { | |
| x.validate() | |
| y.validate() | |
| } | |
| if z.prec == 0 { | |
| z.prec = max(x.prec, y.prec) | |
| } | |
| if x.form == finite && y.form == finite { | |
| // x + y (common case) | |
| // Below we set z.neg = x.neg, and when z aliases y this will | |
| // change the y operand's sign. This is fine, because if an | |
| // operand aliases the receiver it'll be overwritten, but we still | |
| // want the original x.neg and y.neg values when we evaluate | |
| // x.neg != y.neg, so we need to save y.neg before setting z.neg. | |
| yneg := y.neg | |
| z.neg = x.neg | |
| if x.neg == yneg { | |
| // x + y == x + y | |
| // (-x) + (-y) == -(x + y) | |
| z.uadd(x, y) | |
| } else { | |
| // x + (-y) == x - y == -(y - x) | |
| // (-x) + y == y - x == -(x - y) | |
| if x.ucmp(y) > 0 { | |
| z.usub(x, y) | |
| } else { | |
| z.neg = !z.neg | |
| z.usub(y, x) | |
| } | |
| } | |
| if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { | |
| z.neg = true | |
| } | |
| return z | |
| } | |
| if x.form == inf && y.form == inf && x.neg != y.neg { | |
| // +Inf + -Inf | |
| // -Inf + +Inf | |
| // value of z is undefined but make sure it's valid | |
| z.acc = Exact | |
| z.form = zero | |
| z.neg = false | |
| panic(ErrNaN{"addition of infinities with opposite signs"}) | |
| } | |
| if x.form == zero && y.form == zero { | |
| // ±0 + ±0 | |
| z.acc = Exact | |
| z.form = zero | |
| z.neg = x.neg && y.neg // -0 + -0 == -0 | |
| return z | |
| } | |
| if x.form == inf || y.form == zero { | |
| // ±Inf + y | |
| // x + ±0 | |
| return z.Set(x) | |
| } | |
| // ±0 + y | |
| // x + ±Inf | |
| return z.Set(y) | |
| } | |
| // Sub sets z to the rounded difference x-y and returns z. | |
| // Precision, rounding, and accuracy reporting are as for [Float.Add]. | |
| // Sub panics with [ErrNaN] if x and y are infinities with equal | |
| // signs. The value of z is undefined in that case. | |
| func (z *Float) Sub(x, y *Float) *Float { | |
| if debugFloat { | |
| x.validate() | |
| y.validate() | |
| } | |
| if z.prec == 0 { | |
| z.prec = max(x.prec, y.prec) | |
| } | |
| if x.form == finite && y.form == finite { | |
| // x - y (common case) | |
| yneg := y.neg | |
| z.neg = x.neg | |
| if x.neg != yneg { | |
| // x - (-y) == x + y | |
| // (-x) - y == -(x + y) | |
| z.uadd(x, y) | |
| } else { | |
| // x - y == x - y == -(y - x) | |
| // (-x) - (-y) == y - x == -(x - y) | |
| if x.ucmp(y) > 0 { | |
| z.usub(x, y) | |
| } else { | |
| z.neg = !z.neg | |
| z.usub(y, x) | |
| } | |
| } | |
| if z.form == zero && z.mode == ToNegativeInf && z.acc == Exact { | |
| z.neg = true | |
| } | |
| return z | |
| } | |
| if x.form == inf && y.form == inf && x.neg == y.neg { | |
| // +Inf - +Inf | |
| // -Inf - -Inf | |
| // value of z is undefined but make sure it's valid | |
| z.acc = Exact | |
| z.form = zero | |
| z.neg = false | |
| panic(ErrNaN{"subtraction of infinities with equal signs"}) | |
| } | |
| if x.form == zero && y.form == zero { | |
| // ±0 - ±0 | |
| z.acc = Exact | |
| z.form = zero | |
| z.neg = x.neg && !y.neg // -0 - +0 == -0 | |
| return z | |
| } | |
| if x.form == inf || y.form == zero { | |
| // ±Inf - y | |
| // x - ±0 | |
| return z.Set(x) | |
| } | |
| // ±0 - y | |
| // x - ±Inf | |
| return z.Neg(y) | |
| } | |
| // Mul sets z to the rounded product x*y and returns z. | |
| // Precision, rounding, and accuracy reporting are as for [Float.Add]. | |
| // Mul panics with [ErrNaN] if one operand is zero and the other | |
| // operand an infinity. The value of z is undefined in that case. | |
| func (z *Float) Mul(x, y *Float) *Float { | |
| if debugFloat { | |
| x.validate() | |
| y.validate() | |
| } | |
| if z.prec == 0 { | |
| z.prec = max(x.prec, y.prec) | |
| } | |
| z.neg = x.neg != y.neg | |
| if x.form == finite && y.form == finite { | |
| // x * y (common case) | |
| z.umul(x, y) | |
| return z | |
| } | |
| z.acc = Exact | |
| if x.form == zero && y.form == inf || x.form == inf && y.form == zero { | |
| // ±0 * ±Inf | |
| // ±Inf * ±0 | |
| // value of z is undefined but make sure it's valid | |
| z.form = zero | |
| z.neg = false | |
| panic(ErrNaN{"multiplication of zero with infinity"}) | |
| } | |
| if x.form == inf || y.form == inf { | |
| // ±Inf * y | |
| // x * ±Inf | |
| z.form = inf | |
| return z | |
| } | |
| // ±0 * y | |
| // x * ±0 | |
| z.form = zero | |
| return z | |
| } | |
| // Quo sets z to the rounded quotient x/y and returns z. | |
| // Precision, rounding, and accuracy reporting are as for [Float.Add]. | |
| // Quo panics with [ErrNaN] if both operands are zero or infinities. | |
| // The value of z is undefined in that case. | |
| func (z *Float) Quo(x, y *Float) *Float { | |
| if debugFloat { | |
| x.validate() | |
| y.validate() | |
| } | |
| if z.prec == 0 { | |
| z.prec = max(x.prec, y.prec) | |
| } | |
| z.neg = x.neg != y.neg | |
| if x.form == finite && y.form == finite { | |
| // x / y (common case) | |
| z.uquo(x, y) | |
| return z | |
| } | |
| z.acc = Exact | |
| if x.form == zero && y.form == zero || x.form == inf && y.form == inf { | |
| // ±0 / ±0 | |
| // ±Inf / ±Inf | |
| // value of z is undefined but make sure it's valid | |
| z.form = zero | |
| z.neg = false | |
| panic(ErrNaN{"division of zero by zero or infinity by infinity"}) | |
| } | |
| if x.form == zero || y.form == inf { | |
| // ±0 / y | |
| // x / ±Inf | |
| z.form = zero | |
| return z | |
| } | |
| // x / ±0 | |
| // ±Inf / y | |
| z.form = inf | |
| return z | |
| } | |
| // Cmp compares x and y and returns: | |
| // - -1 if x < y; | |
| // - 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf); | |
| // - +1 if x > y. | |
| func (x *Float) Cmp(y *Float) int { | |
| if debugFloat { | |
| x.validate() | |
| y.validate() | |
| } | |
| mx := x.ord() | |
| my := y.ord() | |
| switch { | |
| case mx < my: | |
| return -1 | |
| case mx > my: | |
| return +1 | |
| } | |
| // mx == my | |
| // only if |mx| == 1 we have to compare the mantissae | |
| switch mx { | |
| case -1: | |
| return y.ucmp(x) | |
| case +1: | |
| return x.ucmp(y) | |
| } | |
| return 0 | |
| } | |
| // ord classifies x and returns: | |
| // | |
| // -2 if -Inf == x | |
| // -1 if -Inf < x < 0 | |
| // 0 if x == 0 (signed or unsigned) | |
| // +1 if 0 < x < +Inf | |
| // +2 if x == +Inf | |
| func (x *Float) ord() int { | |
| var m int | |
| switch x.form { | |
| case finite: | |
| m = 1 | |
| case zero: | |
| return 0 | |
| case inf: | |
| m = 2 | |
| } | |
| if x.neg { | |
| m = -m | |
| } | |
| return m | |
| } | |