/** * @file Helper module for mathematical processing. * * These functions and classes are only used internally, * meaning an end-user shouldn't need to access anything here. * * @module utils/maths */ /** * @typedef {Int8Array | Uint8Array | Uint8ClampedArray | Int16Array | Uint16Array | Int32Array | Uint32Array | Float16Array | Float32Array | Float64Array} TypedArray * @typedef {BigInt64Array | BigUint64Array} BigTypedArray * @typedef {TypedArray | BigTypedArray} AnyTypedArray */ /** * @param {TypedArray} input */ export function interpolate_data(input, [in_channels, in_height, in_width], [out_height, out_width], mode = 'bilinear', align_corners = false) { // TODO use mode and align_corners // Output image dimensions const x_scale = out_width / in_width; const y_scale = out_height / in_height; // Output image // @ts-ignore const out_img = new input.constructor(out_height * out_width * in_channels); // Pre-calculate strides const inStride = in_height * in_width; const outStride = out_height * out_width; for (let i = 0; i < out_height; ++i) { for (let j = 0; j < out_width; ++j) { // Calculate output offset const outOffset = i * out_width + j; // Calculate input pixel coordinates const x = (j + 0.5) / x_scale - 0.5; const y = (i + 0.5) / y_scale - 0.5; // Calculate the four nearest input pixels // We also check if the input pixel coordinates are within the image bounds let x1 = Math.floor(x); let y1 = Math.floor(y); const x2 = Math.min(x1 + 1, in_width - 1); const y2 = Math.min(y1 + 1, in_height - 1); x1 = Math.max(x1, 0); y1 = Math.max(y1, 0); // Calculate the fractional distances between the input pixel and the four nearest pixels const s = x - x1; const t = y - y1; // Perform bilinear interpolation const w1 = (1 - s) * (1 - t); const w2 = s * (1 - t); const w3 = (1 - s) * t; const w4 = s * t; // Calculate the four nearest input pixel indices const yStride = y1 * in_width; const xStride = y2 * in_width; const idx1 = yStride + x1; const idx2 = yStride + x2; const idx3 = xStride + x1; const idx4 = xStride + x2; for (let k = 0; k < in_channels; ++k) { // Calculate channel offset const cOffset = k * inStride; out_img[k * outStride + outOffset] = w1 * input[cOffset + idx1] + w2 * input[cOffset + idx2] + w3 * input[cOffset + idx3] + w4 * input[cOffset + idx4]; } } } return out_img; } /** * Helper method to permute a `AnyTypedArray` directly * @template {AnyTypedArray} T * @param {T} array * @param {number[]} dims * @param {number[]} axes * @returns {[T, number[]]} The permuted array and the new shape. */ export function permute_data(array, dims, axes) { // Calculate the new shape of the permuted array // and the stride of the original array const shape = new Array(axes.length); const stride = new Array(axes.length); for (let i = axes.length - 1, s = 1; i >= 0; --i) { stride[i] = s; shape[i] = dims[axes[i]]; s *= shape[i]; } // Precompute inverse mapping of stride const invStride = axes.map((_, i) => stride[axes.indexOf(i)]); // Create the permuted array with the new shape // @ts-ignore const permutedData = new array.constructor(array.length); // Permute the original array to the new array for (let i = 0; i < array.length; ++i) { let newIndex = 0; for (let j = dims.length - 1, k = i; j >= 0; --j) { newIndex += (k % dims[j]) * invStride[j]; k = Math.floor(k / dims[j]); } permutedData[newIndex] = array[i]; } return [permutedData, shape]; } /** * Compute the softmax of an array of numbers. * @template {TypedArray|number[]} T * @param {T} arr The array of numbers to compute the softmax of. * @returns {T} The softmax array. */ export function softmax(arr) { // Compute the maximum value in the array const maxVal = max(arr)[0]; // Compute the exponentials of the array values const exps = arr.map(x => Math.exp(x - maxVal)); // Compute the sum of the exponentials // @ts-ignore const sumExps = exps.reduce((acc, val) => acc + val, 0); // Compute the softmax values const softmaxArr = exps.map(x => x / sumExps); return /** @type {T} */(softmaxArr); } /** * Calculates the logarithm of the softmax function for the input array. * @template {TypedArray|number[]} T * @param {T} arr The input array to calculate the log_softmax function for. * @returns {T} The resulting log_softmax array. */ export function log_softmax(arr) { // Compute the maximum value in the array const maxVal = max(arr)[0]; // Compute the sum of the exponentials let sumExps = 0; for(let i = 0; i < arr.length; ++i) { sumExps += Math.exp(arr[i] - maxVal); } // Compute the log of the sum const logSum = Math.log(sumExps); // Compute the softmax values const logSoftmaxArr = arr.map(x => x - maxVal - logSum); return /** @type {T} */(logSoftmaxArr); } /** * Calculates the dot product of two arrays. * @param {number[]} arr1 The first array. * @param {number[]} arr2 The second array. * @returns {number} The dot product of arr1 and arr2. */ export function dot(arr1, arr2) { let result = 0; for (let i = 0; i < arr1.length; ++i) { result += arr1[i] * arr2[i]; } return result; } /** * Computes the cosine similarity between two arrays. * * @param {number[]} arr1 The first array. * @param {number[]} arr2 The second array. * @returns {number} The cosine similarity between the two arrays. */ export function cos_sim(arr1, arr2) { // Calculate dot product of the two arrays const dotProduct = dot(arr1, arr2); // Calculate the magnitude of the first array const magnitudeA = magnitude(arr1); // Calculate the magnitude of the second array const magnitudeB = magnitude(arr2); // Calculate the cosine similarity const cosineSimilarity = dotProduct / (magnitudeA * magnitudeB); return cosineSimilarity; } /** * Calculates the magnitude of a given array. * @param {number[]} arr The array to calculate the magnitude of. * @returns {number} The magnitude of the array. */ export function magnitude(arr) { return Math.sqrt(arr.reduce((acc, val) => acc + val * val, 0)); } /** * Returns the value and index of the minimum element in an array. * @template {number[]|bigint[]|AnyTypedArray} T * @param {T} arr array of numbers. * @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the minimum element, of the form: [valueOfMin, indexOfMin] * @throws {Error} If array is empty. */ export function min(arr) { if (arr.length === 0) throw Error('Array must not be empty'); let min = arr[0]; let indexOfMin = 0; for (let i = 1; i < arr.length; ++i) { if (arr[i] < min) { min = arr[i]; indexOfMin = i; } } return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([min, indexOfMin]); } /** * Returns the value and index of the maximum element in an array. * @template {number[]|bigint[]|AnyTypedArray} T * @param {T} arr array of numbers. * @returns {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} the value and index of the maximum element, of the form: [valueOfMax, indexOfMax] * @throws {Error} If array is empty. */ export function max(arr) { if (arr.length === 0) throw Error('Array must not be empty'); let max = arr[0]; let indexOfMax = 0; for (let i = 1; i < arr.length; ++i) { if (arr[i] > max) { max = arr[i]; indexOfMax = i; } } return /** @type {T extends bigint[]|BigTypedArray ? [bigint, number] : [number, number]} */([max, indexOfMax]); } function isPowerOfTwo(number) { // Check if the number is greater than 0 and has only one bit set to 1 return (number > 0) && ((number & (number - 1)) === 0); } /** * Implementation of Radix-4 FFT. * * P2FFT class provides functionality for performing Fast Fourier Transform on arrays * which are a power of two in length. * Code adapted from https://www.npmjs.com/package/fft.js */ class P2FFT { /** * @param {number} size The size of the input array. Must be a power of two larger than 1. * @throws {Error} FFT size must be a power of two larger than 1. */ constructor(size) { this.size = size | 0; // convert to a 32-bit signed integer if (this.size <= 1 || !isPowerOfTwo(this.size)) throw new Error('FFT size must be a power of two larger than 1'); this._csize = size << 1; this.table = new Float64Array(this.size * 2); for (let i = 0; i < this.table.length; i += 2) { const angle = Math.PI * i / this.size; this.table[i] = Math.cos(angle); this.table[i + 1] = -Math.sin(angle); } // Find size's power of two let power = 0; for (let t = 1; this.size > t; t <<= 1) ++power; // Calculate initial step's width: // * If we are full radix-4, it is 2x smaller to give inital len=8 // * Otherwise it is the same as `power` to give len=4 this._width = power % 2 === 0 ? power - 1 : power; // Pre-compute bit-reversal patterns this._bitrev = new Int32Array(1 << this._width); for (let j = 0; j < this._bitrev.length; ++j) { this._bitrev[j] = 0; for (let shift = 0; shift < this._width; shift += 2) { const revShift = this._width - shift - 2; this._bitrev[j] |= ((j >>> shift) & 3) << revShift; } } } /** * Create a complex number array with size `2 * size` * * @returns {Float64Array} A complex number array with size `2 * size` */ createComplexArray() { return new Float64Array(this._csize); } /** * Converts a complex number representation stored in a Float64Array to an array of real numbers. * * @param {Float64Array} complex The complex number representation to be converted. * @param {number[]} [storage] An optional array to store the result in. * @returns {number[]} An array of real numbers representing the input complex number representation. */ fromComplexArray(complex, storage) { const res = storage || new Array(complex.length >>> 1); for (let i = 0; i < complex.length; i += 2) res[i >>> 1] = complex[i]; return res; } /** * Convert a real-valued input array to a complex-valued output array. * @param {Float64Array} input The real-valued input array. * @param {Float64Array} [storage] Optional buffer to store the output array. * @returns {Float64Array} The complex-valued output array. */ toComplexArray(input, storage) { const res = storage || this.createComplexArray(); for (let i = 0; i < res.length; i += 2) { res[i] = input[i >>> 1]; res[i + 1] = 0; } return res; } /** * Performs a Fast Fourier Transform (FFT) on the given input data and stores the result in the output buffer. * * @param {Float64Array} out The output buffer to store the result. * @param {Float64Array} data The input data to transform. * * @throws {Error} Input and output buffers must be different. * * @returns {void} */ transform(out, data) { if (out === data) throw new Error('Input and output buffers must be different'); this._transform4(out, data, 1 /* DONE */); } /** * Performs a real-valued forward FFT on the given input buffer and stores the result in the given output buffer. * The input buffer must contain real values only, while the output buffer will contain complex values. The input and * output buffers must be different. * * @param {Float64Array} out The output buffer. * @param {Float64Array} data The input buffer containing real values. * * @throws {Error} If the input and output buffers are the same. */ realTransform(out, data) { if (out === data) throw new Error('Input and output buffers must be different'); this._realTransform4(out, data, 1 /* DONE */); } /** * Performs an inverse FFT transformation on the given `data` array, and stores the result in `out`. * The `out` array must be a different buffer than the `data` array. The `out` array will contain the * result of the transformation. The `data` array will not be modified. * * @param {Float64Array} out The output buffer for the transformed data. * @param {Float64Array} data The input data to transform. * @throws {Error} If `out` and `data` refer to the same buffer. * @returns {void} */ inverseTransform(out, data) { if (out === data) throw new Error('Input and output buffers must be different'); this._transform4(out, data, -1 /* DONE */); for (let i = 0; i < out.length; ++i) out[i] /= this.size; } /** * Performs a radix-4 implementation of a discrete Fourier transform on a given set of data. * * @param {Float64Array} out The output buffer for the transformed data. * @param {Float64Array} data The input buffer of data to be transformed. * @param {number} inv A scaling factor to apply to the transform. * @returns {void} */ _transform4(out, data, inv) { // radix-4 implementation const size = this._csize; // Initial step (permute and transform) const width = this._width; let step = 1 << width; let len = (size / step) << 1; let outOff; let t; const bitrev = this._bitrev; if (len === 4) { for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) { const off = bitrev[t]; this._singleTransform2(data, out, outOff, off, step); } } else { // len === 8 for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) { const off = bitrev[t]; this._singleTransform4(data, out, outOff, off, step, inv); } } // Loop through steps in decreasing order const table = this.table; for (step >>= 2; step >= 2; step >>= 2) { len = (size / step) << 1; const quarterLen = len >>> 2; // Loop through offsets in the data for (outOff = 0; outOff < size; outOff += len) { // Full case const limit = outOff + quarterLen - 1; for (let i = outOff, k = 0; i < limit; i += 2, k += step) { const A = i; const B = A + quarterLen; const C = B + quarterLen; const D = C + quarterLen; // Original values const Ar = out[A]; const Ai = out[A + 1]; const Br = out[B]; const Bi = out[B + 1]; const Cr = out[C]; const Ci = out[C + 1]; const Dr = out[D]; const Di = out[D + 1]; const tableBr = table[k]; const tableBi = inv * table[k + 1]; const MBr = Br * tableBr - Bi * tableBi; const MBi = Br * tableBi + Bi * tableBr; const tableCr = table[2 * k]; const tableCi = inv * table[2 * k + 1]; const MCr = Cr * tableCr - Ci * tableCi; const MCi = Cr * tableCi + Ci * tableCr; const tableDr = table[3 * k]; const tableDi = inv * table[3 * k + 1]; const MDr = Dr * tableDr - Di * tableDi; const MDi = Dr * tableDi + Di * tableDr; // Pre-Final values const T0r = Ar + MCr; const T0i = Ai + MCi; const T1r = Ar - MCr; const T1i = Ai - MCi; const T2r = MBr + MDr; const T2i = MBi + MDi; const T3r = inv * (MBr - MDr); const T3i = inv * (MBi - MDi); // Final values out[A] = T0r + T2r; out[A + 1] = T0i + T2i; out[B] = T1r + T3i; out[B + 1] = T1i - T3r; out[C] = T0r - T2r; out[C + 1] = T0i - T2i; out[D] = T1r - T3i; out[D + 1] = T1i + T3r; } } } } /** * Performs a radix-2 implementation of a discrete Fourier transform on a given set of data. * * @param {Float64Array} data The input buffer of data to be transformed. * @param {Float64Array} out The output buffer for the transformed data. * @param {number} outOff The offset at which to write the output data. * @param {number} off The offset at which to begin reading the input data. * @param {number} step The step size for indexing the input data. * @returns {void} */ _singleTransform2(data, out, outOff, off, step) { // radix-2 implementation // NOTE: Only called for len=4 const evenR = data[off]; const evenI = data[off + 1]; const oddR = data[off + step]; const oddI = data[off + step + 1]; out[outOff] = evenR + oddR; out[outOff + 1] = evenI + oddI; out[outOff + 2] = evenR - oddR; out[outOff + 3] = evenI - oddI; } /** * Performs radix-4 transformation on input data of length 8 * * @param {Float64Array} data Input data array of length 8 * @param {Float64Array} out Output data array of length 8 * @param {number} outOff Index of output array to start writing from * @param {number} off Index of input array to start reading from * @param {number} step Step size between elements in input array * @param {number} inv Scaling factor for inverse transform * * @returns {void} */ _singleTransform4(data, out, outOff, off, step, inv) { // radix-4 // NOTE: Only called for len=8 const step2 = step * 2; const step3 = step * 3; // Original values const Ar = data[off]; const Ai = data[off + 1]; const Br = data[off + step]; const Bi = data[off + step + 1]; const Cr = data[off + step2]; const Ci = data[off + step2 + 1]; const Dr = data[off + step3]; const Di = data[off + step3 + 1]; // Pre-Final values const T0r = Ar + Cr; const T0i = Ai + Ci; const T1r = Ar - Cr; const T1i = Ai - Ci; const T2r = Br + Dr; const T2i = Bi + Di; const T3r = inv * (Br - Dr); const T3i = inv * (Bi - Di); // Final values out[outOff] = T0r + T2r; out[outOff + 1] = T0i + T2i; out[outOff + 2] = T1r + T3i; out[outOff + 3] = T1i - T3r; out[outOff + 4] = T0r - T2r; out[outOff + 5] = T0i - T2i; out[outOff + 6] = T1r - T3i; out[outOff + 7] = T1i + T3r; } /** * Real input radix-4 implementation * @param {Float64Array} out Output array for the transformed data * @param {Float64Array} data Input array of real data to be transformed * @param {number} inv The scale factor used to normalize the inverse transform */ _realTransform4(out, data, inv) { // Real input radix-4 implementation const size = this._csize; // Initial step (permute and transform) const width = this._width; let step = 1 << width; let len = (size / step) << 1; let outOff; let t; const bitrev = this._bitrev; if (len === 4) { for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) { const off = bitrev[t]; this._singleRealTransform2(data, out, outOff, off >>> 1, step >>> 1); } } else { // len === 8 for (outOff = 0, t = 0; outOff < size; outOff += len, ++t) { const off = bitrev[t]; this._singleRealTransform4(data, out, outOff, off >>> 1, step >>> 1, inv); } } // Loop through steps in decreasing order const table = this.table; for (step >>= 2; step >= 2; step >>= 2) { len = (size / step) << 1; const halfLen = len >>> 1; const quarterLen = halfLen >>> 1; const hquarterLen = quarterLen >>> 1; // Loop through offsets in the data for (outOff = 0; outOff < size; outOff += len) { for (let i = 0, k = 0; i <= hquarterLen; i += 2, k += step) { const A = outOff + i; const B = A + quarterLen; const C = B + quarterLen; const D = C + quarterLen; // Original values const Ar = out[A]; const Ai = out[A + 1]; const Br = out[B]; const Bi = out[B + 1]; const Cr = out[C]; const Ci = out[C + 1]; const Dr = out[D]; const Di = out[D + 1]; // Middle values const MAr = Ar; const MAi = Ai; const tableBr = table[k]; const tableBi = inv * table[k + 1]; const MBr = Br * tableBr - Bi * tableBi; const MBi = Br * tableBi + Bi * tableBr; const tableCr = table[2 * k]; const tableCi = inv * table[2 * k + 1]; const MCr = Cr * tableCr - Ci * tableCi; const MCi = Cr * tableCi + Ci * tableCr; const tableDr = table[3 * k]; const tableDi = inv * table[3 * k + 1]; const MDr = Dr * tableDr - Di * tableDi; const MDi = Dr * tableDi + Di * tableDr; // Pre-Final values const T0r = MAr + MCr; const T0i = MAi + MCi; const T1r = MAr - MCr; const T1i = MAi - MCi; const T2r = MBr + MDr; const T2i = MBi + MDi; const T3r = inv * (MBr - MDr); const T3i = inv * (MBi - MDi); // Final values out[A] = T0r + T2r; out[A + 1] = T0i + T2i; out[B] = T1r + T3i; out[B + 1] = T1i - T3r; // Output final middle point if (i === 0) { out[C] = T0r - T2r; out[C + 1] = T0i - T2i; continue; } // Do not overwrite ourselves if (i === hquarterLen) continue; const SA = outOff + quarterLen - i; const SB = outOff + halfLen - i; out[SA] = T1r - inv * T3i; out[SA + 1] = -T1i - inv * T3r; out[SB] = T0r - inv * T2r; out[SB + 1] = -T0i + inv * T2i; } } } // Complete the spectrum by adding its mirrored negative frequency components. const half = size >>> 1; for (let i = 2; i < half; i += 2) { out[size - i] = out[i]; out[size - i + 1] = -out[i + 1]; } } /** * Performs a single real input radix-2 transformation on the provided data * * @param {Float64Array} data The input data array * @param {Float64Array} out The output data array * @param {number} outOff The output offset * @param {number} off The input offset * @param {number} step The step * * @returns {void} */ _singleRealTransform2(data, out, outOff, off, step) { // radix-2 implementation // NOTE: Only called for len=4 const evenR = data[off]; const oddR = data[off + step]; out[outOff] = evenR + oddR; out[outOff + 1] = 0; out[outOff + 2] = evenR - oddR; out[outOff + 3] = 0; } /** * Computes a single real-valued transform using radix-4 algorithm. * This method is only called for len=8. * * @param {Float64Array} data The input data array. * @param {Float64Array} out The output data array. * @param {number} outOff The offset into the output array. * @param {number} off The offset into the input array. * @param {number} step The step size for the input array. * @param {number} inv The value of inverse. */ _singleRealTransform4(data, out, outOff, off, step, inv) { // radix-4 // NOTE: Only called for len=8 const step2 = step * 2; const step3 = step * 3; // Original values const Ar = data[off]; const Br = data[off + step]; const Cr = data[off + step2]; const Dr = data[off + step3]; // Pre-Final values const T0r = Ar + Cr; const T1r = Ar - Cr; const T2r = Br + Dr; const T3r = inv * (Br - Dr); // Final values out[outOff] = T0r + T2r; out[outOff + 1] = 0; out[outOff + 2] = T1r; out[outOff + 3] = -T3r; out[outOff + 4] = T0r - T2r; out[outOff + 5] = 0; out[outOff + 6] = T1r; out[outOff + 7] = T3r; } } /** * NP2FFT class provides functionality for performing Fast Fourier Transform on arrays * which are not a power of two in length. In such cases, the chirp-z transform is used. * * For more information, see: https://math.stackexchange.com/questions/77118/non-power-of-2-ffts/77156#77156 */ class NP2FFT { /** * Constructs a new NP2FFT object. * @param {number} fft_length The length of the FFT */ constructor(fft_length) { // Helper variables const a = 2 * (fft_length - 1); const b = 2 * (2 * fft_length - 1); const nextP2 = 2 ** (Math.ceil(Math.log2(b))) this.bufferSize = nextP2; this._a = a; // Define buffers // Compute chirp for transform const chirp = new Float64Array(b); const ichirp = new Float64Array(nextP2); this._chirpBuffer = new Float64Array(nextP2); this._buffer1 = new Float64Array(nextP2); this._buffer2 = new Float64Array(nextP2); this._outBuffer1 = new Float64Array(nextP2); this._outBuffer2 = new Float64Array(nextP2); // Compute complex exponentiation const theta = -2 * Math.PI / fft_length; const baseR = Math.cos(theta); const baseI = Math.sin(theta); // Precompute helper for chirp-z transform for (let i = 0; i < b >> 1; ++i) { // Compute complex power: const e = (i + 1 - fft_length) ** 2 / 2.0; // Compute the modulus and argument of the result const result_mod = Math.sqrt(baseR ** 2 + baseI ** 2) ** e; const result_arg = e * Math.atan2(baseI, baseR); // Convert the result back to rectangular form // and assign to chirp and ichirp const i2 = 2 * i; chirp[i2] = result_mod * Math.cos(result_arg); chirp[i2 + 1] = result_mod * Math.sin(result_arg); // conjugate ichirp[i2] = chirp[i2]; ichirp[i2 + 1] = - chirp[i2 + 1]; } this._slicedChirpBuffer = chirp.subarray(a, b); // create object to perform Fast Fourier Transforms // with `nextP2` complex numbers this._f = new P2FFT(nextP2 >> 1); this._f.transform(this._chirpBuffer, ichirp); } _transform(output, input, real) { const ib1 = this._buffer1; const ib2 = this._buffer2; const ob2 = this._outBuffer1; const ob3 = this._outBuffer2; const cb = this._chirpBuffer; const sb = this._slicedChirpBuffer; const a = this._a; if (real) { // Real multiplication for (let j = 0; j < sb.length; j += 2) { const j2 = j + 1 const j3 = j >> 1; const a_real = input[j3]; ib1[j] = a_real * sb[j]; ib1[j2] = a_real * sb[j2]; } } else { // Complex multiplication for (let j = 0; j < sb.length; j += 2) { const j2 = j + 1 ib1[j] = input[j] * sb[j] - input[j2] * sb[j2]; ib1[j2] = input[j] * sb[j2] + input[j2] * sb[j]; } } this._f.transform(ob2, ib1); for (let j = 0; j < cb.length; j += 2) { const j2 = j + 1; ib2[j] = ob2[j] * cb[j] - ob2[j2] * cb[j2]; ib2[j2] = ob2[j] * cb[j2] + ob2[j2] * cb[j]; } this._f.inverseTransform(ob3, ib2); for (let j = 0; j < ob3.length; j += 2) { const a_real = ob3[j + a]; const a_imag = ob3[j + a + 1]; const b_real = sb[j]; const b_imag = sb[j + 1]; output[j] = a_real * b_real - a_imag * b_imag; output[j + 1] = a_real * b_imag + a_imag * b_real; } } transform(output, input) { this._transform(output, input, false); } realTransform(output, input) { this._transform(output, input, true); } } export class FFT { constructor(fft_length) { this.fft_length = fft_length; this.isPowerOfTwo = isPowerOfTwo(fft_length); if (this.isPowerOfTwo) { this.fft = new P2FFT(fft_length); this.outputBufferSize = 2 * fft_length; } else { this.fft = new NP2FFT(fft_length); this.outputBufferSize = this.fft.bufferSize; } } realTransform(out, input) { this.fft.realTransform(out, input); } transform(out, input) { this.fft.transform(out, input); } } /** * Performs median filter on the provided data. Padding is done by mirroring the data. * @param {AnyTypedArray} data The input array * @param {number} windowSize The window size */ export function medianFilter(data, windowSize) { if (windowSize % 2 === 0 || windowSize <= 0) { throw new Error('Window size must be a positive odd number'); } // @ts-ignore const outputArray = new data.constructor(data.length); // @ts-ignore const buffer = new data.constructor(windowSize); // Reusable array for storing values const halfWindowSize = Math.floor(windowSize / 2); for (let i = 0; i < data.length; ++i) { let valuesIndex = 0; for (let j = -halfWindowSize; j <= halfWindowSize; ++j) { let index = i + j; if (index < 0) { index = Math.abs(index); } else if (index >= data.length) { index = 2 * (data.length - 1) - index; } buffer[valuesIndex++] = data[index]; } buffer.sort(); outputArray[i] = buffer[halfWindowSize]; } return outputArray; } /** * Helper function to round a number to a given number of decimals * @param {number} num The number to round * @param {number} decimals The number of decimals * @returns {number} The rounded number */ export function round(num, decimals) { const pow = Math.pow(10, decimals); return Math.round(num * pow) / pow; } /** * Helper function to round a number to the nearest integer, with ties rounded to the nearest even number. * Also known as "bankers' rounding". This is the default rounding mode in python. For example: * 1.5 rounds to 2 and 2.5 rounds to 2. * * @param {number} x The number to round * @returns {number} The rounded number */ export function bankers_round(x) { const r = Math.round(x); const br = Math.abs(x) % 1 === 0.5 ? (r % 2 === 0 ? r : r - 1) : r; return br; } /** * Measures similarity between two temporal sequences (e.g., input audio and output tokens * to generate token-level timestamps). * @param {number[][]} matrix * @returns {number[][]} */ export function dynamic_time_warping(matrix) { const output_length = matrix.length; const input_length = matrix[0].length; const outputShape = [output_length + 1, input_length + 1]; const cost = Array.from( { length: outputShape[0] }, () => Array(outputShape[1]).fill(Infinity) ); cost[0][0] = 0; const trace = Array.from( { length: outputShape[0] }, () => Array(outputShape[1]).fill(-1) ); for (let j = 1; j < outputShape[1]; ++j) { for (let i = 1; i < outputShape[0]; ++i) { const c0 = cost[i - 1][j - 1]; const c1 = cost[i - 1][j]; const c2 = cost[i][j - 1]; let c, t; if (c0 < c1 && c0 < c2) { c = c0; t = 0; } else if (c1 < c0 && c1 < c2) { c = c1; t = 1; } else { c = c2; t = 2; } cost[i][j] = matrix[i - 1][j - 1] + c; trace[i][j] = t; } } for (let i = 0; i < outputShape[1]; ++i) { // trace[0, :] = 2 trace[0][i] = 2; } for (let i = 0; i < outputShape[0]; ++i) { // trace[:, 0] = 1 trace[i][0] = 1; } // backtrace let i = output_length; let j = input_length; let text_indices = []; let time_indices = []; while (i > 0 || j > 0) { text_indices.push(i - 1); time_indices.push(j - 1); switch (trace[i][j]) { case 0: --i; --j; break; case 1: --i; break; case 2: --j; break; default: throw new Error( `Internal error in dynamic time warping. Unexpected trace[${i}, ${j}]. Please file a bug report.` ) } } text_indices.reverse(); time_indices.reverse(); return [text_indices, time_indices]; }