Spaces:
Running
Running
/**
 * Keet - Continuous Mel Producer Worker
 *
 * Runs in a separate Web Worker thread, continuously computing raw log-mel
 * spectrogram frames as audio arrives. When the inference pipeline needs
 * features for a time window, it requests normalized features from this worker.
 *
 * Architecture:
 *   AudioEngine → pushAudio(chunk) → mel.worker computes raw mel frames incrementally
 *   Inference trigger → getFeatures(startFrame, endFrame) → mel.worker normalizes & returns
 *
 * This decouples mel computation from the inference thread entirely.
 * Features are always ready when the encoder needs them — zero wait for preprocessing.
 *
 * Imports from mel-math.ts (local module, no external deps).
 */
| import { | |
| MEL_CONSTANTS, | |
| createMelFilterbank, | |
| createPaddedHannWindow, | |
| precomputeTwiddles, | |
| fft, | |
| normalizeMelFeatures, | |
| sampleToFrame, | |
| } from './mel-math'; | |
const { N_FFT, HOP_LENGTH, N_FREQ_BINS, PREEMPH, LOG_ZERO_GUARD } = MEL_CONSTANTS;

// ───────────────────────────────────────────────────────────────────────────
// Worker State
// ───────────────────────────────────────────────────────────────────────────

// Mel bins per frame. Stays at 128 until init() overrides it from its config.
let nMels = 128;

// --- Raw audio bookkeeping ---------------------------------------------------
// Running count of raw samples received through pushAudio.
let totalSamples = 0;
// Final raw (un-emphasized) sample of the previous chunk; the pre-emphasis
// step needs it to filter the first sample of the next chunk.
let lastRawSample = 0;

// --- Pre-emphasized sample window --------------------------------------------
// Pre-emphasized samples. Compacted after each pushAudio so that only the
// samples still required by the next mel frame's FFT window are retained.
let preemphBuffer = new Float32Array(0);
// Number of valid samples stored at the front of preemphBuffer.
let preemphLen = 0;
// Global sample index that preemphBuffer[0] corresponds to.
let preemphBaseIdx = 0;

// --- Circular raw log-mel store ----------------------------------------------
// Fixed-size ring in mel-major layout [nMels * maxFrames]: the value of mel
// bin m at frame t lives at rawMelBuffer[m * maxFrames + (t % maxFrames)].
let rawMelBuffer: Float32Array | null = null;
// Ring capacity in frames.
let maxFrames = 0;
// Oldest frame index still available in the ring.
let baseFrame = 0;
// Monotonic count of frames computed so far (i.e. the next frame index).
let computedFrames = 0;

// --- FFT scratch space (allocated once in init, reused for every frame) ------
let powerBuf: Float32Array;
let fftIm: Float64Array;
let fftRe: Float64Array;

// --- Tables precomputed in init ----------------------------------------------
let twiddles: { cos: Float64Array; sin: Float64Array };
let hannWindow: Float64Array;
let melFilterbank: Float32Array;

// --- getFeatures log throttling ----------------------------------------------
// Minimum spacing between two getFeatures log lines, compared against
// performance.now() readings (milliseconds).
const GET_FEATURES_LOG_INTERVAL = 5000;
// performance.now() reading taken when getFeatures last logged.
let lastGetFeaturesLogTime = 0;
// ───────────────────────────────────────────────────────────────────────────
// Initialization
// ───────────────────────────────────────────────────────────────────────────
/**
 * (Re)initialise the worker: build the mel constants, allocate the FFT scratch
 * buffers, the circular raw-mel buffer and the pre-emphasis buffer, and zero
 * every counter.
 *
 * All sizes are validated and all allocations are made before any module
 * state is touched, so a failing call leaves the previous state intact.
 *
 * @param config.nMels - Mel bins per frame. Falsy values fall back to 128.
 * @param config.maxFrames - Capacity of the circular mel buffer in frames.
 *   Falsy values fall back to 12000 (the capacity this worker always used).
 * @throws RangeError if either size is not a positive integer.
 */
function init(config: { nMels?: number; maxFrames?: number }) {
  const t0 = performance.now();

  const requestedMels = config.nMels || 128;
  const requestedFrames = config.maxFrames || 12000;
  if (!Number.isInteger(requestedMels) || requestedMels <= 0) {
    throw new RangeError(`[MelWorker] nMels must be a positive integer, got ${requestedMels}`);
  }
  if (!Number.isInteger(requestedFrames) || requestedFrames <= 0) {
    throw new RangeError(`[MelWorker] maxFrames must be a positive integer, got ${requestedFrames}`);
  }

  // Build everything into locals first; anything that throws does so before
  // the module state below has been modified.
  const filterbank = createMelFilterbank(requestedMels);
  const hann = createPaddedHannWindow();
  const fftTwiddles = precomputeTwiddles(N_FFT);
  const re = new Float64Array(N_FFT);
  const im = new Float64Array(N_FFT);
  const power = new Float32Array(N_FREQ_BINS);
  // Fixed-size circular mel buffer: old frames are overwritten in place by
  // pushAudio, so it is never reallocated after this point.
  const melStore = new Float32Array(requestedMels * requestedFrames);
  // Pre-emphasis buffer: one FFT window of overlap plus room for an incoming
  // chunk of up to 16000 samples. pushAudio grows it if a chunk is larger.
  const preemph = new Float32Array(N_FFT + 16000);

  // Commit.
  nMels = requestedMels;
  melFilterbank = filterbank;
  hannWindow = hann;
  twiddles = fftTwiddles;
  fftRe = re;
  fftIm = im;
  powerBuf = power;
  maxFrames = requestedFrames;
  rawMelBuffer = melStore;
  computedFrames = 0;
  baseFrame = 0;
  preemphBuffer = preemph;
  preemphBaseIdx = 0;
  preemphLen = 0;
  lastRawSample = 0;
  totalSamples = 0;

  const melBufMB = (nMels * maxFrames * 4 / 1024 / 1024).toFixed(1);
  console.log(`[MelWorker] Initialized: nMels=${nMels}, maxFrames=${maxFrames} (circular), ${melBufMB}MB mel buffer, preemph=${preemphBuffer.length} samples, init ${(performance.now() - t0).toFixed(1)} ms`);
}
// ───────────────────────────────────────────────────────────────────────────
// Incremental Mel Computation
// ───────────────────────────────────────────────────────────────────────────
/**
 * Ingest one chunk of raw audio and compute every mel frame it completes.
 *
 * Steps:
 *   1. Pre-emphasize the chunk (y[i] = x[i] - PREEMPH * x[i-1], carrying the
 *      last raw sample across chunk boundaries) and append it to preemphBuffer.
 *   2. Work out how many frames exist for the samples received so far
 *      (floor(totalSamples / HOP_LENGTH)).
 *   3. For each new frame: window N_FFT samples centred on t * HOP_LENGTH,
 *      run the FFT, take the power spectrum, apply the mel filterbank and
 *      store log(mel + LOG_ZERO_GUARD) in the circular buffer.
 *   4. Advance baseFrame if the ring wrapped, then compact preemphBuffer.
 *
 * Window positions that fall outside the samples currently held (before the
 * start of the stream, or not yet received) contribute 0.
 *
 * Does nothing if the worker has not been initialised or the chunk is empty.
 *
 * @param chunk - Raw audio samples to append.
 */
function pushAudio(chunk: Float32Array) {
  const melStore = rawMelBuffer;
  if (!melStore) return;
  const incoming = chunk.length;
  if (incoming === 0) return;

  const startedAt = performance.now();

  // Make room for the chunk. Compaction normally keeps the buffer small, so
  // this growth path is only a safety net for unusually large chunks.
  const filledAfter = preemphLen + incoming;
  if (filledAfter > preemphBuffer.length) {
    const grown = new Float32Array(Math.max(preemphBuffer.length * 2, filledAfter));
    grown.set(preemphBuffer.subarray(0, preemphLen));
    preemphBuffer = grown;
  }

  // Pre-emphasis, carrying the previous raw sample across the chunk boundary.
  let previousRaw = lastRawSample;
  for (let i = 0; i < incoming; i++) {
    const currentRaw = chunk[i];
    preemphBuffer[preemphLen + i] = currentRaw - PREEMPH * previousRaw;
    previousRaw = currentRaw;
  }
  preemphLen = filledAfter;
  lastRawSample = previousRaw;
  totalSamples += incoming;

  // Nothing new to compute yet: just trim the sample buffer.
  const frameGoal = Math.floor(totalSamples / HOP_LENGTH);
  if (frameGoal <= computedFrames) {
    compactPreemphBuffer();
    return;
  }

  const halfWindow = N_FFT >> 1;
  for (let frame = computedFrames; frame < frameGoal; frame++) {
    // Ring slot for this frame; older frames are overwritten via the modulo.
    const slot = frame % maxFrames;
    // Offset of the window's first sample inside the compacted preemphBuffer.
    const localStart = frame * HOP_LENGTH - halfWindow - preemphBaseIdx;

    // Windowed frame into the FFT input; out-of-range positions read as 0.
    for (let k = 0; k < N_FFT; k++) {
      const idx = localStart + k;
      const held = idx >= 0 && idx < preemphLen;
      fftRe[k] = (held ? preemphBuffer[idx] : 0) * hannWindow[k];
      fftIm[k] = 0;
    }

    fft(fftRe, fftIm, N_FFT, twiddles);

    // Power spectrum over the first N_FREQ_BINS bins.
    for (let k = 0; k < N_FREQ_BINS; k++) {
      powerBuf[k] = fftRe[k] * fftRe[k] + fftIm[k] * fftIm[k];
    }

    // Mel filterbank projection followed by a guarded log.
    for (let m = 0; m < nMels; m++) {
      const filterRow = m * N_FREQ_BINS;
      let energy = 0;
      for (let k = 0; k < N_FREQ_BINS; k++) {
        energy += powerBuf[k] * melFilterbank[filterRow + k];
      }
      melStore[m * maxFrames + slot] = Math.log(energy + LOG_ZERO_GUARD);
    }
  }

  const framesAdded = frameGoal - computedFrames;
  computedFrames = frameGoal;

  // Once more than maxFrames frames exist, the oldest ones have been overwritten.
  if (computedFrames - baseFrame > maxFrames) {
    baseFrame = computedFrames - maxFrames;
  }

  // Drop samples no future frame will read.
  compactPreemphBuffer();

  if (framesAdded > 0) {
    const elapsed = performance.now() - startedAt;
    // Throttle: only log when the frame count has just crossed a multiple of 50.
    if (computedFrames % 50 < framesAdded) {
      console.log(`[MelWorker] pushAudio: +${incoming} samples, +${framesAdded} frames, total ${computedFrames} frames (${(totalSamples / 16000).toFixed(1)}s), buf [${baseFrame}..${computedFrames}), preemph ${preemphLen} samples, ${elapsed.toFixed(1)} ms`);
    }
  }
}
/**
 * Drop pre-emphasized samples that no future mel frame will read.
 *
 * The next frame to compute (index computedFrames) starts its FFT window at
 * global sample computedFrames * HOP_LENGTH - N_FFT / 2 (clamped to 0).
 * Samples before that point are discarded: the survivors are moved to the
 * front of preemphBuffer and preemphBaseIdx / preemphLen are updated to match.
 * If nothing is stale the buffer is left untouched.
 */
function compactPreemphBuffer() {
  // Global index of the first sample that must be kept.
  const keepFrom = Math.max(0, computedFrames * HOP_LENGTH - (N_FFT >> 1));
  // The same position expressed as an offset into preemphBuffer.
  const staleCount = keepFrom - preemphBaseIdx;

  if (staleCount >= preemphLen) {
    // Every stored sample is stale.
    preemphLen = 0;
    preemphBaseIdx = keepFrom;
  } else if (staleCount > 0) {
    // Slide the surviving tail down to index 0.
    preemphBuffer.copyWithin(0, staleCount, preemphLen);
    preemphLen -= staleCount;
    preemphBaseIdx = keepFrom;
  }
}
// ───────────────────────────────────────────────────────────────────────────
// Feature Extraction (normalize a requested range)
// ───────────────────────────────────────────────────────────────────────────
/**
 * Extract mel features for a frame range.
 *
 * The requested range is floored to whole frame indices and clamped to the
 * frames still held in the circular buffer, [baseFrame, computedFrames).
 * Returns null (with a warning) when the worker holds no frames, or when the
 * clamped range is empty or not a number.
 *
 * @param startFrame - Start frame index
 * @param endFrame - End frame index (exclusive)
 * @param normalize - If true (default), apply per-feature mean/variance normalization
 *   (required for ASR). If false, return raw log-mel values (for visualization with
 *   fixed dB scaling to avoid "gain hunting" during silence).
 * @returns `features` in mel-major layout [melBins, T], the frame count `T`
 *   actually returned after clamping, and `melBins`; or null.
 *
 * PERFORMANCE NOTE (2026-02-09): When normalize=false, the caller (e.g. visualizer)
 * still incurs the cost of extracting frames from the circular buffer. If visualization
 * performance becomes an issue, consider:
 *   1. Reducing visualizer update frequency
 *   2. Caching/reusing extracted frames between draws
 *   3. Downsampling the spectrogram (skip frames for display)
 */
function getFeatures(startFrame: number, endFrame: number, normalize: boolean = true): {
  features: Float32Array;
  T: number;
  melBins: number;
} | null {
  const t0 = performance.now();
  const melStore = rawMelBuffer;
  if (!melStore || computedFrames === 0) {
    console.warn(`[MelWorker] getFeatures: no data (computedFrames=${computedFrames})`);
    return null;
  }

  // Clamp to available circular range [baseFrame, computedFrames). Flooring
  // keeps the ring indices integral even if a caller passes fractional frames.
  const sf = Math.max(baseFrame, Math.floor(startFrame));
  const ef = Math.min(computedFrames, Math.floor(endFrame));
  const T = ef - sf;
  // Written as !(T > 0) so that a NaN range is rejected as well.
  if (!(T > 0)) {
    console.warn(`[MelWorker] getFeatures: empty range (requested ${startFrame}..${endFrame}, available ${baseFrame}..${computedFrames})`);
    return null;
  }

  // Extract the requested window from the circular buffer (mel-major layout
  // [nMels, T]). pushAudio keeps computedFrames - baseFrame <= maxFrames, so
  // the window wraps at most once: copy the run up to the end of the ring,
  // then whatever remains from the start of the ring.
  const raw = new Float32Array(nMels * T);
  const firstSlot = sf % maxFrames;
  const headLen = Math.min(T, maxFrames - firstSlot);
  const tailLen = T - headLen;
  for (let m = 0; m < nMels; m++) {
    const srcRow = m * maxFrames;
    const dstRow = m * T;
    raw.set(melStore.subarray(srcRow + firstSlot, srcRow + firstSlot + headLen), dstRow);
    if (tailLen > 0) {
      raw.set(melStore.subarray(srcRow, srcRow + tailLen), dstRow + headLen);
    }
  }

  // Optionally normalize (ASR requires normalized; visualizer uses raw for fixed dB scale)
  const features = normalize ? normalizeMelFeatures(raw, nMels, T) : raw;

  // Throttled logging to avoid console spam (was causing noticeable CPU overhead)
  const now = performance.now();
  if (now - lastGetFeaturesLogTime > GET_FEATURES_LOG_INTERVAL) {
    lastGetFeaturesLogTime = now;
    const elapsed = now - t0;
    console.log(`[MelWorker] getFeatures: frames ${sf}..${ef} (${T} frames, ${(T * HOP_LENGTH / 16000).toFixed(2)}s), normalize=${normalize}, ${elapsed.toFixed(1)} ms, buf [${baseFrame}..${computedFrames})`);
  }

  return { features, T, melBins: nMels };
}
// ───────────────────────────────────────────────────────────────────────────
// Last mel frame (for equalizer-style bar display)
// ───────────────────────────────────────────────────────────────────────────
/**
 * Snapshot of the most recent raw log-mel frame, one value per mel bin.
 *
 * Each bin is the mean of the two newest frames, which smooths the display;
 * when only a single frame exists it is averaged with itself.
 *
 * @returns A new Float32Array of length nMels, or null when the worker is not
 *   initialised or no frame has been computed yet.
 */
function getLastMelFrame(): Float32Array | null {
  const melStore = rawMelBuffer;
  if (!melStore || computedFrames === 0) return null;

  // Ring slots of the newest frame and of the one before it.
  const newestSlot = (computedFrames - 1 + maxFrames) % maxFrames;
  const olderSlot = computedFrames >= 2
    ? (computedFrames - 2 + maxFrames) % maxFrames
    : newestSlot;

  const averaged = new Float32Array(nMels);
  // `row` tracks bin * maxFrames, the start of that bin's row in the mel-major buffer.
  for (let bin = 0, row = 0; bin < nMels; bin++, row += maxFrames) {
    averaged[bin] = 0.5 * (melStore[row + newestSlot] + melStore[row + olderSlot]);
  }
  return averaged;
}
// ───────────────────────────────────────────────────────────────────────────
// Reset
// ───────────────────────────────────────────────────────────────────────────
/**
 * Rewind the worker to an empty stream without reallocating anything.
 *
 * Only the counters are zeroed; the buffers keep their old contents, which
 * become unreachable because computedFrames and preemphLen are 0 again.
 */
function reset() {
  // Frame bookkeeping for the circular mel buffer.
  baseFrame = 0;
  computedFrames = 0;

  // Raw / pre-emphasized sample bookkeeping.
  totalSamples = 0;
  lastRawSample = 0;
  preemphBaseIdx = 0;
  preemphLen = 0;

  console.log('[MelWorker] Reset');
}
// ───────────────────────────────────────────────────────────────────────────
// Message Handler
// ───────────────────────────────────────────────────────────────────────────
/**
 * Worker message dispatcher.
 *
 * Each request is `{ type, payload, id }`. Every request except PUSH_AUDIO is
 * answered with a `<TYPE>_DONE` message that echoes `id`:
 *   INIT               -> INIT_DONE
 *   PUSH_AUDIO         -> (no reply; fire and forget)
 *   GET_FEATURES       -> GET_FEATURES_DONE, payload = getFeatures() result or null
 *   GET_STATUS         -> GET_STATUS_DONE, payload = counters and buffer sizes
 *   GET_LAST_MEL_FRAME -> GET_LAST_MEL_FRAME_DONE, payload = { melFrame } or null
 *   RESET              -> RESET_DONE
 *
 * Any exception thrown while handling a request is logged and reported as
 * `{ type: 'ERROR', payload: <message>, id }`. An unrecognised `type` is logged
 * and, when the request carried an `id`, also answered with ERROR so that the
 * requester is not left waiting for a reply that will never come.
 */
self.onmessage = (e: MessageEvent) => {
  // `?? {}` keeps a message without data from throwing outside the try block.
  const { type, payload, id } = e.data ?? {};
  try {
    switch (type) {
      case 'INIT': {
        init(payload || {});
        postMessage({ type: 'INIT_DONE', id });
        break;
      }
      case 'PUSH_AUDIO': {
        pushAudio(payload);
        // No response needed — fire and forget for continuous production
        break;
      }
      case 'GET_FEATURES': {
        const { startSample, endSample, normalize = true } = payload;
        const startFrame = sampleToFrame(startSample);
        const endFrame = sampleToFrame(endSample);
        const result = getFeatures(startFrame, endFrame, normalize);
        if (result) {
          // Transfer the features buffer for zero-copy.
          // NOTE(review): the `as any` presumably works around postMessage being
          // typed with the window signature rather than the worker one — confirm.
          postMessage(
            { type: 'GET_FEATURES_DONE', payload: result, id },
            [result.features.buffer] as any
          );
        } else {
          postMessage({
            type: 'GET_FEATURES_DONE',
            payload: null,
            id
          });
        }
        break;
      }
      case 'GET_STATUS': {
        postMessage({
          type: 'GET_STATUS_DONE',
          payload: {
            totalSamples,
            computedFrames,
            bufferCapacityFrames: maxFrames,
            melBins: nMels,
          },
          id,
        });
        break;
      }
      case 'GET_LAST_MEL_FRAME': {
        const frame = getLastMelFrame();
        if (frame) {
          // The frame is a fresh array, so its buffer can be transferred.
          postMessage(
            { type: 'GET_LAST_MEL_FRAME_DONE', payload: { melFrame: frame }, id },
            [frame.buffer] as any
          );
        } else {
          postMessage({ type: 'GET_LAST_MEL_FRAME_DONE', payload: null, id });
        }
        break;
      }
      case 'RESET': {
        reset();
        postMessage({ type: 'RESET_DONE', id });
        break;
      }
      default: {
        console.warn('[MelWorker] Unknown message type:', type);
        // Only requests that carry an id can have someone waiting on them.
        if (id !== undefined) {
          postMessage({ type: 'ERROR', payload: `Unknown message type: ${String(type)}`, id });
        }
      }
    }
  } catch (err: unknown) {
    console.error('[MelWorker] Error:', err);
    // Non-Error throws have no `.message`; stringify them instead.
    const message = err instanceof Error ? err.message : String(err);
    postMessage({ type: 'ERROR', payload: message, id });
  }
};

console.log('[MelWorker] Worker script loaded');