// keet-streaming/src/components/LayeredBufferVisualizer.tsx
// Source-page metadata (ysdede): "feat(space): migrate Hugging Face Space to keet SolidJS app" — commit b8cc2bf
import { Component, onMount, onCleanup, createSignal } from 'solid-js';
import type { AudioEngine } from '../lib/audio/types';
import type { MelWorkerClient } from '../lib/audio/MelWorkerClient';
import { normalizeMelForDisplay } from '../lib/audio/mel-display';
import { appStore } from '../stores/appStore';
/**
 * Props for the layered spectrogram / waveform / VAD visualizer component.
 */
interface LayeredBufferVisualizerProps {
// Engine exposing the audio ring buffer the draw loop reads samples from.
audioEngine?: AudioEngine;
// Worker client used to fetch mel features for the spectrogram layer.
melClient?: MelWorkerClient;
height?: number; // Total height
windowDuration?: number; // default 8.0s
}
// NOTE(review): MEL_BINS is not referenced anywhere in this file's visible
// code (the mel bin count comes from the worker's feature payload) — confirm
// it is still needed before removing.
const MEL_BINS = 128; // Standard for this app
// dB scaling is in mel-display.ts (shared with bar visualizer)
// Pre-computed 256-entry RGB lookup table for the mel heatmap, built once at
// module load. Index with round(intensity * 255) * 3 to reach a packed
// [R, G, B] triple.
// Colormap: black -> blue -> purple -> green -> yellow -> orange -> red.
const COLORMAP_LUT = (() => {
  // Gradient stops as [position in 0..1, R, G, B].
  const stops: [number, number, number, number][] = [
    [0, 0, 0, 0], // black
    [0.12, 0, 0, 180], // blue
    [0.30, 120, 0, 160], // purple
    [0.48, 0, 180, 80], // green
    [0.65, 220, 220, 0], // yellow
    [0.82, 255, 140, 0], // orange
    [1, 255, 0, 0], // red
  ];
  // 256 entries * 3 channels (R, G, B) packed into one Uint8Array.
  const table = new Uint8Array(256 * 3);
  for (let entry = 0; entry < 256; entry++) {
    const pos = entry / 255;
    let red = 0;
    let green = 0;
    let blue = 0;
    // Linearly interpolate between the two stops bracketing `pos`.
    for (let s = 0; s < stops.length - 1; s++) {
      const [p0, r0, g0, b0] = stops[s];
      const [p1, r1, g1, b1] = stops[s + 1];
      if (pos >= p0 && pos <= p1) {
        const f = (pos - p0) / (p1 - p0);
        red = Math.round(r0 + f * (r1 - r0));
        green = Math.round(g0 + f * (g1 - g0));
        blue = Math.round(b0 + f * (b1 - b0));
        break;
      }
    }
    // Anything at/above the final stop snaps to its exact color.
    const last = stops[stops.length - 1];
    if (pos >= last[0]) {
      red = last[1];
      green = last[2];
      blue = last[3];
    }
    table[entry * 3] = red;
    table[entry * 3 + 1] = green;
    table[entry * 3 + 2] = blue;
  }
  return table;
})();
/**
 * Layered real-time audio visualizer: spectrogram (top 55%), waveform
 * (middle 35%) and VAD/energy strip (bottom 10%), all rendered into a single
 * canvas from the audio engine's ring buffer on a throttled rAF loop.
 */
export const LayeredBufferVisualizer: Component<LayeredBufferVisualizerProps> = (props) => {
let canvasRef: HTMLCanvasElement | undefined;
let ctx: CanvasRenderingContext2D | null = null;
// requestAnimationFrame handle; cancelled in onCleanup.
let animationFrameId: number;
// NOTE(review): `||` would clobber an explicit 0, but a zero-second window is
// meaningless here, so this behaves like `?? 8.0` in practice.
const getWindowDuration = () => props.windowDuration || 8.0;
// Offscreen canvas for spectrogram caching (scrolling)
let specCanvas: HTMLCanvasElement | undefined;
let specCtx: CanvasRenderingContext2D | null = null;
// State for last fetch to throttle spectrogram updates
let lastSpecFetchTime = 0;
const SPEC_FETCH_INTERVAL = 100; // Update spectrogram every 100ms (10fps)
const DRAW_INTERVAL_MS = 33; // Throttle full redraw to ~30fps
let lastDrawTime = 0;
// --- Cached layout dimensions (updated via ResizeObserver, NOT per-frame) ---
// Avoids getBoundingClientRect() every animation frame which forces synchronous
// layout reflow and was the #1 perf bottleneck (1.5s layout-shift clusters).
let cachedPhysicalWidth = 0;
let cachedPhysicalHeight = 0;
let cachedDpr = window.devicePixelRatio || 1;
let resizeObserver: ResizeObserver | null = null;
let dprMediaQuery: MediaQueryList | null = null;
/** Recompute physical canvas dimensions from cached logical size + DPR. */
const updateCanvasDimensions = (logicalW: number, logicalH: number) => {
cachedDpr = window.devicePixelRatio || 1;
cachedPhysicalWidth = Math.floor(logicalW * cachedDpr);
cachedPhysicalHeight = Math.floor(logicalH * cachedDpr);
// Resize canvases immediately so next frame uses correct size.
// (Assigning canvas.width/height also clears the canvas, hence the guards.)
if (canvasRef && (canvasRef.width !== cachedPhysicalWidth || canvasRef.height !== cachedPhysicalHeight)) {
canvasRef.width = cachedPhysicalWidth;
canvasRef.height = cachedPhysicalHeight;
}
if (specCanvas && (specCanvas.width !== cachedPhysicalWidth || specCanvas.height !== cachedPhysicalHeight)) {
specCanvas.width = cachedPhysicalWidth;
specCanvas.height = cachedPhysicalHeight;
}
};
// --- Pre-allocated ImageData for spectrogram rendering ---
// Avoids creating a new ImageData object every spectrogram draw (~10fps),
// which caused GC pressure from large short-lived allocations.
let cachedSpecImgData: ImageData | null = null;
let cachedSpecImgWidth = 0;
let cachedSpecImgHeight = 0;
// --- Pre-allocated waveform read buffer ---
// Avoids allocating a new Float32Array(~128000) every animation frame.
// Grows only when the required size exceeds current capacity.
let waveformReadBuf: Float32Array | null = null;
// Store spectrogram data with its time alignment; the draw loop scrolls this
// stale copy into place between fetches.
let cachedSpecData: {
features: Float32Array;
melBins: number;
timeSteps: number;
startTime: number;
endTime: number;
} | null = null;
onMount(() => {
if (canvasRef) {
// alpha:false lets the browser skip alpha compositing for this canvas.
ctx = canvasRef.getContext('2d', { alpha: false });
// Use ResizeObserver to cache dimensions instead of per-frame getBoundingClientRect
resizeObserver = new ResizeObserver((entries) => {
for (const entry of entries) {
// contentRect gives CSS-pixel (logical) dimensions without forcing layout
const cr = entry.contentRect;
updateCanvasDimensions(cr.width, cr.height);
}
});
resizeObserver.observe(canvasRef);
// Watch for DPR changes (browser zoom, display change): register a one-shot
// listener on a resolution media query matching the CURRENT dppx; when it
// stops matching, refresh dimensions and re-register at the new DPR.
const setupDprWatch = () => {
dprMediaQuery = window.matchMedia(`(resolution: ${window.devicePixelRatio}dppx)`);
const onDprChange = () => {
if (canvasRef) {
const rect = canvasRef.getBoundingClientRect(); // one-time on zoom change only
updateCanvasDimensions(rect.width, rect.height);
}
// Re-register for the next change at the new DPR
setupDprWatch();
};
// NOTE(review): this once-listener is never removed in onCleanup (its handler
// is scoped here), so a DPR change after unmount re-registers the watcher —
// consider a disposed flag or storing the handler for removal.
dprMediaQuery.addEventListener('change', onDprChange, { once: true });
};
setupDprWatch();
// Initial dimensions (one-time)
const rect = canvasRef.getBoundingClientRect();
updateCanvasDimensions(rect.width, rect.height);
}
// Create offscreen canvas
specCanvas = document.createElement('canvas');
specCtx = specCanvas.getContext('2d', { alpha: false });
// Start the render loop; it reschedules itself on every path.
loop();
});
onCleanup(() => {
  // Stop the render loop and detach layout observation.
  cancelAnimationFrame(animationFrameId);
  resizeObserver?.disconnect();
  resizeObserver = null;
  // NOTE(review): the one-shot listener registered by the DPR watcher in
  // onMount is not removed here (its handler is not reachable from this
  // scope) — confirm whether a stray re-registration after unmount matters.
});
/**
 * Per-frame render loop. Full redraws are throttled to ~30fps; mel-feature
 * fetches are further throttled to ~10fps, with the last fetched spectrogram
 * scrolled into alignment in between. Reschedules itself via
 * requestAnimationFrame on every path.
 */
const loop = (now: number = performance.now()) => {
// Prerequisites missing (canvas unmounted or engine not wired) — idle.
if (!ctx || !canvasRef || !props.audioEngine) {
animationFrameId = requestAnimationFrame(loop);
return;
}
// Redraw throttle (~30fps).
if (now - lastDrawTime < DRAW_INTERVAL_MS) {
animationFrameId = requestAnimationFrame(loop);
return;
}
lastDrawTime = now;
// Use cached dimensions (updated by ResizeObserver / DPR watcher)
const dpr = cachedDpr;
const width = cachedPhysicalWidth;
const height = cachedPhysicalHeight;
if (width === 0 || height === 0) {
animationFrameId = requestAnimationFrame(loop);
return;
}
// Colors
const bgColor = '#0f172a';
ctx.fillStyle = bgColor;
ctx.fillRect(0, 0, width, height);
// The visible window is the most recent `duration` seconds of ring-buffer audio.
const ringBuffer = props.audioEngine.getRingBuffer();
const currentTime = ringBuffer.getCurrentTime();
const duration = getWindowDuration();
const startTime = currentTime - duration;
const sampleRate = ringBuffer.sampleRate;
// Layout:
// Top 55%: Spectrogram
// Middle 35%: Waveform
// Bottom 10%: VAD signal
const specHeight = Math.floor(height * 0.55);
const waveHeight = Math.floor(height * 0.35);
const vadHeight = height - specHeight - waveHeight;
const waveY = specHeight;
const vadY = specHeight + waveHeight;
// 1. Spectrogram (async fetch with stored alignment)
if (props.melClient && specCtx && specCanvas) {
if (now - lastSpecFetchTime > SPEC_FETCH_INTERVAL) {
lastSpecFetchTime = now;
const fetchStartSample = Math.round(startTime * sampleRate);
const fetchEndSample = Math.round(currentTime * sampleRate);
// Request RAW (unnormalized) features for fixed dB scaling.
// ASR transcription still uses normalized features (default).
props.melClient.getFeatures(fetchStartSample, fetchEndSample, false).then(features => {
if (features && specCtx && specCanvas) {
// Store with time alignment info
cachedSpecData = {
features: features.features,
melBins: features.melBins,
timeSteps: features.T,
startTime: startTime,
endTime: currentTime
};
drawSpectrogramToCanvas(specCtx, features.features, features.melBins, features.T, width, specHeight);
}
// Best-effort: a failed fetch simply leaves the cached frame on screen.
}).catch(() => { });
}
// Draw cached spectrogram aligned to current view
if (cachedSpecData && cachedSpecData.timeSteps > 0) {
// Calculate offset to align cached data with current time window
const cachedDuration = cachedSpecData.endTime - cachedSpecData.startTime;
const timeOffset = startTime - cachedSpecData.startTime;
const offsetX = Math.floor((timeOffset / cachedDuration) * width);
// Draw the portion of cached spectrogram that's still visible.
// NOTE(review): if the cache is older than one full window, width - offsetX
// goes non-positive; drawImage behavior with a non-positive source width is
// subtle — confirm the intent is "draw nothing" in that case.
ctx.drawImage(specCanvas, offsetX, 0, width - offsetX, specHeight, 0, 0, width - offsetX, specHeight);
}
}
// 2. Waveform (sync with current time window, zero-allocation read)
try {
const startSample = Math.floor(startTime * sampleRate);
const endSample = Math.floor(currentTime * sampleRate);
const neededLen = endSample - startSample;
const baseFrame = ringBuffer.getBaseFrameOffset();
// Skip if the requested range has already been overwritten in the ring.
if (startSample >= baseFrame && neededLen > 0) {
// Use readInto if available (zero-alloc), fall back to read()
if (ringBuffer.readInto) {
// Grow the pre-allocated buffer only when capacity is insufficient
if (!waveformReadBuf || waveformReadBuf.length < neededLen) {
waveformReadBuf = new Float32Array(neededLen);
}
const written = ringBuffer.readInto(startSample, endSample, waveformReadBuf);
// Pass a subarray view (no copy) of the exact length
drawWaveform(ctx, waveformReadBuf.subarray(0, written), width, waveHeight, waveY);
} else {
const audioData = ringBuffer.read(startSample, endSample);
drawWaveform(ctx, audioData, width, waveHeight, waveY);
}
}
} catch (e) {
// Data likely overwritten or not available
}
// 3. VAD Signal Layer
drawVadLayer(ctx, width, vadHeight, vadY, startTime, duration, dpr);
// 4. Overlay (time labels, trigger line)
drawOverlay(ctx, width, height, startTime, duration, dpr);
animationFrameId = requestAnimationFrame(loop);
};
/**
 * Rasterizes mel features into the (offscreen) spectrogram canvas using the
 * shared COLORMAP_LUT. Reuses a component-cached ImageData buffer to avoid
 * per-draw allocations.
 */
const drawSpectrogramToCanvas = (
  ctx: CanvasRenderingContext2D,
  features: Float32Array,
  melBins: number,
  timeSteps: number,
  width: number,
  height: number
) => {
  // features is mel-major: the value for (mel bin m, frame t) lives at
  // features[m * timeSteps + t].
  if (timeSteps === 0) return;
  // Reallocate the cached ImageData only when the target size changes.
  if (!cachedSpecImgData || cachedSpecImgWidth !== width || cachedSpecImgHeight !== height) {
    cachedSpecImgData = ctx.createImageData(width, height);
    cachedSpecImgWidth = width;
    cachedSpecImgHeight = height;
  }
  const img = cachedSpecImgData;
  const pixels = img.data;
  // Fixed scale factors mapping pixel coordinates to (frame, bin) indices.
  const framesPerPx = timeSteps / width;
  const binsPerPx = melBins / height;
  for (let px = 0; px < width; px++) {
    const frame = Math.floor(px * framesPerPx);
    if (frame >= timeSteps) break;
    for (let py = 0; py < height; py++) {
      // Vertical flip: row 0 (top) shows the highest mel bin.
      const bin = Math.floor((height - 1 - py) * binsPerPx);
      if (bin >= melBins) continue;
      const display = normalizeMelForDisplay(features[bin * timeSteps + frame]);
      const lutBase = ((display * 255) | 0) * 3;
      const out = (py * width + px) * 4;
      pixels[out] = COLORMAP_LUT[lutBase];
      pixels[out + 1] = COLORMAP_LUT[lutBase + 1];
      pixels[out + 2] = COLORMAP_LUT[lutBase + 2];
      pixels[out + 3] = 255; // fully opaque
    }
  }
  ctx.putImageData(img, 0, 0);
};
// Display gain of 1 shows true amplitude: float32 samples in [-1, 1] span at
// most half the layer height from the center line. ASR pipeline is unchanged.
const WAVEFORM_GAIN = 1;
/**
 * Draws a min/max envelope of `data` across `width` columns into the band
 * starting at `offsetY` with the given `height`. Each column samples roughly
 * ten points of its slice to find the extremes (cheap decimation).
 */
const drawWaveform = (ctx: CanvasRenderingContext2D, data: Float32Array, width: number, height: number, offsetY: number) => {
  if (data.length === 0) return;
  const samplesPerCol = Math.ceil(data.length / width);
  const halfSpan = (height / 2) * WAVEFORM_GAIN;
  const midY = offsetY + height / 2;
  const top = offsetY;
  const bottom = offsetY + height;
  ctx.strokeStyle = '#4ade80'; // Green
  ctx.lineWidth = 1;
  ctx.beginPath();
  for (let col = 0; col < width; col++) {
    const from = col * samplesPerCol;
    const to = Math.min(from + samplesPerCol, data.length);
    // Sample ~10 evenly-spaced points per column instead of every sample.
    const stride = Math.max(1, Math.floor((to - from) / 10));
    let lo = 1;
    let hi = -1;
    let seen = false;
    for (let i = from; i < to; i += stride) {
      const s = data[i];
      if (s < lo) lo = s;
      if (s > hi) hi = s;
      seen = true;
    }
    if (seen) {
      // Positive samples go up (smaller y); clamp the segment into the band.
      const yLo = midY - lo * halfSpan;
      const yHi = midY - hi * halfSpan;
      ctx.moveTo(col, Math.max(top, Math.min(bottom, yLo)));
      ctx.lineTo(col, Math.max(top, Math.min(bottom, yHi)));
    }
  }
  ctx.stroke();
};
/**
 * Draws the VAD strip: a full-width tint for the current speech/silence state,
 * a right-anchored energy meter, a hairline top separator, and a text label.
 * startTime/duration are part of the shared layer-draw signature but unused
 * here (no historical VAD data is rendered yet).
 */
const drawVadLayer = (ctx: CanvasRenderingContext2D, width: number, height: number, offsetY: number, startTime: number, duration: number, dpr: number) => {
  const { isSpeech } = appStore.vadState();

  // Background tint: orange while speech is detected, muted slate otherwise.
  ctx.fillStyle = isSpeech ? 'rgba(249, 115, 22, 0.4)' : 'rgba(100, 116, 139, 0.2)';
  ctx.fillRect(0, offsetY, width, height);

  // Energy meter grows leftward from the right edge; a level of 0.3 maps to
  // full width. Orange above the detection threshold, green below.
  const level = appStore.audioLevel();
  const threshold = appStore.energyThreshold();
  if (level > 0) {
    const meterWidth = Math.min(width, width * (level / 0.3));
    ctx.fillStyle = level > threshold ? 'rgba(249, 115, 22, 0.8)' : 'rgba(74, 222, 128, 0.6)';
    ctx.fillRect(width - meterWidth, offsetY, meterWidth, height);
  }

  // Hairline separator along the strip's top edge.
  ctx.strokeStyle = 'rgba(148, 163, 184, 0.3)';
  ctx.lineWidth = 1 * dpr;
  ctx.beginPath();
  ctx.moveTo(0, offsetY);
  ctx.lineTo(width, offsetY);
  ctx.stroke();

  // State label in the lower-left corner.
  ctx.fillStyle = isSpeech ? '#fb923c' : '#64748b';
  ctx.font = `${8 * dpr}px monospace`;
  ctx.fillText(isSpeech ? 'SPEECH' : 'SILENCE', 4 * dpr, offsetY + height - 2 * dpr);
};
/**
 * Draws the overlay layer: a trigger line 1.5 s from the right edge and time
 * labels along the bottom edge.
 *
 * @param ctx       Target 2D context (physical-pixel coordinates).
 * @param width     Canvas width in physical pixels.
 * @param height    Canvas height in physical pixels.
 * @param startTime Window start time in seconds (unused; kept for signature
 *                  parity with the other layer-draw helpers).
 * @param duration  Visible window duration in seconds.
 * @param dpr       Device pixel ratio, scales line width and font size.
 */
const drawOverlay = (ctx: CanvasRenderingContext2D, width: number, height: number, startTime: number, duration: number, dpr: number) => {
  // Trigger line (1.5s from the right edge) if in V3 mode.
  const triggerX = width - (1.5 / duration) * width;
  ctx.strokeStyle = 'rgba(255, 255, 0, 0.5)';
  ctx.lineWidth = 1 * dpr;
  ctx.beginPath();
  ctx.moveTo(triggerX, 0);
  ctx.lineTo(triggerX, height);
  ctx.stroke();

  // Time labels every 2 s across the visible window. The loop bound was
  // previously hard-coded to 8, drawing wrong/off-window labels whenever
  // windowDuration differed from the 8 s default; derive it from `duration`
  // instead (identical output for the default window).
  ctx.fillStyle = '#94a3b8';
  ctx.font = `${10 * dpr}px monospace`;
  for (let t = 0; t <= duration; t += 2) {
    const x = width - (t / duration) * width;
    ctx.fillText(`-${t}s`, x + 3 * dpr, height - 6 * dpr);
  }
};
// Container fixes the logical height (default 200px); the canvas fills it and
// is sized in physical pixels by the ResizeObserver/DPR logic above.
return (
<div
class="relative w-full bg-slate-900 rounded border border-slate-700 overflow-hidden shadow-inner"
style={{ height: `${props.height || 200}px` }}
>
<canvas ref={canvasRef} class="w-full h-full block" />
<div class="absolute top-2 left-2 text-[10px] text-slate-400 pointer-events-none">
SPECTROGRAM + WAVEFORM ({getWindowDuration()}s)
</div>
</div>
);
};