Spaces:

ysdede
/

keet-streaming

Running

App Files Files Community

keet-streaming / src /App.tsx

ysdede

fix(hf-space): harden runtime loading and cache behavior

8a16cdb 28 days ago

raw

history blame contribute delete

42.9 kB

	import { Component, Show, For, createSignal, createEffect, onMount, onCleanup } from 'solid-js';
	import { appStore } from './stores/appStore';
	import { CompactWaveform, ModelLoadingOverlay, DebugPanel, TranscriptionDisplay, SettingsContent } from './components';
	import { getModelDisplayName, MODELS } from './components/ModelLoadingOverlay';
	import { AudioEngine } from './lib/audio';
	import { MelWorkerClient } from './lib/audio/MelWorkerClient';
	import { TranscriptionWorkerClient } from './lib/transcription';
	import { HybridVAD } from './lib/vad';
	import { WindowBuilder } from './lib/transcription/WindowBuilder';
	import { BufferWorkerClient } from './lib/buffer';
	import { TenVADWorkerClient } from './lib/vad/TenVADWorkerClient';
	import type { V4ProcessResult } from './lib/transcription/TranscriptionWorkerClient';
	import type { BufferWorkerConfig, TenVADResult } from './lib/buffer/types';
	import { formatDuration } from './utils/time';

	// Module-level singletons. They live outside the App component, so they are shared
	// by every mount of App and are only released by the explicit cleanup code below.
	// Microphone capture engine; created on the first recording start in toggleRecording.
	let audioEngine: AudioEngine \| null = null;
	// Exported reactive mirror of `audioEngine` (accessor + setter).
	export const [audioEngineSignal, setAudioEngineSignal] = createSignal<AudioEngine \| null>(null);

	// Transcription worker client; created in App's onMount.
	let workerClient: TranscriptionWorkerClient \| null = null;
	// Mel-feature worker client; created on demand when a v3/v4 recording starts.
	let melClient: MelWorkerClient \| null = null;
	// Exported reactive mirror of `melClient` (accessor + setter).
	export const [melClientSignal, setMelClientSignal] = createSignal<MelWorkerClient \| null>(null);
	// Unsubscribe handles returned by AudioEngine listeners:
	// speech segments (v2 mode), window chunks (v3 mode), raw chunks fed to the mel worker (v3 mode).
	let segmentUnsubscribe: (() => void) \| null = null;
	let windowUnsubscribe: (() => void) \| null = null;
	let melChunkUnsubscribe: (() => void) \| null = null;
	// Unsubscribe handle for the visualization (audio level / bar levels) listener.
	let visualizationUnsubscribe: (() => void) \| undefined;
	// v4 pipeline instances
	let hybridVAD: HybridVAD \| null = null;
	let bufferClient: BufferWorkerClient \| null = null;
	let tenVADClient: TenVADWorkerClient \| null = null;
	let windowBuilder: WindowBuilder \| null = null;
	// Timer id of the next scheduled v4 tick, and whether the tick loop should keep rescheduling itself.
	let v4TickTimeout: number \| undefined;
	let v4TickRunning = false;
	let v4AudioChunkUnsubscribe: (() => void) \| null = null;
	let v4MelChunkUnsubscribe: (() => void) \| null = null;
	// True while a v4 inference request is in flight; v4Tick returns early when set.
	let v4InferenceBusy = false;
	// performance.now() timestamp of the last started inference, used to rate-limit ticks.
	let v4LastInferenceTime = 0;
	// Global sample counter for audio chunks (tracks total samples written to BufferWorker)
	let v4GlobalSampleOffset = 0;
	// Throttle UI updates from TEN-VAD to at most once per frame
	// Latest probability waiting to be written to the store, and whether a frame callback is queued.
	let pendingSileroProb: number \| null = null;
	let sileroUpdateScheduled = false;
	// Latest energy-VAD state waiting to be written to the store (same once-per-frame throttle).
	let pendingVadState: {
	isSpeech: boolean;
	energy: number;
	snr: number;
	hybridState: string;
	sileroProbability?: number;
	} \| null = null;
	let vadUpdateScheduled = false;

	/**
	 * Records the newest inference-VAD probability and makes sure it reaches the
	 * store on the next animation frame. Calls arriving before that frame only
	 * overwrite the pending value, so the store is written at most once per frame.
	 *
	 * @param prob Latest speech probability to publish as `sileroProbability`.
	 */
	const scheduleSileroUpdate = (prob: number) => {
	  pendingSileroProb = prob;
	  if (!sileroUpdateScheduled) {
	    sileroUpdateScheduled = true;
	    requestAnimationFrame(function flushSileroProbability() {
	      sileroUpdateScheduled = false;
	      const latest = pendingSileroProb;
	      if (latest !== null) {
	        // Merge into the current VAD state so the other fields are left untouched.
	        appStore.setVadState({ ...appStore.vadState(), sileroProbability: latest });
	      }
	    });
	  }
	};

	/**
	 * Queues a VAD state snapshot for publication on the next animation frame.
	 * Only the most recent snapshot queued before the frame fires is applied.
	 *
	 * When the snapshot carries no `sileroProbability`, the value already in the
	 * store is kept, so an inference-VAD probability written separately is not lost.
	 *
	 * @param next VAD fields to merge into the store's VAD state.
	 */
	const scheduleVadStateUpdate = (next: {
	  isSpeech: boolean;
	  energy: number;
	  snr: number;
	  hybridState: string;
	  sileroProbability?: number;
	}) => {
	  pendingVadState = next;
	  if (!vadUpdateScheduled) {
	    vadUpdateScheduled = true;
	    requestAnimationFrame(function flushVadState() {
	      vadUpdateScheduled = false;
	      if (pendingVadState === null) return;
	      const previous = appStore.vadState();
	      appStore.setVadState({
	        ...previous,
	        ...pendingVadState,
	        // Fall back to the stored probability when the snapshot does not provide one.
	        sileroProbability: pendingVadState.sileroProbability ?? previous.sileroProbability,
	      });
	      appStore.setIsSpeechDetected(pendingVadState.isSpeech);
	      pendingVadState = null;
	    });
	  }
	};

	/**
	 * Top bar with the app title, a session label and two icon buttons.
	 *
	 * The session label shows the selected model's display name once the model is
	 * ready and the literal "Session" otherwise. The debug button reflects the
	 * store's `showDebugPanel` flag in both its styling and its tooltip.
	 *
	 * Props are read through `props.*` (never destructured) to keep Solid reactivity.
	 */
	const Header: Component<{
	  onToggleDebug: () => void;
	}> = (props) => {
	  const sessionLabel = () => {
	    if (appStore.modelState() !== 'ready') return 'Session';
	    return getModelDisplayName(appStore.selectedModelId());
	  };
	  const debugButtonClass = () => {
	    const tone = appStore.showDebugPanel()
	      ? 'bg-[var(--color-earthy-muted-green)] text-white'
	      : 'text-[var(--color-earthy-muted-green)] hover:bg-[var(--color-earthy-sage)]/30';
	    return `p-2 rounded-full transition-colors ${tone}`;
	  };
	  const debugButtonTitle = () => {
	    if (appStore.showDebugPanel()) return 'Hide debug panel';
	    return 'Show debug panel';
	  };

	  return (
	    <header class="h-20 flex items-center justify-between px-8 bg-[var(--color-earthy-bg)]/80 backdrop-blur-sm z-30 shrink-0">
	      <div class="flex items-center gap-6">
	        <div class="flex items-center gap-3">
	          <div class="w-10 h-10 rounded-full bg-[var(--color-earthy-muted-green)] flex items-center justify-center text-white">
	            <span class="material-symbols-outlined text-xl">auto_awesome</span>
	          </div>
	          <div>
	            <h1 class="text-lg font-semibold tracking-tight text-[var(--color-earthy-dark-brown)]">keet</h1>
	            <p class="text-[10px] uppercase tracking-[0.2em] text-[var(--color-earthy-soft-brown)] font-medium">{sessionLabel()}</p>
	          </div>
	        </div>
	      </div>
	      <div class="flex items-center gap-4">
	        {/* Debug panel toggle */}
	        <button
	          type="button"
	          onClick={props.onToggleDebug}
	          class={debugButtonClass()}
	          title={debugButtonTitle()}
	          aria-label="Toggle debug panel"
	        >
	          <span class="material-symbols-outlined">bug_report</span>
	        </button>
	        {/* Overflow menu button (no click handler is attached here) */}
	        <button
	          type="button"
	          class="p-2 text-[var(--color-earthy-muted-green)] hover:scale-110 transition-transform"
	          aria-label="More options"
	        >
	          <span class="material-symbols-outlined">more_vert</span>
	        </button>
	      </div>
	    </header>
	  );
	};

	// localStorage key under which the floating control widget's last position is saved as JSON ({ x, y }).
	const WIDGET_STORAGE_KEY = 'boncukjs-control-widget-pos';
	// Width in px reserved for the widget when clamping its x position to the viewport.
	// NOTE(review): presumably mirrors the widget's `max-w-2xl` class (42rem) — confirm if the layout changes.
	const WIDGET_MAX_W = 672;
	// Height in px reserved for the widget when clamping its y position to the viewport.
	const WIDGET_MIN_H = 80;

	const App: Component = () => {
	// Visibility of the blocking ModelLoadingOverlay (starts hidden).
	const [showModelOverlay, setShowModelOverlay] = createSignal(false);
	// Whether the settings panel attached to the control widget is expanded.
	const [showContextPanel, setShowContextPanel] = createSignal(false);
	// Which part of the settings the panel shows: everything, or only the audio / model section.
	type SettingsPanelSection = 'full' \| 'audio' \| 'model';
	const [settingsPanelSection, setSettingsPanelSection] = createSignal<SettingsPanelSection>('full');
	// Timer id of the pending "close panel after hover ends" timeout, if any.
	let panelHoverCloseTimeout: number \| undefined;
	// Set to true at the end of onMount, after the transcription worker client has been wired up.
	const [workerReady, setWorkerReady] = createSignal(false);
	// Top-left position (px) of the floating control widget; null until onMount restores or computes it.
	const [widgetPos, setWidgetPos] = createSignal<{ x: number; y: number } \| null>(null);
	// True between mousedown on the control bar and the following mouseup.
	const [isDragging, setIsDragging] = createSignal(false);

	// Convenience accessors over the store's recording / model state.
	const isRecording = () => appStore.recordingState() === 'recording';
	const isModelReady = () => appStore.modelState() === 'ready';

	// Pointer position and widget position captured when a drag starts.
	let dragStart = { x: 0, y: 0 };
	let posStart = { x: 0, y: 0 };

	// Viewport height, kept current by the resize listener installed in onMount (600 when `window` is unavailable).
	const [windowHeight, setWindowHeight] = createSignal(typeof window !== 'undefined' ? window.innerHeight : 600);
	/**
	 * Decides in which direction the settings panel opens.
	 *
	 * @returns `true` (expand upwards) when the widget has no position yet or sits
	 *          in the lower half of the viewport; `false` otherwise.
	 */
	const settingsExpandUp = () => {
	  const pos = widgetPos();
	  return pos ? pos.y >= windowHeight() / 2 : true;
	};

	/**
	 * Mousedown handler that starts dragging the floating control widget.
	 *
	 * Presses that originate on a button, select or input are ignored so those
	 * controls keep working. While dragging, the widget position follows the
	 * pointer and is clamped to the viewport; on mouseup the final position is
	 * written to localStorage (best effort — storage errors are ignored).
	 *
	 * @param e The mousedown event from the control bar.
	 */
	const handleWidgetDragStart = (e: MouseEvent) => {
	  const origin = e.target;
	  const startedOnControl =
	    origin instanceof Element && origin.closest('button, select, input') !== null;
	  if (startedOnControl) return;
	  e.preventDefault();

	  const startPos = widgetPos();
	  if (!startPos) return;

	  setIsDragging(true);
	  dragStart = { x: e.clientX, y: e.clientY };
	  posStart = { ...startPos };

	  // Keeps a coordinate inside [0, upper]; the lower bound wins when upper is negative.
	  const clamp = (value: number, upper: number) => Math.max(0, Math.min(upper, value));

	  function onMove(moveEvent: MouseEvent) {
	    const viewportW = typeof window !== 'undefined' ? window.innerWidth : 800;
	    const viewportH = typeof window !== 'undefined' ? window.innerHeight : 600;
	    setWidgetPos({
	      x: clamp(posStart.x + (moveEvent.clientX - dragStart.x), viewportW - WIDGET_MAX_W),
	      y: clamp(posStart.y + (moveEvent.clientY - dragStart.y), viewportH - WIDGET_MIN_H),
	    });
	  }

	  function onUp() {
	    setIsDragging(false);
	    window.removeEventListener('mousemove', onMove);
	    window.removeEventListener('mouseup', onUp);
	    const finalPos = widgetPos();
	    if (!finalPos || typeof localStorage === 'undefined') return;
	    try {
	      localStorage.setItem(WIDGET_STORAGE_KEY, JSON.stringify(finalPos));
	    } catch (_) {}
	  }

	  // Listen on window so the drag continues when the pointer leaves the widget.
	  window.addEventListener('mousemove', onMove);
	  window.addEventListener('mouseup', onUp);
	};

	// Close the settings panel with Escape while it is open.
	// The listener is removed through onCleanup: Solid does not treat an effect's
	// return value as a cleanup function (it is handed to the next run as the
	// previous value), so returning the remover would leave one extra keydown
	// listener attached every time the panel is opened.
	createEffect(() => {
	  if (!showContextPanel()) return;
	  const handler = (e: KeyboardEvent) => {
	    if (e.key === 'Escape') {
	      e.preventDefault();
	      setShowContextPanel(false);
	    }
	  };
	  document.addEventListener('keydown', handler);
	  // Runs before the effect re-executes (panel closed) and when the component is disposed.
	  onCleanup(() => document.removeEventListener('keydown', handler));
	});

	// Auto-close the settings panel once the model has finished loading, but only
	// when the panel is open on its model-only section.
	createEffect(() => {
	  if (appStore.modelState() !== 'ready') return;
	  if (!showContextPanel()) return;
	  if (settingsPanelSection() !== 'model') return;
	  setShowContextPanel(false);
	});

	// One-time setup after the component is mounted:
	//  1. keep `windowHeight` in sync with the viewport,
	//  2. restore the widget position from localStorage (or compute a default),
	//  3. create the transcription worker client and forward its events to the store,
	//  4. refresh the audio device list and flag the worker as ready.
	onMount(() => {
	  const onResize = () => setWindowHeight(window.innerHeight);
	  window.addEventListener('resize', onResize);
	  // onMount ignores the callback's return value, so the listener must be
	  // released through onCleanup rather than by returning a remover.
	  onCleanup(() => window.removeEventListener('resize', onResize));

	  // Same clamping rule as the drag handler, so a position saved on a larger
	  // window cannot place the widget outside the current viewport.
	  const clampToViewport = (pos: { x: number; y: number }) => ({
	    x: Math.max(0, Math.min(window.innerWidth - WIDGET_MAX_W, pos.x)),
	    y: Math.max(0, Math.min(window.innerHeight - WIDGET_MIN_H, pos.y)),
	  });

	  const stored =
	    typeof localStorage !== 'undefined' ? localStorage.getItem(WIDGET_STORAGE_KEY) : null;
	  let posRestored = false;
	  if (stored) {
	    try {
	      // Stored data is untrusted: validate the shape before using it.
	      const parsed: unknown = JSON.parse(stored);
	      if (typeof parsed === 'object' && parsed !== null) {
	        const { x, y } = parsed as { x?: unknown; y?: unknown };
	        if (typeof x === 'number' && Number.isFinite(x) && typeof y === 'number' && Number.isFinite(y)) {
	          setWidgetPos(clampToViewport({ x, y }));
	          posRestored = true;
	        }
	      }
	    } catch (_) {
	      // Corrupt or unreadable value: fall through to the default position.
	    }
	  }
	  if (!posRestored) {
	    // Default: horizontally centred, 140px above the bottom edge.
	    setWidgetPos(
	      clampToViewport({
	        x: (window.innerWidth - WIDGET_MAX_W) / 2,
	        y: window.innerHeight - 140,
	      })
	    );
	  }

	  workerClient = new TranscriptionWorkerClient();

	  workerClient.onModelProgress = (p) => {
	    appStore.setModelProgress(p.progress);
	    appStore.setModelMessage(p.message ?? '');
	    if (p.file) appStore.setModelFile(p.file);
	  };

	  workerClient.onModelStateChange = (s) => {
	    appStore.setModelState(s);
	  };

	  workerClient.onV3Confirmed = (text) => {
	    appStore.setTranscript(text);
	  };

	  workerClient.onV3Pending = (text) => {
	    appStore.setPendingText(text);
	  };

	  workerClient.onError = (msg) => {
	    appStore.setErrorMessage(msg);
	  };

	  appStore.refreshDevices();
	  setWorkerReady(true);
	});

	// No longer auto-show blocking model overlay; model selection is in the settings panel.
	// createEffect(() => { ... setShowModelOverlay(true); });

	// Component teardown: cancel timers, drop listeners and release the workers.
	// Disposed clients are also cleared from the module-level singletons (and the
	// exported mel signal), mirroring the dispose-then-null pattern used when mel
	// init fails, so a later mount cannot pick up an already disposed instance.
	onCleanup(() => {
	  clearTimeout(panelHoverCloseTimeout);
	  visualizationUnsubscribe?.();
	  visualizationUnsubscribe = undefined;
	  cleanupV4Pipeline();
	  melClient?.dispose();
	  melClient = null;
	  setMelClientSignal(null);
	  workerClient?.dispose();
	  workerClient = null;
	});

	// ---- v4 pipeline tick: periodic window building + inference ----
	// Number of ticks that got past the readiness checks (used to thin out logging).
	let v4TickCount = 0;
	// True once the "model not ready" message has been logged, so it is logged only once.
	let v4ModelNotReadyLogged = false;
	/**
	 * One iteration of the v4 inference loop.
	 *
	 * Steps, in order:
	 *  1. Return immediately if the pipeline is not fully set up, an inference is
	 *     already in flight, or the model is not ready.
	 *  2. Rate-limit against the store's inference interval.
	 *  3. Ask the buffer worker whether the audio after the mature cursor contains
	 *     speech (energy VAD, AND-ed with inference VAD when that worker is ready).
	 *  4. Without speech: if the trailing silence is long enough, ask the worker to
	 *     finalize the pending sentence and publish the result; then return.
	 *  5. With speech: build a window, fetch mel features for it, run inference and
	 *     publish text, latency, RTF, merger stats and buffer metrics to the store.
	 *
	 * The pipeline objects are module-level and can be torn down (or replaced by a
	 * new recording session) while this function is suspended on an `await`.
	 * The function therefore works on local snapshots and checks `isStale()` after
	 * awaits in the pre-inference phase instead of dereferencing the globals again.
	 */
	const v4Tick = async () => {
	  // Snapshot the shared instances; cleanupV4Pipeline() may null the globals at any await.
	  const worker = workerClient;
	  const builder = windowBuilder;
	  const engine = audioEngine;
	  const buffer = bufferClient;
	  if (!worker || !builder || !engine || !buffer || v4InferenceBusy) return;

	  // True once the pipeline this tick started with has been torn down or replaced.
	  const isStale = () => windowBuilder !== builder || bufferClient !== buffer;

	  // Skip inference if model is not ready (but still allow audio/mel/VAD to process)
	  if (appStore.modelState() !== 'ready') {
	    if (!v4ModelNotReadyLogged) {
	      console.log('[v4Tick] Model not ready yet - audio is being captured and preprocessed');
	      v4ModelNotReadyLogged = true;
	    }
	    return;
	  }
	  // Reset the flag once model becomes ready
	  if (v4ModelNotReadyLogged) {
	    console.log('[v4Tick] Model is now ready - starting inference');
	    v4ModelNotReadyLogged = false;
	    // Initialize the v4 service now that model is ready
	    await worker.initV4Service({ debug: false });
	    if (isStale()) return;
	  }

	  v4TickCount++;
	  const now = performance.now();
	  // Use the store's configurable inference interval (minus a small margin for the tick jitter)
	  const minInterval = Math.max(200, appStore.v4InferenceIntervalMs() - 100);
	  if (now - v4LastInferenceTime < minInterval) return;

	  // Check if there is speech via the BufferWorker (async query).
	  // We check both energy and inference VAD layers.
	  const cursorSample = builder.getMatureCursorFrame(); // frame === sample in our pipeline
	  const currentSample = v4GlobalSampleOffset;
	  const startSample = cursorSample > 0 ? cursorSample : 0;

	  let hasSpeech = false;
	  if (currentSample > startSample) {
	    // Check energy VAD first (always available, low latency)
	    const energyResult = await buffer.hasSpeech('energyVad', startSample, currentSample, 0.3);
	    if (isStale()) return;

	    // When inference VAD is ready, require BOTH energy AND inference to agree
	    // This prevents false positives from music/noise that has high energy but no speech
	    if (tenVADClient?.isReady()) {
	      const inferenceResult = await buffer.hasSpeech('inferenceVad', startSample, currentSample, 0.5);
	      if (isStale()) return;
	      hasSpeech = energyResult.hasSpeech && inferenceResult.hasSpeech;
	    } else {
	      // Fall back to energy-only if inference VAD is not available
	      hasSpeech = energyResult.hasSpeech;
	    }
	  }

	  // Verbose diagnostics for the first few ticks, then every 20th tick.
	  if (v4TickCount <= 5 || v4TickCount % 20 === 0) {
	    const vadState = appStore.vadState();
	    const rb = engine.getRingBuffer();
	    const rbFrame = rb.getCurrentFrame();
	    const rbBase = rb.getBaseFrameOffset();
	    console.log(
	      `[v4Tick #${v4TickCount}] hasSpeech=${hasSpeech}, vadState=${vadState.hybridState}, ` +
	      `energy=${vadState.energy.toFixed(4)}, inferenceVAD=${(vadState.sileroProbability || 0).toFixed(2)}, ` +
	      `samples=[${startSample}:${currentSample}], ` +
	      `ringBuf=[base=${rbBase}, head=${rbFrame}, avail=${rbFrame - rbBase}]`
	    );
	  }

	  // Periodic buffer worker state dump (every 40 ticks)
	  if (v4TickCount % 40 === 0) {
	    try {
	      const state = await buffer.getState();
	      const layerSummary = Object.entries(state.layers)
	        .map(([id, l]) => `${id}:${l.fillCount}/${l.maxEntries}@${l.currentSample}`)
	        .join(', ');
	      console.log(`[v4Tick #${v4TickCount}] BufferState: ${layerSummary}`);
	    } catch (_) { /* ignore state query errors */ }
	    if (isStale()) return;
	  }

	  if (!hasSpeech) {
	    // Check for silence-based flush using BufferWorker
	    const silenceDuration = await buffer.getSilenceTailDuration('energyVad', 0.3);
	    if (silenceDuration >= appStore.v4SilenceFlushSec()) {
	      // Flush pending sentence via timeout finalization
	      try {
	        const flushResult = await worker.v4FinalizeTimeout();
	        if (flushResult) {
	          appStore.setMatureText(flushResult.matureText);
	          appStore.setImmatureText(flushResult.immatureText);
	          appStore.setMatureCursorTime(flushResult.matureCursorTime);
	          appStore.setTranscript(flushResult.fullText);
	          appStore.appendV4SentenceEntries(flushResult.matureSentences);
	          appStore.setV4MergerStats({
	            sentencesFinalized: flushResult.matureSentenceCount,
	            cursorUpdates: flushResult.stats?.matureCursorUpdates || 0,
	            utterancesProcessed: flushResult.stats?.utterancesProcessed || 0,
	          });
	          // Advance window builder cursor
	          builder.advanceMatureCursorByTime(flushResult.matureCursorTime);
	        }
	      } catch (err) {
	        console.error('[v4Tick] Flush error:', err);
	      }
	    }
	    return;
	  }

	  // Build window from cursor to current position.
	  // (Named `win` so the global `window` object is not shadowed.)
	  const win = builder.buildWindow();
	  if (!win) {
	    if (v4TickCount <= 10 || v4TickCount % 20 === 0) {
	      const rb = engine.getRingBuffer();
	      const rbHead = rb.getCurrentFrame();
	      const rbBase = rb.getBaseFrameOffset();
	      console.log(
	        `[v4Tick #${v4TickCount}] buildWindow=null, ` +
	        `ringBuf=[base=${rbBase}, head=${rbHead}, avail=${rbHead - rbBase}], ` +
	        `cursor=${builder.getMatureCursorFrame()}`
	      );
	    }
	    return;
	  }

	  console.log(`[v4Tick #${v4TickCount}] Window [${win.startFrame}:${win.endFrame}] ${win.durationSeconds.toFixed(2)}s (initial=${win.isInitial})`);

	  v4InferenceBusy = true;
	  v4LastInferenceTime = now;

	  try {
	    const inferenceStart = performance.now();

	    // Get mel features for the window; without them there is nothing to infer on.
	    const mel = melClient;
	    const features = mel ? await mel.getFeatures(win.startFrame, win.endFrame) : null;
	    if (!features) return; // the finally block releases the busy flag

	    // Calculate time offset for absolute timestamps
	    const timeOffset = win.startFrame / 16000;

	    // Calculate incremental cache parameters
	    const cursorFrame = builder.getMatureCursorFrame();
	    const prefixSeconds = cursorFrame > 0 ? (win.startFrame - cursorFrame) / 16000 : 0;

	    const result: V4ProcessResult = await worker.processV4ChunkWithFeatures({
	      features: features.features,
	      T: features.T,
	      melBins: features.melBins,
	      timeOffset,
	      endTime: win.endFrame / 16000,
	      segmentId: `v4_${Date.now()}`,
	      incrementalCache: prefixSeconds > 0 ? {
	        cacheKey: 'v4-stream',
	        prefixSeconds,
	      } : undefined,
	    });

	    const inferenceMs = performance.now() - inferenceStart;

	    // Update UI state
	    appStore.setMatureText(result.matureText);
	    appStore.setImmatureText(result.immatureText);
	    appStore.setTranscript(result.fullText);
	    appStore.setPendingText(result.immatureText);
	    appStore.appendV4SentenceEntries(result.matureSentences);
	    appStore.setInferenceLatency(inferenceMs);

	    // Update RTF (inference time relative to the audio duration it covered)
	    const audioDurationMs = win.durationSeconds * 1000;
	    appStore.setRtf(inferenceMs / audioDurationMs);

	    // Advance cursor if merger advanced it
	    if (result.matureCursorTime > builder.getMatureCursorTime()) {
	      appStore.setMatureCursorTime(result.matureCursorTime);
	      builder.advanceMatureCursorByTime(result.matureCursorTime);
	      builder.markSentenceEnd(Math.round(result.matureCursorTime * 16000));
	    }

	    // Update stats
	    appStore.setV4MergerStats({
	      sentencesFinalized: result.matureSentenceCount,
	      cursorUpdates: result.stats?.matureCursorUpdates || 0,
	      utterancesProcessed: result.stats?.utterancesProcessed || 0,
	    });

	    // Update buffer metrics
	    const ring = engine.getRingBuffer();
	    appStore.setBufferMetrics({
	      fillRatio: ring.getFillCount() / ring.getSize(),
	      latencyMs: (ring.getFillCount() / 16000) * 1000,
	    });

	    // Update metrics
	    if (result.metrics) {
	      appStore.setSystemMetrics({
	        throughput: 0,
	        modelConfidence: 0,
	      });
	    }
	  } catch (err) {
	    console.error('[v4Tick] Inference error:', err);
	  } finally {
	    // A stale tick must not clear the flag: teardown already reset it, and a
	    // newer session's tick may legitimately own it by now.
	    if (!isStale()) v4InferenceBusy = false;
	  }
	};

	// ---- Cleanup v4 pipeline resources ----
	/**
	 * Stops the v4 tick loop and releases everything the v4 pipeline created:
	 * the pending tick timer, audio/mel chunk subscriptions, the TEN-VAD and
	 * buffer workers, the hybrid VAD and the window builder. Counters and the
	 * inference-busy flag are reset so the next session starts from zero.
	 *
	 * Throttled VAD updates that are still waiting for an animation frame are
	 * discarded as well; otherwise a frame callback firing after teardown would
	 * write the previous session's speech state back into the store.
	 *
	 * Safe to call when the pipeline was never started.
	 */
	const cleanupV4Pipeline = () => {
	  v4TickRunning = false;
	  if (v4TickTimeout) {
	    clearTimeout(v4TickTimeout);
	    v4TickTimeout = undefined;
	  }
	  if (v4AudioChunkUnsubscribe) {
	    v4AudioChunkUnsubscribe();
	    v4AudioChunkUnsubscribe = null;
	  }
	  if (v4MelChunkUnsubscribe) {
	    v4MelChunkUnsubscribe();
	    v4MelChunkUnsubscribe = null;
	  }
	  // Both frame callbacks return early when their pending value is null.
	  pendingVadState = null;
	  pendingSileroProb = null;
	  hybridVAD = null;
	  if (tenVADClient) {
	    tenVADClient.dispose();
	    tenVADClient = null;
	  }
	  if (bufferClient) {
	    bufferClient.dispose();
	    bufferClient = null;
	  }
	  windowBuilder = null;
	  v4InferenceBusy = false;
	  v4LastInferenceTime = 0;
	  v4GlobalSampleOffset = 0;
	};

	/** Calls and clears the v2/v3 AudioEngine listener handles so they are never invoked twice. */
	const releaseChunkSubscriptions = () => {
	  segmentUnsubscribe?.();
	  segmentUnsubscribe = null;
	  windowUnsubscribe?.();
	  windowUnsubscribe = null;
	  melChunkUnsubscribe?.();
	  melChunkUnsubscribe = null;
	};

	/**
	 * Makes sure a mel worker client exists and is initialised with 128 mel bins.
	 * On init failure the client is disposed and cleared (and the failure logged),
	 * leaving `melClient` null so callers can detect that mel features are unavailable.
	 */
	const ensureMelClient = async () => {
	  const client: MelWorkerClient = melClient ?? new MelWorkerClient();
	  if (client !== melClient) {
	    melClient = client;
	    setMelClientSignal(client);
	  }
	  try {
	    await client.init({ nMels: 128 });
	  } catch (e) {
	    console.warn('[App] Mel worker init failed:', e);
	    client.dispose();
	    melClient = null;
	    setMelClientSignal(null);
	  }
	};

	/**
	 * Starts or stops a recording session, depending on the current recording state.
	 *
	 * Stop: the UI is reset first, then the audio engine is stopped, all listeners
	 * and the v4 pipeline are released, and the worker's final text is published.
	 *
	 * Start: the audio engine is created or reconfigured, the pipeline for the
	 * selected transcription mode is wired up, and capture begins:
	 *  - 'v4-utterance': BufferWorker + TEN-VAD + energy VAD + periodic v4Tick;
	 *    starts even when the model is not ready (v4Tick gates inference).
	 *  - 'v3-streaming': fixed-window streaming; requires a ready model.
	 *  - otherwise (v2): per-speech-segment transcription; requires a ready model.
	 *
	 * If starting fails part-way (for example when capture cannot be started),
	 * everything that was already set up is rolled back so that no tick loop,
	 * worker or listener from the failed attempt survives into the next one.
	 */
	const toggleRecording = async () => {
	  if (isRecording()) {
	    // Update UI immediately so the stop button always takes effect even if cleanup throws
	    visualizationUnsubscribe?.();
	    visualizationUnsubscribe = undefined;
	    appStore.stopRecording();
	    appStore.setAudioLevel(0);
	    appStore.setBarLevels(new Float32Array(0));

	    try {
	      audioEngine?.stop();

	      releaseChunkSubscriptions();
	      cleanupV4Pipeline();

	      if (workerClient) {
	        const final = await workerClient.finalize();
	        // The result shape differs between modes: prefer `text`, then `fullText`.
	        let text = '';
	        if ('text' in final && typeof final.text === 'string') {
	          text = final.text;
	        } else if ('fullText' in final && typeof final.fullText === 'string') {
	          text = final.fullText;
	        }
	        appStore.setTranscript(text);
	        appStore.setPendingText('');
	      }

	      melClient?.reset();
	      audioEngine?.reset();
	    } catch (err) {
	      console.warn('[App] Error during stop recording cleanup:', err);
	    }
	  } else {
	    try {
	      if (!audioEngine) {
	        audioEngine = new AudioEngine({
	          sampleRate: 16000,
	          deviceId: appStore.selectedDeviceId(),
	        });
	        setAudioEngineSignal(audioEngine);
	      } else {
	        audioEngine.updateConfig({ deviceId: appStore.selectedDeviceId() });
	        audioEngine.reset();
	      }

	      const mode = appStore.transcriptionMode();

	      // v4 mode: Always start audio capture, mel preprocessing, and VAD
	      // Inference only runs when model is ready (checked in v4Tick)
	      if (mode === 'v4-utterance') {
	        // ---- v4: Utterance-based pipeline with BufferWorker + TEN-VAD ----

	        // Initialize merger in worker only if model is ready
	        if (isModelReady() && workerClient) {
	          await workerClient.initV4Service({ debug: false });
	        }

	        // Initialize mel worker (always needed for preprocessing)
	        await ensureMelClient();

	        // Initialize BufferWorker (centralized multi-layer data store)
	        bufferClient = new BufferWorkerClient();
	        const bufferConfig: BufferWorkerConfig = {
	          sampleRate: 16000,
	          layers: {
	            audio: { hopSamples: 1, entryDimension: 1, maxDurationSec: 120 },
	            mel: { hopSamples: 160, entryDimension: 128, maxDurationSec: 120 },
	            energyVad: { hopSamples: 1280, entryDimension: 1, maxDurationSec: 120 },
	            inferenceVad: { hopSamples: 256, entryDimension: 1, maxDurationSec: 120 },
	          },
	        };
	        await bufferClient.init(bufferConfig);

	        // Initialize TEN-VAD worker (inference-based VAD)
	        tenVADClient = new TenVADWorkerClient();
	        tenVADClient.onResult((result: TenVADResult) => {
	          if (!bufferClient) return;
	          // Batch-write hop probabilities to inferenceVad (single worker message)
	          if (result.hopCount > 0) {
	            // Read the last probability before the array is handed to the worker.
	            const lastProb = result.probabilities[result.hopCount - 1];
	            if (bufferClient.writeBatchTransfer) {
	              bufferClient.writeBatchTransfer('inferenceVad', result.probabilities, result.globalSampleOffset);
	            } else {
	              bufferClient.writeBatch('inferenceVad', result.probabilities, result.globalSampleOffset);
	            }

	            // Update UI at most once per frame with the latest probability
	            scheduleSileroUpdate(lastProb);
	          }
	        });
	        // TEN-VAD init is non-blocking; falls back gracefully if WASM fails
	        const wasmPath = `${import.meta.env.BASE_URL}wasm/`;
	        tenVADClient.init({ hopSize: 256, threshold: 0.5, wasmPath }).catch((err) => {
	          console.warn('[v4] TEN-VAD init failed, using energy-only:', err);
	        });

	        // Initialize hybrid VAD for energy-based detection (always runs, fast)
	        hybridVAD = new HybridVAD({
	          sileroThreshold: 0.5,
	          onsetConfirmations: 2,
	          offsetConfirmations: 3,
	          sampleRate: 16000,
	        });
	        // Do NOT init Silero in HybridVAD (TEN-VAD replaces it)

	        // NOTE: WindowBuilder is created AFTER audioEngine.start() below,
	        // because start() may re-create the internal RingBuffer.

	        // Reset global sample counter
	        v4GlobalSampleOffset = 0;

	        // Feed audio chunks to mel worker from the main v4 audio handler below
	        v4MelChunkUnsubscribe = null;

	        // Process each audio chunk: energy VAD + write to BufferWorker + forward to TEN-VAD
	        v4AudioChunkUnsubscribe = audioEngine.onAudioChunk((chunk) => {
	          if (!hybridVAD || !bufferClient) return;

	          const chunkOffset = v4GlobalSampleOffset;
	          v4GlobalSampleOffset += chunk.length;

	          // 1. Run energy VAD (synchronous, fast) and write to BufferWorker
	          const vadResult = hybridVAD.processEnergyOnly(chunk);
	          const energyProb = vadResult.isSpeech ? 0.9 : 0.1;
	          bufferClient.writeScalar('energyVad', energyProb);

	          // 2. Forward audio to mel worker (copy, keep chunk for TEN-VAD transfer)
	          melClient?.pushAudioCopy(chunk);

	          // 3. Forward audio to TEN-VAD worker for inference-based VAD (transfer, no copy)
	          if (tenVADClient?.isReady()) {
	            tenVADClient.processTransfer(chunk, chunkOffset);
	          }

	          // 4. Update VAD state for UI. While TEN-VAD is ready it owns the
	          // probability field, so it is left out of this update.
	          const sileroProbability = tenVADClient?.isReady()
	            ? undefined
	            : (vadResult.sileroProbability || 0);
	          scheduleVadStateUpdate({
	            isSpeech: vadResult.isSpeech,
	            energy: vadResult.energy,
	            snr: vadResult.snr || 0,
	            hybridState: vadResult.state,
	            ...(sileroProbability !== undefined ? { sileroProbability } : {}),
	          });
	        });

	        // Start adaptive inference tick loop (reads interval from appStore)
	        // Note: v4Tick internally checks if model is ready before running inference
	        v4TickRunning = true;
	        const scheduleNextTick = () => {
	          if (!v4TickRunning) return;
	          v4TickTimeout = window.setTimeout(async () => {
	            if (!v4TickRunning) return;
	            await v4Tick();
	            scheduleNextTick();
	          }, appStore.v4InferenceIntervalMs());
	        };
	        scheduleNextTick();

	      } else if (isModelReady() && workerClient) {
	        // v3 and v2 modes still require model to be ready
	        if (mode === 'v3-streaming') {
	          // ---- v3: Fixed-window token streaming (existing) ----
	          const windowDur = appStore.streamingWindow();
	          const triggerInt = appStore.triggerInterval();
	          const overlapDur = Math.max(1.0, windowDur - triggerInt);

	          await workerClient.initV3Service({
	            windowDuration: windowDur,
	            overlapDuration: overlapDur,
	            sampleRate: 16000,
	            frameStride: appStore.frameStride(),
	          });

	          await ensureMelClient();

	          melChunkUnsubscribe = audioEngine.onAudioChunk((chunk) => {
	            melClient?.pushAudioCopy(chunk);
	          });

	          windowUnsubscribe = audioEngine.onWindowChunk(
	            windowDur,
	            overlapDur,
	            triggerInt,
	            async (audio, startTime) => {
	              if (!workerClient) return;
	              const start = performance.now();

	              // Prefer precomputed mel features; fall back to raw audio when
	              // the mel worker is missing or has no features for this range.
	              let result;
	              if (melClient) {
	                const startSample = Math.round(startTime * 16000);
	                const endSample = startSample + audio.length;
	                const melFeatures = await melClient.getFeatures(startSample, endSample);

	                if (melFeatures) {
	                  result = await workerClient.processV3ChunkWithFeatures(
	                    melFeatures.features,
	                    melFeatures.T,
	                    melFeatures.melBins,
	                    startTime,
	                    overlapDur,
	                  );
	                } else {
	                  result = await workerClient.processV3Chunk(audio, startTime);
	                }
	              } else {
	                result = await workerClient.processV3Chunk(audio, startTime);
	              }

	              const duration = performance.now() - start;
	              const stride = appStore.triggerInterval();
	              appStore.setRtf(duration / (stride * 1000));
	              appStore.setInferenceLatency(duration);

	              if (audioEngine) {
	                const ring = audioEngine.getRingBuffer();
	                appStore.setBufferMetrics({
	                  fillRatio: ring.getFillCount() / ring.getSize(),
	                  latencyMs: (ring.getFillCount() / 16000) * 1000,
	                });
	              }

	              appStore.setMergeInfo({
	                lcsLength: result.lcsLength,
	                anchorValid: result.anchorValid,
	                chunkCount: result.chunkCount,
	                anchorTokens: result.anchorTokens
	              });
	            }
	          );
	        } else {
	          // ---- v2: Per-utterance (existing) ----
	          await workerClient.initService({ sampleRate: 16000 });
	          segmentUnsubscribe = audioEngine.onSpeechSegment(async (segment) => {
	            if (workerClient) {
	              const start = Date.now();
	              const samples = audioEngine!.getRingBuffer().read(segment.startFrame, segment.endFrame);
	              const result = await workerClient.transcribeSegment(samples);
	              if (result.text) appStore.appendTranscript(result.text + ' ');
	              appStore.setInferenceLatency(Date.now() - start);
	            }
	          });
	        }
	      }

	      await audioEngine.start();

	      // Create WindowBuilder AFTER start() so we get the final RingBuffer reference
	      // (AudioEngine.init() re-creates the RingBuffer internally)
	      if (mode === 'v4-utterance') {
	        windowBuilder = new WindowBuilder(
	          audioEngine.getRingBuffer(),
	          null, // No VADRingBuffer; hasSpeech now goes through BufferWorker
	          {
	            sampleRate: 16000,
	            minDurationSec: 3.0,
	            maxDurationSec: 30.0,
	            minInitialDurationSec: 1.5,
	            useVadBoundaries: false, // VAD boundaries now managed by BufferWorker
	            vadSilenceThreshold: 0.3,
	            debug: true, // Enable debug logging for diagnostics
	          }
	        );
	      }

	      appStore.startRecording();

	      // Use same 30fps tick (onVisualizationUpdate throttled to 33ms).
	      // Bar levels from AnalyserNode (native FFT, low CPU) instead of mel worker.
	      visualizationUnsubscribe = audioEngine.onVisualizationUpdate((_data, metrics) => {
	        appStore.setAudioLevel(metrics.currentEnergy);
	        if (appStore.transcriptionMode() !== 'v4-utterance') {
	          appStore.setIsSpeechDetected(audioEngine?.isSpeechActive() ?? false);
	        }
	        appStore.setBarLevels(audioEngine!.getBarLevels());
	      });
	    } catch (err) {
	      // Roll back a partially started session. Without this, a failed start
	      // leaves the v4 tick loop, workers and listeners alive, and the next
	      // start would run a second tick loop next to the orphaned one.
	      try {
	        visualizationUnsubscribe?.();
	        visualizationUnsubscribe = undefined;
	        releaseChunkSubscriptions();
	        cleanupV4Pipeline();
	        audioEngine?.stop();
	      } catch (rollbackErr) {
	        console.warn('[App] Error while rolling back failed recording start:', rollbackErr);
	      }
	      // Thrown values are not guaranteed to be Error instances.
	      appStore.setErrorMessage(err instanceof Error ? err.message : String(err));
	    }
	  }
	};

	/**
	 * Loads the model currently selected in the store through the worker client.
	 *
	 * Does nothing when no worker client exists or when the model is already
	 * ready or loading. Opens the settings panel before loading starts. On
	 * failure the error is logged, the model state is set to 'error' and the
	 * message is published to the store.
	 */
	const loadSelectedModel = async () => {
	  if (!workerClient) return;
	  const alreadyReady = appStore.modelState() === 'ready';
	  if (alreadyReady) return;
	  const alreadyLoading = appStore.modelState() === 'loading';
	  if (alreadyLoading) return;

	  setShowContextPanel(true);
	  try {
	    const modelId = appStore.selectedModelId();
	    await workerClient.initModel(modelId);
	  } catch (e) {
	    console.error('Failed to load model:', e);
	    appStore.setModelState('error');
	    const reason = e instanceof Error ? e.message : String(e);
	    appStore.setErrorMessage(reason);
	  }
	};

	/** Opens the settings panel on its audio section, cancelling any pending hover-close. */
	const openPanelForAudio = (): void => {
	  // Cancel first so a close scheduled by a previous mouse-leave cannot hide the panel again.
	  clearTimeout(panelHoverCloseTimeout);

	  setSettingsPanelSection('audio');
	  setShowContextPanel(true);
	};
	/** Opens the settings panel on its model section, cancelling any pending hover-close. */
	const openPanelForModel = (): void => {
	  // Cancel first so a close scheduled by a previous mouse-leave cannot hide the panel again.
	  clearTimeout(panelHoverCloseTimeout);

	  setSettingsPanelSection('model');
	  setShowContextPanel(true);
	};
	/**
	 * Schedules the hover-opened settings panel to close after 250 ms.
	 *
	 * When the timer fires, the panel is closed only if it shows a partial
	 * section (not 'full') and no model is currently loading.
	 *
	 * Any previously scheduled close is cancelled first. Only the latest timer id
	 * is stored, so without this an earlier timer could no longer be cancelled and
	 * might close the panel while the pointer is over it.
	 */
	const schedulePanelCloseIfHover = () => {
	  clearTimeout(panelHoverCloseTimeout);
	  panelHoverCloseTimeout = window.setTimeout(() => {
	    if (settingsPanelSection() !== 'full' && appStore.modelState() !== 'loading') {
	      setShowContextPanel(false);
	    }
	  }, 250);
	};
	/** Cancels a pending hover-close of the settings panel (used when the pointer enters the panel). */
	const cancelPanelClose = () => {
	  return clearTimeout(panelHoverCloseTimeout);
	};
	/** Mouse-leave handler for the settings panel: a panel showing the full settings stays open. */
	const panelMouseLeave = () => {
	  const showsFullSettings = settingsPanelSection() === 'full';
	  if (showsFullSettings) return;
	  schedulePanelCloseIfHover();
	};

	/**
	 * Loads a model from user-selected local files through the worker client.
	 *
	 * Does nothing when no worker client exists. Opens the settings panel before
	 * loading starts. A failure is reported the same way as in loadSelectedModel:
	 * logged, model state set to 'error', and the message published to the store,
	 * so the user gets feedback instead of a silently stalled load.
	 *
	 * @param files Files picked by the user that make up the model.
	 */
	const handleLocalLoad = async (files: FileList) => {
	  if (!workerClient) return;
	  setShowContextPanel(true);
	  try {
	    await workerClient.initLocalModel(files);
	  } catch (e) {
	    console.error('Failed to load local model:', e);
	    appStore.setModelState('error');
	    appStore.setErrorMessage(e instanceof Error ? e.message : String(e));
	  }
	};

	return (
	<div class="h-screen flex flex-col overflow-hidden bg-[var(--color-earthy-bg)] selection:bg-[var(--color-earthy-coral)] selection:text-white">
	<ModelLoadingOverlay
	isVisible={showModelOverlay()}
	state={appStore.modelState()}
	progress={appStore.modelProgress()}
	message={appStore.modelMessage()}
	file={appStore.modelFile()}
	backend={appStore.backend()}
	selectedModelId={appStore.selectedModelId()}
	onModelSelect={(id: string) => appStore.setSelectedModelId(id)}
	onStart={() => loadSelectedModel()}
	onLocalLoad={handleLocalLoad}
	onClose={() => setShowModelOverlay(false)}
	/>

	<Header
	onToggleDebug={() => appStore.setShowDebugPanel(!appStore.showDebugPanel())}
	/>

	<div class="flex-1 flex overflow-hidden relative">
	<main class="flex-1 overflow-y-auto custom-scrollbar px-4 sm:px-6 lg:px-10 xl:px-14 2xl:px-20 flex flex-col items-center">
	<div class="w-full max-w-[1680px] py-8 md:py-10 lg:py-12">
	<TranscriptionDisplay
	confirmedText={appStore.transcriptionMode() === 'v4-utterance' ? appStore.matureText() : appStore.transcript()}
	pendingText={appStore.transcriptionMode() === 'v4-utterance' ? appStore.immatureText() : appStore.pendingText()}
	sentenceEntries={appStore.v4SentenceEntries()}
	isV4Mode={appStore.transcriptionMode() === 'v4-utterance'}
	isRecording={isRecording()}
	lcsLength={appStore.mergeInfo().lcsLength}
	anchorValid={appStore.mergeInfo().anchorValid}
	showConfidence={appStore.transcriptionMode() === 'v3-streaming'}
	class="min-h-[56vh]"
	/>
	</div>
	</main>
	</div>

	{/* Draggable floating control widget */}
	<div
	class={widgetPos() !== null ? 'fixed z-30 w-full max-w-2xl px-6 select-none' : 'absolute bottom-8 left-1/2 -translate-x-1/2 z-30 w-full max-w-2xl px-6'}
	style={widgetPos() ? { left: `${widgetPos()!.x}px`, top: `${widgetPos()!.y}px` } : {}}
	>
	<div class="relative">
	{/* Settings panel: expands up or down depending on bar position vs half screen height */}
	<div
	class="absolute left-0 right-0 overflow-hidden transition-[max-height] duration-300 ease-out border border-[var(--color-earthy-sage)]/30 bg-[var(--color-earthy-bg)]/95 backdrop-blur-sm shadow-lg"
	classList={{
	'max-h-0': !showContextPanel(),
	'max-h-[70vh]': showContextPanel(),
	'bottom-full rounded-t-2xl border-b-0': settingsExpandUp(),
	'top-full rounded-b-2xl border-t-0': !settingsExpandUp(),
	}}
	onMouseEnter={cancelPanelClose}
	onMouseLeave={panelMouseLeave}
	>
	<div class="max-h-[70vh] min-h-0 flex flex-col overflow-y-auto custom-scrollbar">
	<SettingsContent
	section={settingsPanelSection()}
	onClose={() => setShowContextPanel(false)}
	onLoadModel={() => loadSelectedModel()}
	onLocalLoad={handleLocalLoad}
	onOpenDebug={() => appStore.setShowDebugPanel(true)}
	onDeviceSelect={(id) => {
	if (audioEngine) audioEngine.updateConfig({ deviceId: id });
	}}
	audioEngine={audioEngineSignal() ?? undefined}
	expandUp={settingsExpandUp}
	/>
	</div>
	</div>

	{/* Control bar: steady, fixed position; never moves when settings open */}
	<div
	class="bg-white/90 backdrop-blur-md shadow-lg border border-[var(--color-earthy-sage)]/30 rounded-2xl overflow-hidden"
	onMouseDown={handleWidgetDragStart}
	role="presentation"
	>
	<div class="p-4 flex items-center justify-between gap-6 cursor-grab active:cursor-grabbing">
	<div class="flex items-center gap-2 flex-shrink-0">
	<span class="material-symbols-outlined text-[var(--color-earthy-soft-brown)] text-lg opacity-60" aria-hidden="true">drag_indicator</span>
	<div class="flex flex-col min-w-[60px]">
	<span class="text-[10px] uppercase tracking-wider text-[var(--color-earthy-soft-brown)] font-bold">Rec</span>
	<span class="font-mono text-sm text-[var(--color-earthy-dark-brown)]">{formatDuration(appStore.sessionDuration())}</span>
	</div>
	</div>
	<div class="flex-1 min-w-0 flex flex-col justify-center gap-1">
	<div class="h-8 flex items-center justify-center gap-1 overflow-hidden opacity-80 abstract-wave">
	<CompactWaveform audioLevel={appStore.audioLevel()} barLevels={appStore.barLevels()} isRecording={isRecording()} />
	</div>
	<Show when={appStore.modelState() === 'loading'}>
	<div class="flex items-center gap-2 px-1">
	<div class="flex-1 h-1.5 rounded-full overflow-hidden bg-[var(--color-earthy-sage)]/20">
	<div
	class="h-full bg-[var(--color-earthy-muted-green)] rounded-full transition-all duration-300"
	style={{ width: `${Math.max(0, Math.min(100, appStore.modelProgress()))}%` }}
	/>
	</div>
	<span class="text-[10px] font-mono text-[var(--color-earthy-soft-brown)] tabular-nums">{Math.round(appStore.modelProgress())}%</span>
	</div>
	</Show>
	</div>
	<div class="flex items-center gap-2 flex-shrink-0">
	<button
	type="button"
	onClick={toggleRecording}
	onMouseEnter={openPanelForAudio}
	onMouseLeave={schedulePanelCloseIfHover}
	class={`w-10 h-10 rounded-full flex items-center justify-center transition-colors border ${isRecording() ? 'bg-[var(--color-earthy-coral)] text-white border-[var(--color-earthy-coral)]' : 'text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] border-transparent hover:border-[var(--color-earthy-sage)]/30'}`}
	title={isRecording() ? 'Stop recording' : 'Start recording'}
	>
	<span class="material-symbols-outlined">mic</span>
	</button>
	<button
	type="button"
	onClick={() => loadSelectedModel()}
	onMouseEnter={openPanelForModel}
	onMouseLeave={schedulePanelCloseIfHover}
	disabled={appStore.modelState() === 'loading' \|\| appStore.modelState() === 'ready'}
	class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30 disabled:opacity-40 disabled:cursor-not-allowed relative"
	title={appStore.modelState() === 'ready' ? 'Model loaded' : appStore.modelState() === 'loading' ? 'Loading...' : 'Load model'}
	>
	<Show when={appStore.modelState() === 'loading'} fallback={<span class="material-symbols-outlined">power_settings_new</span>}>
	<span class="material-symbols-outlined load-btn-spin">progress_activity</span>
	</Show>
	</button>
	<button
	type="button"
	onClick={() => { setSettingsPanelSection('full'); setShowContextPanel((v) => !v); }}
	class={`w-10 h-10 rounded-full flex items-center justify-center transition-colors border ${showContextPanel() ? 'bg-[var(--color-earthy-sage)]/30 text-[var(--color-earthy-muted-green)] border-[var(--color-earthy-sage)]/50' : 'text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] border-transparent hover:border-[var(--color-earthy-sage)]/30'}`}
	title="Settings"
	>
	<span class="material-symbols-outlined">tune</span>
	</button>
	<button
	type="button"
	onClick={() => isRecording() && toggleRecording()}
	disabled={!isRecording()}
	class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30 disabled:opacity-40 disabled:cursor-not-allowed"
	title="Pause"
	>
	<span class="material-symbols-outlined">pause</span>
	</button>
	<button
	type="button"
	onClick={() => appStore.copyTranscript()}
	class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30"
	title="Copy transcript"
	>
	<span class="material-symbols-outlined">content_copy</span>
	</button>
	</div>
	</div>
	</div>
	</div>
	</div>

	{/* Foldable debug panel (bottom drawer) */}
	<Show when={appStore.showDebugPanel()}>
	<div class="absolute bottom-0 left-0 right-0 z-20 flex flex-col bg-[var(--color-earthy-bg)] border-t border-[var(--color-earthy-sage)]/30 shadow-[0_-4px_20px_rgba(0,0,0,0.08)] max-h-[70vh] overflow-hidden transition-all">
	<DebugPanel
	audioEngine={audioEngineSignal() ?? undefined}
	melClient={melClientSignal() ?? undefined}
	/>
	</div>
	</Show>
	</div>
	);
	};

	// Default export of this module: the App component defined above.
	export default App;