// Source: Hugging Face Space "ysdede/keet-streaming" (status: Running).
// File size: 42,944 bytes.

import { Component, Show, For, createSignal, createEffect, onMount, onCleanup } from 'solid-js';
import { appStore } from './stores/appStore';
import { CompactWaveform, ModelLoadingOverlay, DebugPanel, TranscriptionDisplay, SettingsContent } from './components';
import { getModelDisplayName, MODELS } from './components/ModelLoadingOverlay';
import { AudioEngine } from './lib/audio';
import { MelWorkerClient } from './lib/audio/MelWorkerClient';
import { TranscriptionWorkerClient } from './lib/transcription';
import { HybridVAD } from './lib/vad';
import { WindowBuilder } from './lib/transcription/WindowBuilder';
import { BufferWorkerClient } from './lib/buffer';
import { TenVADWorkerClient } from './lib/vad/TenVADWorkerClient';
import type { V4ProcessResult } from './lib/transcription/TranscriptionWorkerClient';
import type { BufferWorkerConfig, TenVADResult } from './lib/buffer/types';
import { formatDuration } from './utils/time';

// Singleton instances.
// These live at module scope and start out empty; the App component in this
// file fills them in (worker client on mount, audio engine and mel client when
// recording starts).
let audioEngine: AudioEngine | null = null;
// Exported signal that is set alongside `audioEngine` when the engine is created.
export const [audioEngineSignal, setAudioEngineSignal] = createSignal<AudioEngine | null>(null);

let workerClient: TranscriptionWorkerClient | null = null;
let melClient: MelWorkerClient | null = null;
// Exported signal that is set alongside `melClient` (and cleared when mel init fails).
export const [melClientSignal, setMelClientSignal] = createSignal<MelWorkerClient | null>(null);
// Unsubscribe handles for the v2 (speech segment) and v3 (window / mel chunk) subscriptions.
let segmentUnsubscribe: (() => void) | null = null;
let windowUnsubscribe: (() => void) | null = null;
let melChunkUnsubscribe: (() => void) | null = null;
// Unsubscribe handle for the visualization callback; `undefined` while not recording.
let visualizationUnsubscribe: (() => void) | undefined;
// v4 pipeline instances (created when a v4 recording starts, released by cleanupV4Pipeline)
let hybridVAD: HybridVAD | null = null;
let bufferClient: BufferWorkerClient | null = null;
let tenVADClient: TenVADWorkerClient | null = null;
let windowBuilder: WindowBuilder | null = null;
// Handle of the pending tick timer plus the flag that keeps the tick loop alive.
let v4TickTimeout: number | undefined;
let v4TickRunning = false;
let v4AudioChunkUnsubscribe: (() => void) | null = null;
let v4MelChunkUnsubscribe: (() => void) | null = null;
// Set while an inference request is in flight; the tick returns early when it is true.
let v4InferenceBusy = false;
// performance.now() timestamp of the last started inference (0 = none yet).
let v4LastInferenceTime = 0;
// Global sample counter for audio chunks (tracks total samples written to BufferWorker)
let v4GlobalSampleOffset = 0;
// Throttle UI updates from TEN-VAD to at most once per frame
let pendingSileroProb: number | null = null;
let sileroUpdateScheduled = false;
// Latest VAD snapshot waiting to be pushed to the store on the next animation frame.
let pendingVadState: {
  isSpeech: boolean;
  energy: number;
  snr: number;
  hybridState: string;
  sileroProbability?: number;
} | null = null;
let vadUpdateScheduled = false;

/**
 * Queue the latest inference-VAD probability for display.
 *
 * Calls made before the next animation frame are coalesced: only the most
 * recent probability is written to the store, merged into the current VAD
 * state so the other fields are left untouched.
 *
 * @param prob Probability to show as `sileroProbability`.
 */
const scheduleSileroUpdate = (prob: number) => {
  pendingSileroProb = prob;
  if (sileroUpdateScheduled) return;
  sileroUpdateScheduled = true;
  requestAnimationFrame(() => {
    sileroUpdateScheduled = false;
    const latest = pendingSileroProb;
    if (latest === null) return;
    // Consume the queued value (mirrors scheduleVadStateUpdate) so a flushed
    // probability is never mistaken for a fresh one.
    pendingSileroProb = null;
    appStore.setVadState({
      ...appStore.vadState(),
      sileroProbability: latest,
    });
  });
};

/**
 * Queue a VAD snapshot for the UI, flushed at most once per animation frame.
 *
 * Only the newest snapshot queued before the frame fires is applied. When the
 * snapshot carries no `sileroProbability`, the value already in the store is
 * kept. The speech-detected flag is updated from the same snapshot.
 */
const scheduleVadStateUpdate = (next: {
  isSpeech: boolean;
  energy: number;
  snr: number;
  hybridState: string;
  sileroProbability?: number;
}) => {
  pendingVadState = next;
  if (!vadUpdateScheduled) {
    vadUpdateScheduled = true;
    requestAnimationFrame(() => {
      vadUpdateScheduled = false;
      const queued = pendingVadState;
      if (!queued) return;
      const previous = appStore.vadState();
      appStore.setVadState({
        ...previous,
        ...queued,
        // An absent probability means "leave the stored one alone".
        sileroProbability:
          queued.sileroProbability === undefined
            ? previous.sileroProbability
            : queued.sileroProbability,
      });
      appStore.setIsSpeechDetected(queued.isSpeech);
      pendingVadState = null;
    });
  }
};

/**
 * Top bar: app mark, session label, debug-panel toggle and an options button.
 *
 * The label shows the selected model's display name once the model is ready
 * and the literal "Session" otherwise. The debug button reflects the store's
 * `showDebugPanel` flag in both its styling and its tooltip.
 */
const Header: Component<{
  onToggleDebug: () => void;
}> = (props) => {
  const debugOpen = () => appStore.showDebugPanel();

  const sessionLabel = () => {
    if (appStore.modelState() === 'ready') {
      return getModelDisplayName(appStore.selectedModelId());
    }
    return 'Session';
  };

  // Filled style while the debug panel is open, outline style otherwise.
  const debugButtonClass = () => {
    const tone = debugOpen()
      ? 'bg-[var(--color-earthy-muted-green)] text-white'
      : 'text-[var(--color-earthy-muted-green)] hover:bg-[var(--color-earthy-sage)]/30';
    return `p-2 rounded-full transition-colors ${tone}`;
  };

  const debugButtonTitle = () => (debugOpen() ? 'Hide debug panel' : 'Show debug panel');

  return (
    <header class="h-20 flex items-center justify-between px-8 bg-[var(--color-earthy-bg)]/80 backdrop-blur-sm z-30 shrink-0">
      <div class="flex items-center gap-6">
        <div class="flex items-center gap-3">
          <div class="w-10 h-10 rounded-full bg-[var(--color-earthy-muted-green)] flex items-center justify-center text-white">
            <span class="material-symbols-outlined text-xl">auto_awesome</span>
          </div>
          <div>
            <h1 class="text-lg font-semibold tracking-tight text-[var(--color-earthy-dark-brown)]">keet</h1>
            <p class="text-[10px] uppercase tracking-[0.2em] text-[var(--color-earthy-soft-brown)] font-medium">{sessionLabel()}</p>
          </div>
        </div>
      </div>
      <div class="flex items-center gap-4">
        <button
          type="button"
          onClick={props.onToggleDebug}
          class={debugButtonClass()}
          title={debugButtonTitle()}
          aria-label="Toggle debug panel"
        >
          <span class="material-symbols-outlined">bug_report</span>
        </button>
        <button
          type="button"
          class="p-2 text-[var(--color-earthy-muted-green)] hover:scale-110 transition-transform"
          aria-label="More options"
        >
          <span class="material-symbols-outlined">more_vert</span>
        </button>
      </div>
    </header>
  );
};

// localStorage key under which the control widget's last position is saved as JSON.
const WIDGET_STORAGE_KEY = 'boncukjs-control-widget-pos';
// Width subtracted from the viewport width when centering or clamping the widget horizontally.
const WIDGET_MAX_W = 672;
// Height subtracted from the viewport height when clamping the widget vertically.
const WIDGET_MIN_H = 80;

const App: Component = () => {
  // Visibility of the model loading overlay; the code in view only ever sets it to false.
  const [showModelOverlay, setShowModelOverlay] = createSignal(false);
  // Visibility of the settings/context panel.
  const [showContextPanel, setShowContextPanel] = createSignal(false);
  // Which part of the settings panel is shown: everything, or only the audio / model section.
  type SettingsPanelSection = 'full' | 'audio' | 'model';
  const [settingsPanelSection, setSettingsPanelSection] = createSignal<SettingsPanelSection>('full');
  // Handle of the delayed hover-close timer for the panel.
  let panelHoverCloseTimeout: number | undefined;
  // Becomes true at the end of onMount, after the transcription worker client is wired up.
  const [workerReady, setWorkerReady] = createSignal(false);
  // Position of the control widget; null until onMount restores or computes it.
  const [widgetPos, setWidgetPos] = createSignal<{ x: number; y: number } | null>(null);
  const [isDragging, setIsDragging] = createSignal(false);

  const isRecording = () => appStore.recordingState() === 'recording';
  const isModelReady = () => appStore.modelState() === 'ready';

  // Pointer position and widget position captured when a drag begins.
  let dragStart = { x: 0, y: 0 };
  let posStart = { x: 0, y: 0 };

  // Viewport height, refreshed on window resize; falls back to 600 when `window` is unavailable.
  const [windowHeight, setWindowHeight] = createSignal(typeof window !== 'undefined' ? window.innerHeight : 600);
  // True when the widget sits in the lower half of the viewport (or has no
  // position yet), i.e. when the settings panel should open upwards.
  const settingsExpandUp = () => {
    const anchor = widgetPos();
    return anchor ? anchor.y >= windowHeight() / 2 : true;
  };

  /**
   * Mouse-down handler that starts dragging the control widget.
   *
   * Presses that begin on a button, select or input are ignored. While the
   * drag is active the widget follows the pointer, clamped to the viewport;
   * on release the final position is saved to localStorage (best effort).
   */
  const handleWidgetDragStart = (e: MouseEvent) => {
    const origin = e.target;
    const startedOnControl =
      origin instanceof Element && origin.closest('button, select, input') !== null;
    if (startedOnControl) return;

    e.preventDefault();
    const initial = widgetPos();
    if (!initial) return;

    setIsDragging(true);
    dragStart = { x: e.clientX, y: e.clientY };
    posStart = { ...initial };

    // Keep a coordinate inside [0, upper]; 0 wins when `upper` is negative.
    const clamp = (value: number, upper: number) => Math.max(0, Math.min(upper, value));

    const trackPointer = (move: MouseEvent) => {
      const viewportW = typeof window !== 'undefined' ? window.innerWidth : 800;
      const viewportH = typeof window !== 'undefined' ? window.innerHeight : 600;
      setWidgetPos({
        x: clamp(posStart.x + (move.clientX - dragStart.x), viewportW - WIDGET_MAX_W),
        y: clamp(posStart.y + (move.clientY - dragStart.y), viewportH - WIDGET_MIN_H),
      });
    };

    const finishDrag = () => {
      setIsDragging(false);
      window.removeEventListener('mousemove', trackPointer);
      window.removeEventListener('mouseup', finishDrag);
      const finalPos = widgetPos();
      if (!finalPos || typeof localStorage === 'undefined') return;
      try {
        localStorage.setItem(WIDGET_STORAGE_KEY, JSON.stringify(finalPos));
      } catch (_) {}
    };

    window.addEventListener('mousemove', trackPointer);
    window.addEventListener('mouseup', finishDrag);
  };

  // Close the settings panel with Escape while it is open.
  createEffect(() => {
    if (!showContextPanel()) return;
    const handler = (e: KeyboardEvent) => {
      if (e.key === 'Escape') {
        e.preventDefault();
        setShowContextPanel(false);
      }
    };
    document.addEventListener('keydown', handler);
    // Solid does not call a function returned from an effect (the return value
    // only becomes the next run's "previous" argument), so the listener has to
    // be released through onCleanup. It runs before the effect re-executes and
    // when the component is disposed.
    onCleanup(() => document.removeEventListener('keydown', handler));
  });

  // Once the model is ready, dismiss the panel if it was showing only the
  // model section.
  createEffect(() => {
    if (appStore.modelState() !== 'ready') return;
    if (!showContextPanel()) return;
    if (settingsPanelSection() !== 'model') return;
    setShowContextPanel(false);
  });

  // Mount-time setup: track viewport height, place the control widget, and
  // create the transcription worker client with its store-forwarding callbacks.
  onMount(() => {
    const onResize = () => setWindowHeight(window.innerHeight);
    window.addEventListener('resize', onResize);
    // onMount ignores its callback's return value, so the listener must be
    // released through onCleanup rather than a returned teardown function.
    onCleanup(() => window.removeEventListener('resize', onResize));

    const viewportW = window.innerWidth;
    const viewportH = window.innerHeight;

    // Restore the saved widget position, if any. Storage access and parsing
    // are both inside the try block: either can throw, and in that case the
    // default placement below is used.
    let posRestored = false;
    try {
      const stored =
        typeof localStorage !== 'undefined' ? localStorage.getItem(WIDGET_STORAGE_KEY) : null;
      if (stored) {
        const parsed: unknown = JSON.parse(stored);
        if (typeof parsed === 'object' && parsed !== null) {
          const { x, y } = parsed as { x?: unknown; y?: unknown };
          if (
            typeof x === 'number' &&
            typeof y === 'number' &&
            Number.isFinite(x) &&
            Number.isFinite(y)
          ) {
            // Clamp with the same bounds the drag handler uses, so a position
            // saved in a larger window cannot leave the widget off-screen.
            setWidgetPos({
              x: Math.max(0, Math.min(viewportW - WIDGET_MAX_W, x)),
              y: Math.max(0, Math.min(viewportH - WIDGET_MIN_H, y)),
            });
            posRestored = true;
          }
        }
      }
    } catch (_) {
      // Unreadable or corrupt storage: fall through to the default placement.
    }
    if (!posRestored) {
      // Default: horizontally centered, 140px above the bottom edge.
      setWidgetPos({
        x: Math.max(0, (viewportW - WIDGET_MAX_W) / 2),
        y: viewportH - 140,
      });
    }

    workerClient = new TranscriptionWorkerClient();

    workerClient.onModelProgress = (p) => {
      appStore.setModelProgress(p.progress);
      appStore.setModelMessage(p.message || '');
      if (p.file) appStore.setModelFile(p.file);
    };

    workerClient.onModelStateChange = (s) => {
      appStore.setModelState(s);
    };

    workerClient.onV3Confirmed = (text) => {
      appStore.setTranscript(text);
    };

    workerClient.onV3Pending = (text) => {
      appStore.setPendingText(text);
    };

    workerClient.onError = (msg) => {
      appStore.setErrorMessage(msg);
    };

    appStore.refreshDevices();
    setWorkerReady(true);
  });

  // No longer auto-show blocking model overlay; model selection is in the settings panel.
  // createEffect(() => { ... setShowModelOverlay(true); });

  // Component teardown: cancel timers, drop subscriptions, release workers.
  onCleanup(() => {
    clearTimeout(panelHoverCloseTimeout);
    visualizationUnsubscribe?.();
    visualizationUnsubscribe = undefined;
    cleanupV4Pipeline();
    // The clients are module-level singletons. Clear the references after
    // disposing them so a later mount creates fresh instances instead of
    // reusing a disposed mel client (recording start only constructs one when
    // `melClient` is null).
    melClient?.dispose();
    melClient = null;
    setMelClientSignal(null);
    workerClient?.dispose();
    workerClient = null;
  });

  // ---- v4 pipeline tick: periodic window building + inference ----
  let v4TickCount = 0;
  let v4ModelNotReadyLogged = false;
  /**
   * One iteration of the v4 inference loop.
   *
   * Steps, in order:
   *  1. Bail out if the pipeline is not set up, an inference is in flight, or
   *     the model is not ready (audio capture keeps running regardless).
   *  2. Rate-limit using the store's inference interval.
   *  3. Ask the BufferWorker whether the span between the mature cursor and
   *     the newest sample contains speech.
   *  4. Without speech: after enough trailing silence, flush the pending
   *     sentence through the worker's timeout finalization.
   *  5. With speech: build a window, fetch its mel features, run inference
   *     and publish text, metrics and cursor updates to the store.
   *
   * The shared singletons are captured once at the top because a stop request
   * can null them while this function is suspended on an await. Results that
   * arrive after such a teardown are discarded rather than written to the store.
   */
  const v4Tick = async () => {
    const worker = workerClient;
    const builder = windowBuilder;
    const engine = audioEngine;
    const buffer = bufferClient;
    if (!worker || !builder || !engine || !buffer || v4InferenceBusy) return;

    // True once cleanup has cleared or replaced the pipeline this tick started with.
    const pipelineTornDown = () => windowBuilder !== builder || bufferClient !== buffer;

    // Skip inference if model is not ready (but still allow audio/mel/VAD to process)
    if (appStore.modelState() !== 'ready') {
      if (!v4ModelNotReadyLogged) {
        console.log('[v4Tick] Model not ready yet - audio is being captured and preprocessed');
        v4ModelNotReadyLogged = true;
      }
      return;
    }
    // Reset the flag once model becomes ready
    if (v4ModelNotReadyLogged) {
      console.log('[v4Tick] Model is now ready - starting inference');
      v4ModelNotReadyLogged = false;
      // Initialize the v4 service now that model is ready
      await worker.initV4Service({ debug: false });
      if (pipelineTornDown()) return;
    }

    v4TickCount++;
    const now = performance.now();
    // Use the store's configurable inference interval (minus a small margin for the tick jitter)
    const minInterval = Math.max(200, appStore.v4InferenceIntervalMs() - 100);
    if (now - v4LastInferenceTime < minInterval) return;

    // Check if there is speech via the BufferWorker (async query).
    const cursorSample = builder.getMatureCursorFrame(); // frame === sample in our pipeline
    const currentSample = v4GlobalSampleOffset;
    const startSample = cursorSample > 0 ? cursorSample : 0;

    let hasSpeech = false;
    if (currentSample > startSample) {
      // Check energy VAD first (always available, low latency)
      const energyResult = await buffer.hasSpeech('energyVad', startSample, currentSample, 0.3);

      // When inference VAD is ready, require BOTH energy AND inference to agree.
      // This prevents false positives from music/noise that has high energy but no speech.
      if (tenVADClient?.isReady()) {
        const inferenceResult = await buffer.hasSpeech('inferenceVad', startSample, currentSample, 0.5);
        hasSpeech = energyResult.hasSpeech && inferenceResult.hasSpeech;
      } else {
        // Fall back to energy-only if inference VAD is not available
        hasSpeech = energyResult.hasSpeech;
      }
      if (pipelineTornDown()) return;
    }

    // Diagnostic log for the first few ticks, then every 20th.
    if (v4TickCount <= 5 || v4TickCount % 20 === 0) {
      const vadState = appStore.vadState();
      const rb = engine.getRingBuffer();
      const rbFrame = rb.getCurrentFrame();
      const rbBase = rb.getBaseFrameOffset();
      console.log(
        `[v4Tick #${v4TickCount}] hasSpeech=${hasSpeech}, vadState=${vadState.hybridState}, ` +
        `energy=${vadState.energy.toFixed(4)}, inferenceVAD=${(vadState.sileroProbability || 0).toFixed(2)}, ` +
        `samples=[${startSample}:${currentSample}], ` +
        `ringBuf=[base=${rbBase}, head=${rbFrame}, avail=${rbFrame - rbBase}]`
      );
    }

    // Periodic buffer worker state dump (every 40 ticks)
    if (v4TickCount % 40 === 0) {
      try {
        const state = await buffer.getState();
        const layerSummary = Object.entries(state.layers)
          .map(([id, l]) => `${id}:${l.fillCount}/${l.maxEntries}@${l.currentSample}`)
          .join(', ');
        console.log(`[v4Tick #${v4TickCount}] BufferState: ${layerSummary}`);
      } catch (_) { /* ignore state query errors */ }
    }

    if (!hasSpeech) {
      // Check for silence-based flush using BufferWorker
      const silenceDuration = await buffer.getSilenceTailDuration('energyVad', 0.3);
      if (pipelineTornDown()) return;
      if (silenceDuration >= appStore.v4SilenceFlushSec()) {
        // Flush pending sentence via timeout finalization
        try {
          const flushResult = await worker.v4FinalizeTimeout();
          // Drop the result if recording stopped while the flush was in flight.
          if (flushResult && !pipelineTornDown()) {
            appStore.setMatureText(flushResult.matureText);
            appStore.setImmatureText(flushResult.immatureText);
            appStore.setMatureCursorTime(flushResult.matureCursorTime);
            appStore.setTranscript(flushResult.fullText);
            appStore.appendV4SentenceEntries(flushResult.matureSentences);
            appStore.setV4MergerStats({
              sentencesFinalized: flushResult.matureSentenceCount,
              cursorUpdates: flushResult.stats?.matureCursorUpdates || 0,
              utterancesProcessed: flushResult.stats?.utterancesProcessed || 0,
            });
            // Advance window builder cursor
            builder.advanceMatureCursorByTime(flushResult.matureCursorTime);
          }
        } catch (err) {
          console.error('[v4Tick] Flush error:', err);
        }
      }
      return;
    }

    // Build window from cursor to current position.
    // Named `audioWindow` so the browser's global `window` is not shadowed.
    const audioWindow = builder.buildWindow();
    if (!audioWindow) {
      if (v4TickCount <= 10 || v4TickCount % 20 === 0) {
        const rb = engine.getRingBuffer();
        const rbHead = rb.getCurrentFrame();
        const rbBase = rb.getBaseFrameOffset();
        console.log(
          `[v4Tick #${v4TickCount}] buildWindow=null, ` +
          `ringBuf=[base=${rbBase}, head=${rbHead}, avail=${rbHead - rbBase}], ` +
          `cursor=${builder.getMatureCursorFrame()}`
        );
      }
      return;
    }

    console.log(`[v4Tick #${v4TickCount}] Window [${audioWindow.startFrame}:${audioWindow.endFrame}] ${audioWindow.durationSeconds.toFixed(2)}s (initial=${audioWindow.isInitial})`);

    v4InferenceBusy = true;
    v4LastInferenceTime = now;

    try {
      const inferenceStart = performance.now();

      // Get mel features for the window
      let features: { features: Float32Array; T: number; melBins: number } | null = null;
      if (melClient) {
        features = await melClient.getFeatures(audioWindow.startFrame, audioWindow.endFrame);
      }

      // No features (mel worker missing or nothing available): skip this tick.
      // The finally block below clears the busy flag.
      if (!features) return;

      // Calculate time offset for absolute timestamps
      const timeOffset = audioWindow.startFrame / 16000;

      // Calculate incremental cache parameters
      const cursorFrame = builder.getMatureCursorFrame();
      const prefixSeconds = cursorFrame > 0 ? (audioWindow.startFrame - cursorFrame) / 16000 : 0;

      const result: V4ProcessResult = await worker.processV4ChunkWithFeatures({
        features: features.features,
        T: features.T,
        melBins: features.melBins,
        timeOffset,
        endTime: audioWindow.endFrame / 16000,
        segmentId: `v4_${Date.now()}`,
        incrementalCache: prefixSeconds > 0 ? {
          cacheKey: 'v4-stream',
          prefixSeconds,
        } : undefined,
      });

      // Recording was stopped while inference ran: the stop path publishes the
      // final transcript itself, so do not overwrite it with this result.
      if (pipelineTornDown()) return;

      const inferenceMs = performance.now() - inferenceStart;

      // Update UI state
      appStore.setMatureText(result.matureText);
      appStore.setImmatureText(result.immatureText);
      appStore.setTranscript(result.fullText);
      appStore.setPendingText(result.immatureText);
      appStore.appendV4SentenceEntries(result.matureSentences);
      appStore.setInferenceLatency(inferenceMs);

      // Update RTF (guarded so an empty window cannot produce Infinity/NaN)
      const audioDurationMs = audioWindow.durationSeconds * 1000;
      if (audioDurationMs > 0) {
        appStore.setRtf(inferenceMs / audioDurationMs);
      }

      // Advance cursor if merger advanced it
      if (result.matureCursorTime > builder.getMatureCursorTime()) {
        appStore.setMatureCursorTime(result.matureCursorTime);
        builder.advanceMatureCursorByTime(result.matureCursorTime);
        builder.markSentenceEnd(Math.round(result.matureCursorTime * 16000));
      }

      // Update stats
      appStore.setV4MergerStats({
        sentencesFinalized: result.matureSentenceCount,
        cursorUpdates: result.stats?.matureCursorUpdates || 0,
        utterancesProcessed: result.stats?.utterancesProcessed || 0,
      });

      // Update buffer metrics
      const ring = engine.getRingBuffer();
      appStore.setBufferMetrics({
        fillRatio: ring.getFillCount() / ring.getSize(),
        latencyMs: (ring.getFillCount() / 16000) * 1000,
      });

      // Update metrics
      if (result.metrics) {
        appStore.setSystemMetrics({
          throughput: 0,
          modelConfidence: 0,
        });
      }
    } catch (err) {
      console.error('[v4Tick] Inference error:', err);
    } finally {
      v4InferenceBusy = false;
    }
  };

  // ---- Cleanup v4 pipeline resources ----
  // Stops the tick loop, drops the audio subscriptions, disposes the VAD and
  // buffer workers, and resets the v4 counters to their initial values.
  const cleanupV4Pipeline = () => {
    // Stop the loop first so no further tick is scheduled.
    v4TickRunning = false;
    if (v4TickTimeout) {
      clearTimeout(v4TickTimeout);
      v4TickTimeout = undefined;
    }

    // Audio / mel chunk subscriptions.
    v4AudioChunkUnsubscribe?.();
    v4AudioChunkUnsubscribe = null;
    v4MelChunkUnsubscribe?.();
    v4MelChunkUnsubscribe = null;

    // VAD and buffer workers.
    hybridVAD = null;
    tenVADClient?.dispose();
    tenVADClient = null;
    bufferClient?.dispose();
    bufferClient = null;

    // Window builder and per-session counters.
    windowBuilder = null;
    v4InferenceBusy = false;
    v4LastInferenceTime = 0;
    v4GlobalSampleOffset = 0;
  };

  /**
   * Start or stop a recording session, depending on the current state.
   *
   * Stop: the UI is reset first, then audio capture, subscriptions and the v4
   * pipeline are torn down and the worker's final transcript is published.
   *
   * Start: the audio engine is created or reconfigured, the pipeline for the
   * selected transcription mode is wired up, capture begins and the
   * visualization callback is attached. v4 always captures and preprocesses
   * audio; v2/v3 are only wired when the model is ready. If anything throws
   * during start, everything set up so far is rolled back and the error is
   * shown through the store.
   */
  const toggleRecording = async () => {
    // Invoke and forget the v2/v3 subscriptions so a later stop never calls a
    // stale unsubscribe handle from an earlier session.
    const releaseLegacySubscriptions = () => {
      segmentUnsubscribe?.();
      segmentUnsubscribe = null;
      windowUnsubscribe?.();
      windowUnsubscribe = null;
      melChunkUnsubscribe?.();
      melChunkUnsubscribe = null;
    };

    // Create the mel worker client if needed and initialize it. On failure
    // the client is disposed and cleared so callers fall back to their
    // no-mel path; the failure is logged instead of being dropped silently.
    const ensureMelClient = async () => {
      let client = melClient;
      if (!client) {
        client = new MelWorkerClient();
        melClient = client;
        setMelClientSignal(client);
      }
      try {
        await client.init({ nMels: 128 });
      } catch (e) {
        console.warn('[App] Mel worker init failed; continuing without mel features:', e);
        client.dispose();
        melClient = null;
        setMelClientSignal(null);
      }
    };

    if (isRecording()) {
      // Update UI immediately so the stop button always takes effect even if cleanup throws
      visualizationUnsubscribe?.();
      visualizationUnsubscribe = undefined;
      appStore.stopRecording();
      appStore.setAudioLevel(0);
      appStore.setBarLevels(new Float32Array(0));

      try {
        audioEngine?.stop();

        releaseLegacySubscriptions();
        cleanupV4Pipeline();

        if (workerClient) {
          // The finalize result exposes the text as either `text` or `fullText`.
          const final = await workerClient.finalize();
          let text = '';
          if ('text' in final && typeof final.text === 'string') {
            text = final.text;
          } else if ('fullText' in final && typeof final.fullText === 'string') {
            text = final.fullText;
          }
          appStore.setTranscript(text);
          appStore.setPendingText('');
        }

        melClient?.reset();
        audioEngine?.reset();
      } catch (err) {
        console.warn('[App] Error during stop recording cleanup:', err);
      }
      return;
    }

    try {
      let engine = audioEngine;
      if (!engine) {
        engine = new AudioEngine({
          sampleRate: 16000,
          deviceId: appStore.selectedDeviceId(),
        });
        audioEngine = engine;
        setAudioEngineSignal(engine);
      } else {
        engine.updateConfig({ deviceId: appStore.selectedDeviceId() });
        engine.reset();
      }

      const mode = appStore.transcriptionMode();

      // v4 mode: Always start audio capture, mel preprocessing, and VAD
      // Inference only runs when model is ready (checked in v4Tick)
      if (mode === 'v4-utterance') {
        // ---- v4: Utterance-based pipeline with BufferWorker + TEN-VAD ----

        // Initialize merger in worker only if model is ready
        if (isModelReady() && workerClient) {
          await workerClient.initV4Service({ debug: false });
        }

        // Initialize mel worker (always needed for preprocessing)
        await ensureMelClient();

        // Initialize BufferWorker (centralized multi-layer data store)
        bufferClient = new BufferWorkerClient();
        const bufferConfig: BufferWorkerConfig = {
          sampleRate: 16000,
          layers: {
            audio: { hopSamples: 1, entryDimension: 1, maxDurationSec: 120 },
            mel: { hopSamples: 160, entryDimension: 128, maxDurationSec: 120 },
            energyVad: { hopSamples: 1280, entryDimension: 1, maxDurationSec: 120 },
            inferenceVad: { hopSamples: 256, entryDimension: 1, maxDurationSec: 120 },
          },
        };
        await bufferClient.init(bufferConfig);

        // Initialize TEN-VAD worker (inference-based VAD)
        tenVADClient = new TenVADWorkerClient();
        tenVADClient.onResult((result: TenVADResult) => {
          if (!bufferClient) return;
          // Batch-write hop probabilities to inferenceVad (single worker message)
          if (result.hopCount > 0) {
            const lastProb = result.probabilities[result.hopCount - 1];
            if (bufferClient.writeBatchTransfer) {
              bufferClient.writeBatchTransfer('inferenceVad', result.probabilities, result.globalSampleOffset);
            } else {
              bufferClient.writeBatch('inferenceVad', result.probabilities, result.globalSampleOffset);
            }

            // Update UI at most once per frame with the latest probability
            scheduleSileroUpdate(lastProb);
          }
        });
        // TEN-VAD init is non-blocking; falls back gracefully if WASM fails
        const wasmPath = `${import.meta.env.BASE_URL}wasm/`;
        tenVADClient.init({ hopSize: 256, threshold: 0.5, wasmPath }).catch((err) => {
          console.warn('[v4] TEN-VAD init failed, using energy-only:', err);
        });

        // Initialize hybrid VAD for energy-based detection (always runs, fast)
        hybridVAD = new HybridVAD({
          sileroThreshold: 0.5,
          onsetConfirmations: 2,
          offsetConfirmations: 3,
          sampleRate: 16000,
        });
        // Do NOT init Silero in HybridVAD (TEN-VAD replaces it)

        // NOTE: WindowBuilder is created AFTER engine.start() below,
        // because start() may re-create the internal RingBuffer.

        // Reset global sample counter
        v4GlobalSampleOffset = 0;

        // Feed audio chunks to mel worker from the main v4 audio handler below
        v4MelChunkUnsubscribe = null;

        // Process each audio chunk: energy VAD + write to BufferWorker + forward to TEN-VAD
        v4AudioChunkUnsubscribe = engine.onAudioChunk((chunk) => {
          if (!hybridVAD || !bufferClient) return;

          const chunkOffset = v4GlobalSampleOffset;
          v4GlobalSampleOffset += chunk.length;

          // 1. Run energy VAD (synchronous, fast) and write to BufferWorker
          const vadResult = hybridVAD.processEnergyOnly(chunk);
          const energyProb = vadResult.isSpeech ? 0.9 : 0.1;
          bufferClient.writeScalar('energyVad', energyProb);

          // 2. Forward audio to mel worker (copy, keep chunk for TEN-VAD transfer)
          melClient?.pushAudioCopy(chunk);

          // 3. Forward audio to TEN-VAD worker for inference-based VAD (transfer, no copy)
          if (tenVADClient?.isReady()) {
            tenVADClient.processTransfer(chunk, chunkOffset);
          }

          // 4. Update VAD state for UI. While TEN-VAD is ready its own result
          // callback supplies the probability, so it is left out here.
          const sileroProbability = tenVADClient?.isReady()
            ? undefined
            : (vadResult.sileroProbability || 0);
          scheduleVadStateUpdate({
            isSpeech: vadResult.isSpeech,
            energy: vadResult.energy,
            snr: vadResult.snr || 0,
            hybridState: vadResult.state,
            ...(sileroProbability !== undefined ? { sileroProbability } : {}),
          });
        });

        // Start adaptive inference tick loop (reads interval from appStore)
        // Note: v4Tick internally checks if model is ready before running inference
        v4TickRunning = true;
        const scheduleNextTick = () => {
          if (!v4TickRunning) return;
          v4TickTimeout = window.setTimeout(async () => {
            if (!v4TickRunning) return;
            // A rejected tick must not end the loop: log it and keep scheduling.
            try {
              await v4Tick();
            } catch (err) {
              console.error('[v4Tick] Unhandled tick error:', err);
            }
            scheduleNextTick();
          }, appStore.v4InferenceIntervalMs());
        };
        scheduleNextTick();

      } else if (isModelReady() && workerClient) {
        // v3 and v2 modes still require model to be ready
        if (mode === 'v3-streaming') {
          // ---- v3: Fixed-window token streaming (existing) ----
          const windowDur = appStore.streamingWindow();
          const triggerInt = appStore.triggerInterval();
          const overlapDur = Math.max(1.0, windowDur - triggerInt);

          await workerClient.initV3Service({
            windowDuration: windowDur,
            overlapDuration: overlapDur,
            sampleRate: 16000,
            frameStride: appStore.frameStride(),
          });

          await ensureMelClient();

          melChunkUnsubscribe = engine.onAudioChunk((chunk) => {
            melClient?.pushAudioCopy(chunk);
          });

          windowUnsubscribe = engine.onWindowChunk(
            windowDur,
            overlapDur,
            triggerInt,
            async (audio, startTime) => {
              if (!workerClient) return;
              const start = performance.now();

              // Prefer precomputed mel features; fall back to raw audio when
              // the mel worker is missing or has nothing for this span.
              let result;
              if (melClient) {
                const startSample = Math.round(startTime * 16000);
                const endSample = startSample + audio.length;
                const melFeatures = await melClient.getFeatures(startSample, endSample);

                if (melFeatures) {
                  result = await workerClient.processV3ChunkWithFeatures(
                    melFeatures.features,
                    melFeatures.T,
                    melFeatures.melBins,
                    startTime,
                    overlapDur,
                  );
                } else {
                  result = await workerClient.processV3Chunk(audio, startTime);
                }
              } else {
                result = await workerClient.processV3Chunk(audio, startTime);
              }

              const duration = performance.now() - start;
              const stride = appStore.triggerInterval();
              appStore.setRtf(duration / (stride * 1000));
              appStore.setInferenceLatency(duration);

              const ring = engine.getRingBuffer();
              appStore.setBufferMetrics({
                fillRatio: ring.getFillCount() / ring.getSize(),
                latencyMs: (ring.getFillCount() / 16000) * 1000,
              });

              appStore.setMergeInfo({
                lcsLength: result.lcsLength,
                anchorValid: result.anchorValid,
                chunkCount: result.chunkCount,
                anchorTokens: result.anchorTokens
              });
            }
          );
        } else {
          // ---- v2: Per-utterance (existing) ----
          await workerClient.initService({ sampleRate: 16000 });
          segmentUnsubscribe = engine.onSpeechSegment(async (segment) => {
            if (workerClient) {
              const start = Date.now();
              const samples = engine.getRingBuffer().read(segment.startFrame, segment.endFrame);
              const result = await workerClient.transcribeSegment(samples);
              if (result.text) appStore.appendTranscript(result.text + ' ');
              appStore.setInferenceLatency(Date.now() - start);
            }
          });
        }
      }

      await engine.start();

      // Create WindowBuilder AFTER start() so we get the final RingBuffer reference
      // (AudioEngine.init() re-creates the RingBuffer internally)
      if (mode === 'v4-utterance') {
        windowBuilder = new WindowBuilder(
          engine.getRingBuffer(),
          null, // No VADRingBuffer; hasSpeech now goes through BufferWorker
          {
            sampleRate: 16000,
            minDurationSec: 3.0,
            maxDurationSec: 30.0,
            minInitialDurationSec: 1.5,
            useVadBoundaries: false, // VAD boundaries now managed by BufferWorker
            vadSilenceThreshold: 0.3,
            debug: true, // Enable debug logging for diagnostics
          }
        );
      }

      appStore.startRecording();

      // Use same 30fps tick (onVisualizationUpdate throttled to 33ms).
      // Bar levels from AnalyserNode (native FFT, low CPU) instead of mel worker.
      visualizationUnsubscribe = engine.onVisualizationUpdate((_data, metrics) => {
        appStore.setAudioLevel(metrics.currentEnergy);
        if (appStore.transcriptionMode() !== 'v4-utterance') {
          appStore.setIsSpeechDetected(engine.isSpeechActive());
        }
        appStore.setBarLevels(engine.getBarLevels());
      });
    } catch (err: unknown) {
      // Roll back whatever was set up before the failure. Otherwise a
      // half-started session keeps its tick loop, subscriptions and workers
      // alive while the UI shows "not recording".
      try {
        visualizationUnsubscribe?.();
        visualizationUnsubscribe = undefined;
        releaseLegacySubscriptions();
        cleanupV4Pipeline();
        audioEngine?.stop();
        if (isRecording()) appStore.stopRecording();
      } catch (rollbackErr) {
        console.warn('[App] Error while rolling back failed recording start:', rollbackErr);
      }
      appStore.setErrorMessage(err instanceof Error ? err.message : String(err));
    }
  };

  /**
   * Load the model currently selected in the store.
   *
   * Does nothing without a worker client or when the model is already ready
   * or loading. Opens the settings panel before loading starts; a failure
   * is logged and written to the store as an error state and message.
   */
  const loadSelectedModel = async () => {
    if (!workerClient) return;
    const state = appStore.modelState();
    if (state === 'ready' || state === 'loading') return;

    setShowContextPanel(true);
    try {
      await workerClient.initModel(appStore.selectedModelId());
    } catch (e) {
      console.error('Failed to load model:', e);
      appStore.setModelState('error');
      const reason = e instanceof Error ? e.message : String(e);
      appStore.setErrorMessage(reason);
    }
  };

  /**
   * Shared hover-open routine: cancels any pending hover-close timer, switches
   * the settings panel to the requested section, and shows the panel.
   */
  const openPanelSection = (section: 'audio' | 'model') => {
    clearTimeout(panelHoverCloseTimeout);
    setSettingsPanelSection(section);
    setShowContextPanel(true);
  };
  /** Opens the context panel on its `'audio'` section. */
  const openPanelForAudio = () => openPanelSection('audio');
  /** Opens the context panel on its `'model'` section. */
  const openPanelForModel = () => openPanelSection('model');
  /**
   * Schedules the context panel to close 250 ms from now.
   *
   * When the timer fires, the panel is closed only if it is not showing the
   * `'full'` section and no model load is in progress.
   *
   * Any previously scheduled close is cancelled first, so at most one timer is
   * ever pending and `panelHoverCloseTimeout` always refers to it. Without
   * this, a second call would overwrite the handle of the first timer, which
   * could then no longer be cancelled and would still close the panel.
   */
  const schedulePanelCloseIfHover = () => {
    clearTimeout(panelHoverCloseTimeout);
    panelHoverCloseTimeout = window.setTimeout(() => {
      if (settingsPanelSection() !== 'full' && appStore.modelState() !== 'loading') {
        setShowContextPanel(false);
      }
    }, 250);
  };
  /** Cancels the pending hover-close timer, if one is scheduled. */
  const cancelPanelClose = () => {
    clearTimeout(panelHoverCloseTimeout);
  };
  /**
   * Mouse-leave handler for the settings panel: schedules a delayed close
   * unless the panel is showing its `'full'` section.
   */
  const panelMouseLeave = () => {
    const showingFullSection = settingsPanelSection() === 'full';
    if (showingFullSection) return;
    schedulePanelCloseIfHover();
  };

  /**
   * Loads a model from user-supplied local files through the transcription worker.
   *
   * Does nothing when there is no worker client. Otherwise it opens the context
   * panel and hands the files to the worker. On failure the error is logged and
   * also written to the store (model state `'error'` plus the message), matching
   * `loadSelectedModel`, so a failed local load is visible in the UI rather than
   * only in the console.
   *
   * @param files Files chosen by the user, forwarded unchanged to the worker.
   */
  const handleLocalLoad = async (files: FileList) => {
    if (!workerClient) return;
    setShowContextPanel(true);
    try {
      await workerClient.initLocalModel(files);
    } catch (e) {
      console.error('Failed to load local model:', e);
      appStore.setModelState('error');
      // Thrown values are not guaranteed to be Error instances.
      appStore.setErrorMessage(e instanceof Error ? e.message : String(e));
    }
  };

  // Render tree, top to bottom:
  //  - ModelLoadingOverlay: receives model state/progress from the store plus the
  //    select/start/local-load callbacks; visible while showModelOverlay() is true.
  //  - Header: exposes a toggle for the debug panel.
  //  - TranscriptionDisplay: in 'v4-utterance' mode it is fed matureText/immatureText,
  //    otherwise transcript/pendingText; showConfidence is true only for 'v3-streaming'.
  //  - Floating control widget: collapsible settings panel plus the control bar
  //    (session timer, waveform, load progress, and the action buttons).
  //  - DebugPanel: bottom drawer rendered only while appStore.showDebugPanel() is true.
  return (
    <div class="h-screen flex flex-col overflow-hidden bg-[var(--color-earthy-bg)] selection:bg-[var(--color-earthy-coral)] selection:text-white">
      <ModelLoadingOverlay
        isVisible={showModelOverlay()}
        state={appStore.modelState()}
        progress={appStore.modelProgress()}
        message={appStore.modelMessage()}
        file={appStore.modelFile()}
        backend={appStore.backend()}
        selectedModelId={appStore.selectedModelId()}
        onModelSelect={(id: string) => appStore.setSelectedModelId(id)}
        onStart={() => loadSelectedModel()}
        onLocalLoad={handleLocalLoad}
        onClose={() => setShowModelOverlay(false)}
      />

      <Header
        onToggleDebug={() => appStore.setShowDebugPanel(!appStore.showDebugPanel())}
      />

      <div class="flex-1 flex overflow-hidden relative">
        <main class="flex-1 overflow-y-auto custom-scrollbar px-4 sm:px-6 lg:px-10 xl:px-14 2xl:px-20 flex flex-col items-center">
          <div class="w-full max-w-[1680px] py-8 md:py-10 lg:py-12">
            <TranscriptionDisplay
              confirmedText={appStore.transcriptionMode() === 'v4-utterance' ? appStore.matureText() : appStore.transcript()}
              pendingText={appStore.transcriptionMode() === 'v4-utterance' ? appStore.immatureText() : appStore.pendingText()}
              sentenceEntries={appStore.v4SentenceEntries()}
              isV4Mode={appStore.transcriptionMode() === 'v4-utterance'}
              isRecording={isRecording()}
              lcsLength={appStore.mergeInfo().lcsLength}
              anchorValid={appStore.mergeInfo().anchorValid}
              showConfidence={appStore.transcriptionMode() === 'v3-streaming'}
              class="min-h-[56vh]"
            />
          </div>
        </main>
      </div>

      {/* Draggable floating control widget: positioned at widgetPos() once it is set, otherwise centered near the bottom */}
      <div
        class={widgetPos() !== null ? 'fixed z-30 w-full max-w-2xl px-6 select-none' : 'absolute bottom-8 left-1/2 -translate-x-1/2 z-30 w-full max-w-2xl px-6'}
        style={widgetPos() ? { left: `${widgetPos()!.x}px`, top: `${widgetPos()!.y}px` } : {}}
      >
        <div class="relative">
          {/* Settings panel: expands up or down depending on bar position vs half screen height; collapsed to max-h-0 while showContextPanel() is false */}
          <div
            class="absolute left-0 right-0 overflow-hidden transition-[max-height] duration-300 ease-out border border-[var(--color-earthy-sage)]/30 bg-[var(--color-earthy-bg)]/95 backdrop-blur-sm shadow-lg"
            classList={{
              'max-h-0': !showContextPanel(),
              'max-h-[70vh]': showContextPanel(),
              'bottom-full rounded-t-2xl border-b-0': settingsExpandUp(),
              'top-full rounded-b-2xl border-t-0': !settingsExpandUp(),
            }}
            onMouseEnter={cancelPanelClose}
            onMouseLeave={panelMouseLeave}
          >
            <div class="max-h-[70vh] min-h-0 flex flex-col overflow-y-auto custom-scrollbar">
              <SettingsContent
                section={settingsPanelSection()}
                onClose={() => setShowContextPanel(false)}
                onLoadModel={() => loadSelectedModel()}
                onLocalLoad={handleLocalLoad}
                onOpenDebug={() => appStore.setShowDebugPanel(true)}
                onDeviceSelect={(id) => {
                  if (audioEngine) audioEngine.updateConfig({ deviceId: id });
                }}
                audioEngine={audioEngineSignal() ?? undefined}
                expandUp={settingsExpandUp}
              />
            </div>
          </div>

          {/* Control bar: steady, fixed position; never moves when settings open */}
          <div
            class="bg-white/90 backdrop-blur-md shadow-lg border border-[var(--color-earthy-sage)]/30 rounded-2xl overflow-hidden"
            onMouseDown={handleWidgetDragStart}
            role="presentation"
          >
            <div class="p-4 flex items-center justify-between gap-6 cursor-grab active:cursor-grabbing">
            <div class="flex items-center gap-2 flex-shrink-0">
              <span class="material-symbols-outlined text-[var(--color-earthy-soft-brown)] text-lg opacity-60" aria-hidden="true">drag_indicator</span>
              <div class="flex flex-col min-w-[60px]">
                <span class="text-[10px] uppercase tracking-wider text-[var(--color-earthy-soft-brown)] font-bold">Rec</span>
                <span class="font-mono text-sm text-[var(--color-earthy-dark-brown)]">{formatDuration(appStore.sessionDuration())}</span>
              </div>
            </div>
            <div class="flex-1 min-w-0 flex flex-col justify-center gap-1">
              <div class="h-8 flex items-center justify-center gap-1 overflow-hidden opacity-80 abstract-wave">
                <CompactWaveform audioLevel={appStore.audioLevel()} barLevels={appStore.barLevels()} isRecording={isRecording()} />
              </div>
              <Show when={appStore.modelState() === 'loading'}>
                <div class="flex items-center gap-2 px-1">
                  <div class="flex-1 h-1.5 rounded-full overflow-hidden bg-[var(--color-earthy-sage)]/20">
                    <div
                      class="h-full bg-[var(--color-earthy-muted-green)] rounded-full transition-all duration-300"
                      style={{ width: `${Math.max(0, Math.min(100, appStore.modelProgress()))}%` }}
                    />
                  </div>
                  <span class="text-[10px] font-mono text-[var(--color-earthy-soft-brown)] tabular-nums">{Math.round(appStore.modelProgress())}%</span>
                </div>
              </Show>
            </div>
            <div class="flex items-center gap-2 flex-shrink-0">
              <button
                type="button"
                onClick={toggleRecording}
                onMouseEnter={openPanelForAudio}
                onMouseLeave={schedulePanelCloseIfHover}
                class={`w-10 h-10 rounded-full flex items-center justify-center transition-colors border ${isRecording() ? 'bg-[var(--color-earthy-coral)] text-white border-[var(--color-earthy-coral)]' : 'text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] border-transparent hover:border-[var(--color-earthy-sage)]/30'}`}
                title={isRecording() ? 'Stop recording' : 'Start recording'}
              >
                <span class="material-symbols-outlined">mic</span>
              </button>
              <button
                type="button"
                onClick={() => loadSelectedModel()}
                onMouseEnter={openPanelForModel}
                onMouseLeave={schedulePanelCloseIfHover}
                disabled={appStore.modelState() === 'loading' || appStore.modelState() === 'ready'}
                class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30 disabled:opacity-40 disabled:cursor-not-allowed relative"
                title={appStore.modelState() === 'ready' ? 'Model loaded' : appStore.modelState() === 'loading' ? 'Loading...' : 'Load model'}
              >
                <Show when={appStore.modelState() === 'loading'} fallback={<span class="material-symbols-outlined">power_settings_new</span>}>
                  <span class="material-symbols-outlined load-btn-spin">progress_activity</span>
                </Show>
              </button>
              <button
                type="button"
                onClick={() => { setSettingsPanelSection('full'); setShowContextPanel((v) => !v); }}
                class={`w-10 h-10 rounded-full flex items-center justify-center transition-colors border ${showContextPanel() ? 'bg-[var(--color-earthy-sage)]/30 text-[var(--color-earthy-muted-green)] border-[var(--color-earthy-sage)]/50' : 'text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] border-transparent hover:border-[var(--color-earthy-sage)]/30'}`}
                title="Settings"
              >
                <span class="material-symbols-outlined">tune</span>
              </button>
              <button
                type="button"
                onClick={() => isRecording() && toggleRecording()}
                disabled={!isRecording()}
                class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30 disabled:opacity-40 disabled:cursor-not-allowed"
                title="Pause"
              >
                <span class="material-symbols-outlined">pause</span>
              </button>
              <button
                type="button"
                onClick={() => appStore.copyTranscript()}
                class="w-10 h-10 rounded-full flex items-center justify-center text-[var(--color-earthy-dark-brown)] hover:bg-[var(--color-earthy-bg)] transition-colors border border-transparent hover:border-[var(--color-earthy-sage)]/30"
                title="Copy transcript"
              >
                <span class="material-symbols-outlined">content_copy</span>
              </button>
            </div>
            </div>
          </div>
        </div>
      </div>

      {/* Foldable debug panel (bottom drawer) */}
      <Show when={appStore.showDebugPanel()}>
        <div class="absolute bottom-0 left-0 right-0 z-20 flex flex-col bg-[var(--color-earthy-bg)] border-t border-[var(--color-earthy-sage)]/30 shadow-[0_-4px_20px_rgba(0,0,0,0.08)] max-h-[70vh] overflow-hidden transition-all">
          <DebugPanel
            audioEngine={audioEngineSignal() ?? undefined}
            melClient={melClientSignal() ?? undefined}
          />
        </div>
      </Show>
    </div>
  );
};

export default App;