import { AudioEngine as IAudioEngine, AudioEngineConfig, AudioSegment, IRingBuffer, AudioMetrics } from './types';
import { RingBuffer } from './RingBuffer';
import { AudioSegmentProcessor, ProcessedSegment } from './AudioSegmentProcessor';
import { resampleLinear } from './utils';

/** Duration of the visualization buffer in seconds */
const VISUALIZATION_BUFFER_DURATION = 30;

/**
 * AudioEngine implementation for capturing audio, buffering it, and performing VAD.
 * Uses AudioSegmentProcessor for robust speech detection (incl. lookback).
 */
export class AudioEngine implements IAudioEngine {
    private config: AudioEngineConfig;
    private ringBuffer: IRingBuffer;
    private audioProcessor: AudioSegmentProcessor; // Replaces EnergyVAD
    private deviceId: string | null = null;

    private audioContext: AudioContext | null = null;
    private mediaStream: MediaStream | null = null;
    private workletNode: AudioWorkletNode | null = null;
    private sourceNode: MediaStreamAudioSourceNode | null = null;

    // AnalyserNode for oscilloscope waveform (native getByteTimeDomainData)
    private analyserNode: AnalyserNode | null = null;
    private analyserSourceNode: MediaStreamAudioSourceNode | null = null;
    private analyserGainNode: GainNode | null = null;
    private analyserTimeBuffer: Uint8Array | null = null;
    private waveformOut: Float32Array | null = null;
    private readonly ANALYSER_FFT_SIZE = 256;
    private readonly ANALYSER_SMOOTHING = 0.3; // Low = fast oscilloscope response

    // Track device vs target sample rates
    private deviceSampleRate: number = 48000;
    private targetSampleRate: number = 16000;

    private currentEnergy: number = 0;

    private segmentCallbacks: Array<(segment: AudioSegment) => void> = [];

    // Fixed-window streaming state (v3 token streaming mode)
    private windowCallbacks: Array<{
        windowDuration: number;
        overlapDuration: number;
        triggerInterval: number;
        callback: (audio: Float32Array, startTime: number) => void;
        lastWindowEnd: number; // Frame offset of last window end
    }> = [];

    // Resampled audio chunk callbacks (for mel worker, etc.)
    private audioChunkCallbacks: Array<(chunk: Float32Array) => void> = [];

    // SMA buffer for energy calculation
    private energyHistory: number[] = [];

    // Last N energy values for bar visualizer (oldest first when read)
    private energyBarHistory: number[] = [];
    private readonly BAR_LEVELS_SIZE = 64;

    // Visualization Summary Buffer (Low-Res Min/Max pairs)
    private visualizationSummary: Float32Array | null = null;
    private visualizationSummaryPosition: number = 0;
    private readonly VIS_SUMMARY_SIZE = 2000; // 2000 min/max pairs for 30 seconds = 15ms resolution

    // Raw visualization buffer (still kept for higher-res requests if needed, but summary is preferred)
    private visualizationBuffer: Float32Array | null = null;
    private visualizationBufferPosition: number = 0;
    private visualizationBufferSize: number = 0;

    // Metrics for UI components
    private metrics: AudioMetrics = {
        currentEnergy: 0,
        averageEnergy: 0,
        peakEnergy: 0,
        noiseFloor: 0.01,
        currentSNR: 0,
        isSpeaking: false,
    };

    // Subscribers for visualization updates
    private visualizationCallbacks: Array<(data: Float32Array, metrics: AudioMetrics, bufferEndTime: number) => void> = [];
    private lastVisualizationNotifyTime: number = 0;
    private readonly VISUALIZATION_NOTIFY_INTERVAL_MS = 16; // ~60fps for responsive oscilloscope

    // Recent segments for visualization (stores timing info only)
    private recentSegments: Array<{ startTime: number; endTime: number; isProcessed: boolean }> = [];
    private readonly MAX_SEGMENTS_FOR_VISUALIZATION = 50;

    constructor(config: Partial<AudioEngineConfig> = {}) {
        this.config = {
            sampleRate: 16000,
            bufferDuration: 120,
            energyThreshold: 0.08, // Match legacy UI project 'medium'
            minSpeechDuration: 240, // Match legacy UI project
            minSilenceDuration: 400, // Match legacy UI project
            maxSegmentDuration: 4.8, // Match legacy UI project

            // Advanced VAD defaults
            lookbackDuration: 0.120,
            speechHangover: 0.16,
            minEnergyIntegral: 22,
            minEnergyPerSecond: 5,
            useAdaptiveEnergyThresholds: true,
            adaptiveEnergyIntegralFactor: 25.0,
            adaptiveEnergyPerSecondFactor: 10.0,
            minAdaptiveEnergyIntegral: 3,
            minAdaptiveEnergyPerSecond: 1,
            maxSilenceWithinSpeech: 0.160,
            endingSpeechTolerance: 0.240,
            ...config,
        };

        this.deviceId = this.config.deviceId || null;
        this.targetSampleRate = this.config.sampleRate;

        // RingBuffer operates at TARGET sample rate (16kHz)
        this.ringBuffer = new RingBuffer(this.targetSampleRate, this.config.bufferDuration);

        // Initialize AudioSegmentProcessor
        this.audioProcessor = new AudioSegmentProcessor({
            sampleRate: this.targetSampleRate,
            energyThreshold: this.config.energyThreshold,
            minSpeechDuration: this.config.minSpeechDuration,
            silenceThreshold: this.config.minSilenceDuration,
            maxSegmentDuration: this.config.maxSegmentDuration,
            lookbackDuration: this.config.lookbackDuration,
            maxSilenceWithinSpeech: this.config.maxSilenceWithinSpeech,
            endingSpeechTolerance: this.config.endingSpeechTolerance,
            snrThreshold: 3.0,
            minSnrThreshold: 1.0,
            noiseFloorAdaptationRate: 0.05,
            fastAdaptationRate: 0.15,
            minBackgroundDuration: 1.0,
            energyRiseThreshold: 0.08
        });

        // Initialize visualization buffer (30 seconds at target sample rate)
        this.visualizationBufferSize = Math.round(this.targetSampleRate * VISUALIZATION_BUFFER_DURATION);
        this.visualizationBuffer = new Float32Array(this.visualizationBufferSize);
        this.visualizationBufferPosition = 0;

        // Initialize visualization summary (2000 points for 30s)
        this.visualizationSummary = new Float32Array(this.VIS_SUMMARY_SIZE * 2);
        this.visualizationSummaryPosition = 0;

        console.log('[AudioEngine] Initialized with config:', this.config);
    }

    private isWorkletInitialized = false;

    async init(): Promise<void> {
        // Request microphone permission with optional deviceId
        try {
            if (this.mediaStream) {
                this.mediaStream.getTracks().forEach(t => t.stop());
            }

            const constraints: MediaStreamConstraints = {
                audio: {
                    deviceId: this.deviceId ? { exact: this.deviceId } : undefined,
                    channelCount: 1,
                    echoCancellation: false,
                    noiseSuppression: false,
                    autoGainControl: false,
                },
            };

            console.log('[AudioEngine] Requesting microphone:', constraints);
            this.mediaStream = await navigator.mediaDevices.getUserMedia(constraints);
            console.log('[AudioEngine] Microphone stream acquired:', this.mediaStream.id);
        } catch (err) {
            console.error('[AudioEngine] Failed to get media stream:', err);
            throw err;
        }

        const track = this.mediaStream!.getAudioTracks()[0];
        const trackSettings = track?.getSettings?.();
        // Device sample rate (what the mic gives us)
        this.deviceSampleRate = trackSettings?.sampleRate ?? 48000;
        console.log('[AudioEngine] Device sample rate:', this.deviceSampleRate, '-> Target:', this.targetSampleRate);

        if (this.audioContext && this.audioContext.sampleRate !== this.deviceSampleRate) {
            await this.audioContext.close();
            this.audioContext = null;
        }
        if (!this.audioContext) {
            this.audioContext = new AudioContext({
                sampleRate: this.deviceSampleRate,
                latencyHint: 'interactive',
            });
            console.log('[AudioEngine] Created AudioContext:', this.audioContext.state, 'sampleRate:', this.audioContext.sampleRate);
        }

        // Re-initialize components with correct rates
        this.ringBuffer = new RingBuffer(this.targetSampleRate, this.config.bufferDuration);

        // Update processor config
        this.audioProcessor = new AudioSegmentProcessor({
            sampleRate: this.targetSampleRate,
            energyThreshold: this.config.energyThreshold,
            minSpeechDuration: this.config.minSpeechDuration,
            silenceThreshold: this.config.minSilenceDuration,
            maxSegmentDuration: this.config.maxSegmentDuration,
        });

        if (!this.isWorkletInitialized) {
            const windowDuration = 0.080;
            const processorCode = `
                class CaptureProcessor extends AudioWorkletProcessor {
                    constructor(options) {
                        super(options);
                        const opts = options?.processorOptions || {};
                        this.inputSampleRate = opts.inputSampleRate || 16000;
                        this.targetSampleRate = opts.targetSampleRate || this.inputSampleRate;
                        this.ratio = this.inputSampleRate / this.targetSampleRate;
                        this.bufferSize = Math.round(${windowDuration} * this.inputSampleRate);
                        this.buffer = new Float32Array(this.bufferSize);
                        this.index = 0;
                        this._lastLog = 0;
                    }

                    _emitChunk() {
                        let out;
                        let maxAbs = 0;

                        if (this.targetSampleRate === this.inputSampleRate) {
                            out = new Float32Array(this.bufferSize);
                            for (let i = 0; i < this.bufferSize; i++) {
                                const v = this.buffer[i];
                                out[i] = v;
                                const a = v < 0 ? -v : v;
                                if (a > maxAbs) maxAbs = a;
                            }
                        } else {
                            const outLength = Math.floor(this.bufferSize / this.ratio);
                            out = new Float32Array(outLength);
                            for (let i = 0; i < outLength; i++) {
                                const srcIndex = i * this.ratio;
                                const srcIndexFloor = Math.floor(srcIndex);
                                const srcIndexCeil = Math.min(srcIndexFloor + 1, this.bufferSize - 1);
                                const t = srcIndex - srcIndexFloor;
                                const v = this.buffer[srcIndexFloor] * (1 - t) + this.buffer[srcIndexCeil] * t;
                                out[i] = v;
                                const a = v < 0 ? -v : v;
                                if (a > maxAbs) maxAbs = a;
                            }
                        }

                        this.port.postMessage(
                            { type: 'audio', samples: out, sampleRate: this.targetSampleRate, maxAbs },
                            [out.buffer]
                        );
                    }

                    process(inputs) {
                        const input = inputs[0];
                        if (!input || !input[0]) return true;
                        
                        const channelData = input[0];
                        
                        // Buffer the data
                        for (let i = 0; i < channelData.length; i++) {
                            this.buffer[this.index++] = channelData[i];
                            
                            if (this.index >= this.bufferSize) {
                                this._emitChunk();
                                this.index = 0;
                                
                                // Debug log every ~5 seconds
                                const now = Date.now();
                                if (now - this._lastLog > 5000) {
                                    this.port.postMessage({ type: 'log', message: '[AudioWorklet] Active' });
                                    this._lastLog = now;
                                }
                            }
                        }
                        
                        return true;
                    }
                }
                registerProcessor('capture-processor', CaptureProcessor);
            `;
            const blob = new Blob([processorCode], { type: 'application/javascript' });
            const url = URL.createObjectURL(blob);
            try {
                await this.audioContext.audioWorklet.addModule(url);
                this.isWorkletInitialized = true;
                console.log('[AudioEngine] AudioWorklet module loaded');
            } catch (err) {
                console.error('[AudioEngine] Failed to load worklet:', err);
                if (err instanceof Error && err.name === 'InvalidStateError') {
                    // Ignore if already registered
                    this.isWorkletInitialized = true;
                }
            }
        }

        // Re-create worklet node if needed (it might handle dispose differently, but safe to new)
        if (this.workletNode) this.workletNode.disconnect();

        this.workletNode = new AudioWorkletNode(this.audioContext, 'capture-processor', {
            processorOptions: { inputSampleRate: this.deviceSampleRate, targetSampleRate: this.targetSampleRate },
        });
        this.workletNode.port.onmessage = (event: MessageEvent<any>) => {
            if (event.data?.type === 'audio' && event.data.samples instanceof Float32Array) {
                this.handleAudioChunk(event.data.samples, event.data.maxAbs, event.data.sampleRate);
            } else if (event.data instanceof Float32Array) {
                this.handleAudioChunk(event.data, undefined, this.deviceSampleRate);
            } else if (event.data?.type === 'log') {
                console.log(event.data.message);
            }
        };
        this.workletNode.onprocessorerror = (e) => {
            console.error('[AudioEngine] Worklet processor error:', e);
        };

        // Reconnect source node
        this.sourceNode?.disconnect();
        this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
        this.sourceNode.connect(this.workletNode);

        // AnalyserNode branch for lightweight preview bars (native FFT, no mel worker)
        this.disposeAnalyser();
        this.analyserSourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
        this.analyserNode = this.audioContext.createAnalyser();
        this.analyserNode.fftSize = this.ANALYSER_FFT_SIZE;
        this.analyserNode.smoothingTimeConstant = this.ANALYSER_SMOOTHING;
        this.analyserTimeBuffer = new Uint8Array(this.analyserNode.fftSize);
        this.waveformOut = new Float32Array(this.analyserNode.fftSize);

        this.analyserGainNode = this.audioContext.createGain();
        this.analyserGainNode.gain.value = 0;

        this.analyserSourceNode.connect(this.analyserNode);
        this.analyserNode.connect(this.analyserGainNode);
        this.analyserGainNode.connect(this.audioContext.destination);

        // Keep graph alive
        this.workletNode.connect(this.audioContext.destination);
        console.log('[AudioEngine] Graph connected: Source -> Worklet, AnalyserNode for oscilloscope');
    }

    async start(): Promise<void> {
        if (!this.mediaStream || !this.audioContext || !this.workletNode) {
            await this.init();
        }

        if (this.audioContext?.state === 'suspended') {
            await this.audioContext.resume();
        }
    }

    stop(): void {
        if (this.audioContext?.state === 'running') {
            this.audioContext.suspend();
        }
    }

    /**
     * Reset buffers and VAD state for a new session while keeping the audio graph.
     * Aligns visualization + segment timebase to 0, matching legacy UI project behavior.
     */
    reset(): void {
        // Reset audio/VAD state
        this.ringBuffer.reset();
        this.audioProcessor.reset();
        this.currentEnergy = 0;

        // Reset metrics
        this.metrics = {
            currentEnergy: 0,
            averageEnergy: 0,
            peakEnergy: 0,
            noiseFloor: 0.01,
            currentSNR: 0,
            isSpeaking: false,
        };

        // Clear segment history used by the visualizer
        this.recentSegments = [];
        this.energyBarHistory = [];

        // Reset visualization buffer
        if (this.visualizationBuffer) {
            this.visualizationBuffer.fill(0);
        }
        this.visualizationBufferPosition = 0;

        // Reset windowed streaming cursors
        for (const entry of this.windowCallbacks) {
            entry.lastWindowEnd = 0;
        }

        // Push a blank update so UI clears stale waveform/segments
        this.notifyVisualizationUpdate();
    }

    getCurrentEnergy(): number {
        return this.currentEnergy;
    }

    /** Oscilloscope waveform from AnalyserNode.getByteTimeDomainData (native, fast). Values -1..1. */
    getBarLevels(): Float32Array {
        if (this.analyserNode && this.analyserTimeBuffer && this.waveformOut) {
            (this.analyserNode as { getByteTimeDomainData(array: Uint8Array): void }).getByteTimeDomainData(this.analyserTimeBuffer);
            for (let i = 0; i < this.analyserTimeBuffer.length; i++) {
                this.waveformOut[i] = (this.analyserTimeBuffer[i] - 128) / 128; // 0..255 -> -1..1
            }
            return this.waveformOut;
        }
        const out = new Float32Array(this.BAR_LEVELS_SIZE);
        const h = this.energyBarHistory;
        const start = h.length <= this.BAR_LEVELS_SIZE ? 0 : h.length - this.BAR_LEVELS_SIZE;
        for (let i = 0; i < this.BAR_LEVELS_SIZE; i++) {
            const idx = start + i;
            out[i] = idx < h.length ? Math.min(1, Math.max(0, h[idx])) : 0;
        }
        return out;
    }

    getSignalMetrics(): { noiseFloor: number; snr: number; threshold: number; snrThreshold: number } {
        const stats = this.audioProcessor.getStats();
        return {
            noiseFloor: stats.noiseFloor ?? 0.0001,
            snr: stats.snr ?? 0,
            threshold: this.config.energyThreshold,
            snrThreshold: stats.snrThreshold ?? 3.0
        };
    }

    isSpeechActive(): boolean {
        return this.audioProcessor.getStateInfo().inSpeech;
    }

    getRingBuffer(): IRingBuffer {
        return this.ringBuffer;
    }

    onSpeechSegment(callback: (segment: AudioSegment) => void): () => void {
        this.segmentCallbacks.push(callback);
        return () => {
            this.segmentCallbacks = this.segmentCallbacks.filter((cb) => cb !== callback);
        };
    }

    /**
     * Subscribe to fixed-window chunks for token streaming mode.
     * Fires every triggerInterval seconds with windowDuration of audio.
     */
    onWindowChunk(
        windowDuration: number,
        overlapDuration: number,
        triggerInterval: number,
        callback: (audio: Float32Array, startTime: number) => void
    ): () => void {
        const entry = {
            windowDuration,
            overlapDuration,
            triggerInterval,
            callback,
            lastWindowEnd: 0, // Will be set on first chunk
        };
        this.windowCallbacks.push(entry);

        return () => {
            this.windowCallbacks = this.windowCallbacks.filter((e) => e !== entry);
        };
    }

    /**
     * Subscribe to every resampled audio chunk (16kHz).
     * Used to feed the continuous mel producer worker.
     * Returns an unsubscribe function.
     */
    onAudioChunk(callback: (chunk: Float32Array) => void): () => void {
        this.audioChunkCallbacks.push(callback);
        return () => {
            this.audioChunkCallbacks = this.audioChunkCallbacks.filter((cb) => cb !== callback);
        };
    }

    updateConfig(config: Partial<AudioEngineConfig>): void {
        this.config = { ...this.config, ...config };

        // Update processor config
        if (config.energyThreshold !== undefined) this.audioProcessor.setThreshold(config.energyThreshold);
        if (config.minSpeechDuration !== undefined) this.audioProcessor.setMinSpeechDuration(config.minSpeechDuration);
        if (config.minSilenceDuration !== undefined) this.audioProcessor.setSilenceLength(config.minSilenceDuration);
        if (config.maxSegmentDuration !== undefined) this.audioProcessor.setMaxSegmentDuration(config.maxSegmentDuration);

        // Advanced VAD updates
        if (config.lookbackDuration !== undefined) this.audioProcessor.setLookbackDuration(config.lookbackDuration);
        if (config.overlapDuration !== undefined) this.audioProcessor.setOverlapDuration(config.overlapDuration);
        if (config.maxSilenceWithinSpeech !== undefined) this.audioProcessor.setMaxSilenceWithinSpeech(config.maxSilenceWithinSpeech);
        if (config.endingSpeechTolerance !== undefined) this.audioProcessor.setEndingSpeechTolerance(config.endingSpeechTolerance);

        if (config.snrThreshold !== undefined) this.audioProcessor.setSnrThreshold(config.snrThreshold);
        if (config.minSnrThreshold !== undefined) this.audioProcessor.setMinSnrThreshold(config.minSnrThreshold);
    }

    async setDevice(deviceId: string): Promise<void> {
        this.deviceId = deviceId;
        await this.init();

        // Reconnect if running
        if (this.audioContext && this.workletNode) {
            this.sourceNode?.disconnect();
            this.sourceNode = this.audioContext.createMediaStreamSource(this.mediaStream!);
            this.sourceNode.connect(this.workletNode);
        }
    }

    private disposeAnalyser(): void {
        this.analyserSourceNode?.disconnect();
        this.analyserNode?.disconnect();
        this.analyserGainNode?.disconnect();
        this.analyserSourceNode = null;
        this.analyserNode = null;
        this.analyserGainNode = null;
        this.analyserTimeBuffer = null;
        this.waveformOut = null;
    }

    dispose(): void {
        this.stop();
        this.disposeAnalyser();
        this.mediaStream?.getTracks().forEach(track => track.stop());
        this.audioContext?.close();
        this.audioContext = null;
        this.mediaStream = null;
        this.workletNode = null;
        this.sourceNode = null;
    }

    private handleAudioChunk(rawChunk: Float32Array, precomputedMaxAbs?: number, chunkSampleRate?: number): void {
        // 0. Ensure chunk is at target sample rate (resample only if needed)
        const sampleRate = chunkSampleRate ?? this.targetSampleRate;
        const needsResample = sampleRate !== this.targetSampleRate;
        const chunk = needsResample
            ? resampleLinear(rawChunk, sampleRate, this.targetSampleRate)
            : rawChunk;

        // Calculate chunk energy (Peak Amplitude) + SMA for VAD compatibility
        let maxAbs = (!needsResample && precomputedMaxAbs !== undefined) ? precomputedMaxAbs : 0;
        if (precomputedMaxAbs === undefined || needsResample) {
            for (let i = 0; i < chunk.length; i++) {
                const abs = Math.abs(chunk[i]);
                if (abs > maxAbs) maxAbs = abs;
            }
        }

        // SMA Smoothing (matching legacy UI project logic)
        this.energyHistory.push(maxAbs);
        if (this.energyHistory.length > 6) {
            this.energyHistory.shift();
        }
        const energy = this.energyHistory.reduce((a: number, b: number) => a + b, 0) / this.energyHistory.length;

        this.currentEnergy = energy;
        this.energyBarHistory.push(energy);
        if (this.energyBarHistory.length > this.BAR_LEVELS_SIZE) {
            this.energyBarHistory.shift();
        }

        // Log when energy crosses threshold if state is close to changing
        const isSpeech = energy > this.config.energyThreshold;
        const wasSpeaking = this.metrics.isSpeaking;
        if (isSpeech !== wasSpeaking) {
            console.debug(`[AudioEngine] Energy threshold crossed: ${energy.toFixed(6)} > ${this.config.energyThreshold} = ${isSpeech}`);
        }

        // 1. Write to ring buffer before any callbacks can transfer the chunk.
        this.ringBuffer.write(chunk);

        const endFrame = this.ringBuffer.getCurrentFrame();

        // 2. Process VAD on resampled audio
        // The processor uses its own internal history for lookback, but we pull full audio from ring buffer later.
        const currentTime = this.ringBuffer.getCurrentTime();
        const segments = this.audioProcessor.processAudioData(chunk, currentTime, energy);

        // 2.5 Update visualization buffer
        this.updateVisualizationBuffer(chunk);

        // 2.6 Update metrics
        const stats = this.audioProcessor.getStats();
        const stateInfo = this.audioProcessor.getStateInfo();

        this.metrics.currentEnergy = energy;
        this.metrics.averageEnergy = this.metrics.averageEnergy * 0.95 + energy * 0.05;
        this.metrics.peakEnergy = Math.max(this.metrics.peakEnergy * 0.99, energy);
        this.metrics.noiseFloor = stats.noiseFloor ?? 0.01;
        this.metrics.currentSNR = stats.snr ?? 0;
        this.metrics.isSpeaking = stateInfo.inSpeech;

        // Periodic debug log
        if (Math.random() < 0.05) {
            console.debug(`[AudioEngine] Metrics: E=${energy.toFixed(6)}, NF=${this.metrics.noiseFloor.toFixed(6)}, SNR=${this.metrics.currentSNR.toFixed(2)}, Speaking=${this.metrics.isSpeaking}`);
        }

        // 3. Handle segments
        if (segments.length > 0) {
            for (const seg of segments) {
                // Apply lookback and overlap adjustments matching legacy UI project
                const lookbackDuration = this.config.lookbackDuration ?? 0.120;
                const startTime = Math.max(0, seg.startTime - lookbackDuration);

                // Calculate the sample positions for audio extraction
                const startFrame = Math.round(startTime * this.targetSampleRate);
                const endFrame = Math.round(seg.endTime * this.targetSampleRate);

                // Retrieval with padding (hangover)
                const speechHangover = this.config.speechHangover ?? 0.16;
                const paddedEndFrame = Math.min(
                    this.ringBuffer.getCurrentFrame(),
                    endFrame + Math.round(speechHangover * this.targetSampleRate)
                );

                try {
                    const audioData = this.ringBuffer.read(startFrame, paddedEndFrame);

                    // Calculate precise energy metrics for filtering
                    const metrics = this.calculateSegmentEnergyMetrics(audioData, this.targetSampleRate);

                    // Normalize power to 16kHz equivalent
                    const normalizedPowerAt16k = metrics.averagePower * 16000;
                    const normalizedEnergyIntegralAt16k = normalizedPowerAt16k * metrics.duration;

                    // Adaptive threshold calculation
                    let minEnergyIntegralThreshold = this.config.minEnergyIntegral ?? 22;
                    let minEnergyPerSecondThreshold = this.config.minEnergyPerSecond ?? 5;

                    if (this.config.useAdaptiveEnergyThresholds) {
                        const windowSize = this.config.windowSize ?? Math.round(0.080 * this.targetSampleRate);
                        const normalizedNoiseFloor = windowSize > 0 ? this.metrics.noiseFloor / windowSize : 0;
                        const noiseFloorAt16k = normalizedNoiseFloor * 16000;

                        const adaptiveMinEnergyIntegral = noiseFloorAt16k * (this.config.adaptiveEnergyIntegralFactor ?? 25.0);
                        minEnergyIntegralThreshold = Math.max(this.config.minAdaptiveEnergyIntegral ?? 3, adaptiveMinEnergyIntegral);

                        const adaptiveMinEnergyPerSecond = noiseFloorAt16k * (this.config.adaptiveEnergyPerSecondFactor ?? 10.0);
                        minEnergyPerSecondThreshold = Math.max(this.config.minAdaptiveEnergyPerSecond ?? 1, adaptiveMinEnergyPerSecond);
                    }

                    const isValidSpeech =
                        metrics.duration >= (this.config.minSpeechDuration / 1000) &&
                        normalizedPowerAt16k >= minEnergyPerSecondThreshold &&
                        normalizedEnergyIntegralAt16k >= minEnergyIntegralThreshold;

                    if (isValidSpeech) {
                        const audioSegment: AudioSegment = {
                            startFrame: startFrame,
                            endFrame: paddedEndFrame,
                            duration: metrics.duration,
                            averageEnergy: metrics.averagePower,
                            timestamp: Date.now(),
                        };
                        this.notifySegment(audioSegment);
                    } else {
                        console.log('[AudioEngine] Filtered out noise segment:', {
                            duration: metrics.duration,
                            power: normalizedPowerAt16k,
                            integral: normalizedEnergyIntegralAt16k
                        });
                    }
                } catch (err) {
                    console.warn('[AudioEngine] Failed to extract audio for validation:', err);
                }
            }
        }

        // 6. Fixed-window streaming (v3 token streaming mode)
        this.processWindowCallbacks(endFrame);

        // 7. Notify audio chunk subscribers AFTER internal processing.
        // Callbacks may transfer the chunk's buffer; do not use `chunk` after this.
        for (const cb of this.audioChunkCallbacks) {
            cb(chunk);
        }

        // 8. Notify visualization subscribers
        this.notifyVisualizationUpdate();
    }

    /**
     * Helper to read audio from ring buffer and calculate energy metrics for a detected segment.
     */
    private calculateSegmentEnergyMetrics(audioData: Float32Array, sampleRate: number): { averagePower: number; duration: number; numSamples: number } {
        if (!audioData || audioData.length === 0) {
            return { averagePower: 0, duration: 0, numSamples: 0 };
        }

        const numSamples = audioData.length;
        let sumOfSquares = 0;

        for (let i = 0; i < numSamples; i++) {
            sumOfSquares += audioData[i] * audioData[i];
        }

        const duration = numSamples / sampleRate;
        const averagePower = numSamples > 0 ? sumOfSquares / numSamples : 0;

        return {
            averagePower,
            duration,
            numSamples
        };
    }

    /**
     * Process fixed-window callbacks for token streaming mode.
     * Fires when enough audio has accumulated for a new window.
     */
    private processWindowCallbacks(currentFrame: number): void {
        for (const entry of this.windowCallbacks) {
            const windowFrames = Math.floor(entry.windowDuration * this.targetSampleRate);
            const stepFrames = Math.floor(entry.triggerInterval * this.targetSampleRate);

            // Initialize lastWindowEnd on first call
            if (entry.lastWindowEnd === 0) {
                entry.lastWindowEnd = currentFrame;
                continue;
            }

            // Check if we have enough new audio for the next window
            const framesSinceLastWindow = currentFrame - entry.lastWindowEnd;
            if (framesSinceLastWindow >= stepFrames) {
                // Calculate window boundaries
                const windowEnd = currentFrame;
                const windowStart = windowEnd - windowFrames;

                // Ensure we have enough data in the ring buffer
                const baseOffset = this.ringBuffer.getBaseFrameOffset();
                if (windowStart >= baseOffset) {
                    try {
                        const audio = this.ringBuffer.read(windowStart, windowEnd);
                        const startTime = windowStart / this.targetSampleRate;

                        entry.callback(audio, startTime);
                        entry.lastWindowEnd = windowEnd;
                    } catch (e) {
                        console.warn('[AudioEngine] Window read failed:', e);
                    }
                }
            }
        }
    }

    private notifySegment(segment: AudioSegment): void {
        // Track segment for visualization
        this.recentSegments.push({
            startTime: segment.startFrame / this.targetSampleRate,
            endTime: segment.endFrame / this.targetSampleRate,
            isProcessed: false
        });

        // Limit segments count
        if (this.recentSegments.length > this.MAX_SEGMENTS_FOR_VISUALIZATION) {
            this.recentSegments.shift();
        }

        this.segmentCallbacks.forEach((cb) => cb(segment));
    }

    /**
     * Get recent segments for visualization.
     */
    getSegmentsForVisualization(): Array<{ startTime: number; endTime: number; isProcessed: boolean }> {
        const segments = [...this.recentSegments];

        // Add pending segment if speech is currently active
        const vadState = this.audioProcessor.getStateInfo();
        if (vadState.inSpeech && vadState.speechStartTime !== null) {
            segments.push({
                startTime: vadState.speechStartTime,
                endTime: this.ringBuffer.getCurrentTime(),
                isProcessed: false // Pending
            });
        }

        return segments;
    }

    /**
     * Mark a segment as processed (for visualization color coding).
     */
    markSegmentProcessed(startTime: number): void {
        const segment = this.recentSegments.find(s => Math.abs(s.startTime - startTime) < 0.1);
        if (segment) {
            segment.isProcessed = true;
        }
    }

    /**
     * Update the visualization buffer and summary with new audio data.
     */
    private updateVisualizationBuffer(chunk: Float32Array): void {
        if (!this.visualizationBuffer || !this.visualizationSummary) return;

        const chunkLength = chunk.length;
        const bufferLength = this.visualizationBufferSize;

        // 1. Update raw circular buffer
        if (chunkLength >= bufferLength) {
            this.visualizationBuffer.set(chunk.subarray(chunkLength - bufferLength));
            this.visualizationBufferPosition = 0;
        } else {
            const endPosition = this.visualizationBufferPosition + chunkLength;
            if (endPosition <= bufferLength) {
                this.visualizationBuffer.set(chunk, this.visualizationBufferPosition);
                this.visualizationBufferPosition = endPosition % bufferLength;
            } else {
                const firstPart = bufferLength - this.visualizationBufferPosition;
                this.visualizationBuffer.set(chunk.subarray(0, firstPart), this.visualizationBufferPosition);
                this.visualizationBuffer.set(chunk.subarray(firstPart), 0);
                this.visualizationBufferPosition = (chunkLength - firstPart) % bufferLength;
            }
        }

        // 2. Update summary buffer (Low-res min/max pairs)
        // Each point in VIS_SUMMARY_SIZE represents bufferLength / VIS_SUMMARY_SIZE samples
        const samplesPerPoint = bufferLength / this.VIS_SUMMARY_SIZE;
        const numNewPoints = Math.round(chunkLength / samplesPerPoint);

        for (let i = 0; i < numNewPoints; i++) {
            const start = Math.floor(i * samplesPerPoint);
            const end = Math.min(chunkLength, Math.floor((i + 1) * samplesPerPoint));
            if (start >= end) continue;

            let min = chunk[start];
            let max = chunk[start];
            for (let s = start + 1; s < end; s++) {
                const v = chunk[s];
                if (v < min) min = v;
                if (v > max) max = v;
            }

            // Write to circular summary
            const targetIdx = this.visualizationSummaryPosition * 2;
            this.visualizationSummary[targetIdx] = min;
            this.visualizationSummary[targetIdx + 1] = max;
            this.visualizationSummaryPosition = (this.visualizationSummaryPosition + 1) % this.VIS_SUMMARY_SIZE;
        }
    }

    /**
     * Get visualization data subsampled to fit the target width.
     * Returns min/max pairs for each pixel to preserve peaks in the waveform.
     * Zero-allocation except for the returned result.
     * @param targetWidth - The desired number of data points (e.g., canvas width).
     * @returns Float32Array containing alternating min/max values, length targetWidth * 2.
     */
    getVisualizationData(targetWidth: number): Float32Array {
        if (!this.visualizationSummary || !targetWidth || targetWidth <= 0) {
            return new Float32Array(0);
        }

        // If targetWidth is close to or less than our summary size, use the summary (MUCH faster)
        if (targetWidth <= this.VIS_SUMMARY_SIZE) {
            const subsampledBuffer = new Float32Array(targetWidth * 2);
            const samplesPerTarget = this.VIS_SUMMARY_SIZE / targetWidth;

            for (let i = 0; i < targetWidth; i++) {
                const rangeStart = i * samplesPerTarget;
                const rangeEnd = (i + 1) * samplesPerTarget;

                let minVal = 0;
                let maxVal = 0;
                let first = true;

                for (let s = Math.floor(rangeStart); s < Math.floor(rangeEnd); s++) {
                    const idx = ((this.visualizationSummaryPosition + s) % this.VIS_SUMMARY_SIZE) * 2;
                    const vMin = this.visualizationSummary[idx];
                    const vMax = this.visualizationSummary[idx + 1];

                    if (first) {
                        minVal = vMin;
                        maxVal = vMax;
                        first = false;
                    } else {
                        if (vMin < minVal) minVal = vMin;
                        if (vMax > maxVal) maxVal = vMax;
                    }
                }

                subsampledBuffer[i * 2] = minVal;
                subsampledBuffer[i * 2 + 1] = maxVal;
            }
            return subsampledBuffer;
        }

        return this.getVisualizationDataFromRaw(targetWidth);
    }

    private getVisualizationDataFromRaw(targetWidth: number): Float32Array {
        if (!this.visualizationBuffer) return new Float32Array(0);
        const buffer = this.visualizationBuffer;
        const bufferLength = this.visualizationBufferSize;
        const pos = this.visualizationBufferPosition;
        const samplesPerPoint = bufferLength / targetWidth;
        const subsampledBuffer = new Float32Array(targetWidth * 2);

        // Logical index s maps to physical index:
        // if s < wrapS: pos + s
        // else: s - wrapS (which is s - (bufferLength - pos) = s + pos - bufferLength)
        const wrapS = bufferLength - pos;

        for (let i = 0; i < targetWidth; i++) {
            const startS = Math.floor(i * samplesPerPoint);
            const endS = Math.floor((i + 1) * samplesPerPoint);

            let minVal = 0;
            let maxVal = 0;
            let first = true;

            // Part 1: Before wrap (Logical indices < wrapS)
            // Physical indices: pos + s
            const end1 = (endS < wrapS) ? endS : wrapS;
            if (startS < end1) {
                let p = pos + startS;
                const pEnd = pos + end1;

                if (first && p < pEnd) {
                    const val = buffer[p];
                    minVal = val;
                    maxVal = val;
                    first = false;
                    p++;
                }

                for (; p < pEnd; p++) {
                    const val = buffer[p];
                    if (val < minVal) minVal = val;
                    else if (val > maxVal) maxVal = val;
                }
            }

            // Part 2: After wrap (Logical indices >= wrapS)
            // Physical indices: s - wrapS
            const start2 = (startS > wrapS) ? startS : wrapS;
            if (start2 < endS) {
                let p = start2 - wrapS;
                const pEnd = endS - wrapS;

                if (first && p < pEnd) {
                    const val = buffer[p];
                    minVal = val;
                    maxVal = val;
                    first = false;
                    p++;
                }

                for (; p < pEnd; p++) {
                    const val = buffer[p];
                    if (val < minVal) minVal = val;
                    else if (val > maxVal) maxVal = val;
                }
            }

            subsampledBuffer[i * 2] = minVal;
            subsampledBuffer[i * 2 + 1] = maxVal;
        }
        return subsampledBuffer;
    }


    /**
     * Get current audio metrics for UI visualization.
     */
    getMetrics(): AudioMetrics {
        return { ...this.metrics };
    }

    /**
     * Get current time in seconds (for waveform time markers).
     */
    getCurrentTime(): number {
        return this.ringBuffer.getCurrentTime();
    }

    /**
     * Get the visualization buffer duration in seconds.
     */
    getVisualizationDuration(): number {
        return VISUALIZATION_BUFFER_DURATION;
    }

    /**
     * Subscribe to visualization updates.
     * Callback is invoked after each audio chunk is processed.
     */
    onVisualizationUpdate(callback: (data: Float32Array, metrics: AudioMetrics, bufferEndTime: number) => void): () => void {
        this.visualizationCallbacks.push(callback);
        return () => {
            this.visualizationCallbacks = this.visualizationCallbacks.filter((cb) => cb !== callback);
        };
    }

    /**
     * Notify visualization subscribers with updated data.
     * Throttled to ~30fps to avoid UI stuttering.
     */
    private notifyVisualizationUpdate(): void {
        const now = performance.now();
        if (now - this.lastVisualizationNotifyTime < this.VISUALIZATION_NOTIFY_INTERVAL_MS) {
            return;
        }
        this.lastVisualizationNotifyTime = now;

        const data = this.getVisualizationData(400); // 400 points is enough for modern displays and saves CPU
        const bufferEndTime = this.ringBuffer.getCurrentTime();
        this.visualizationCallbacks.forEach((cb) => cb(data, this.getMetrics(), bufferEndTime));
    }
}