/**
 * Keet - Audio Segment Processor
 * Ported from legacy UI project/AudioSegmentProcessor.js
 * 
 * Sophisticated VAD-based segment processor with:
 * - Speech onset detection with lookback
 * - Rising energy trend analysis
 * - Adaptive noise floor tracking
 * - SNR-based speech detection
 * - Proactive segment splitting for long utterances
 */

import { defaultAudioParams, windowDuration as DEFAULT_WINDOW_DURATION } from './audioParams';

/**
 * Per-chunk metadata kept in the processor's recent-chunk history.
 * The history is walked backwards when a speech onset is detected in order to
 * place the segment start earlier than the chunk that crossed the threshold.
 */
interface ChunkInfo {
    /** Timestamp passed as `currentTime` when the chunk was processed. */
    time: number;
    /** Caller-supplied energy value for the chunk. */
    energy: number;
    /** True when `energy` exceeded the configured `energyThreshold`. */
    isSpeech: boolean;
    /** Signal-to-noise ratio in dB, computed against the noise floor at processing time. */
    snr: number;
}

/**
 * Statistics for one recorded speech or silence period.
 * Both the speech and the silence history in `ProcessorState` use this shape.
 */
interface SegmentStats {
    /** Timestamp at which the period began. */
    startTime: number;
    /** Timestamp at which the period ended. */
    endTime: number;
    /** Length of the period (`endTime - startTime`). */
    duration: number;
    /** Mean of the per-chunk energies collected during the period. */
    avgEnergy: number;
    /** `avgEnergy * duration`. */
    energyIntegral: number;
}

/**
 * Averages over a list of `SegmentStats` entries.
 * All fields are 0 when no entries have been recorded.
 */
interface StatsSummary {
    /** Mean `duration` of the recorded periods. */
    avgDuration: number;
    /** Mean `avgEnergy` of the recorded periods. */
    avgEnergy: number;
    /** Mean `energyIntegral` of the recorded periods. */
    avgEnergyIntegral: number;
}

/**
 * Snapshot of the processor's statistics, as returned by `getStats()`.
 * It is rebuilt at the end of every processed chunk.
 */
interface CurrentStats {
    /** Averages over the recorded silence periods. */
    silence: StatsSummary;
    /** Averages over the recorded speech segments. */
    speech: StatsSummary;
    /** Current adaptive noise floor estimate. */
    noiseFloor: number;
    /** SNR (dB) of the most recently processed chunk; 0 before any chunk is processed. */
    snr: number;
    /** Copy of the configured `snrThreshold`. */
    snrThreshold: number;
    /** Copy of the configured `minSnrThreshold`. */
    minSnrThreshold: number;
    /** Copy of the configured `energyRiseThreshold`. */
    energyRiseThreshold: number;
}

/**
 * Mutable state of the processor. `reset()` replaces the whole object.
 */
interface ProcessorState {
    /** True while a speech segment is open. */
    inSpeech: boolean;
    /** Start timestamp of the open speech segment, or null when not in speech. */
    speechStartTime: number | null;
    /** Timestamp at which the current silence period began, or null if none has been started. */
    silenceStartTime: number | null;
    /** Below-threshold chunks counted while in speech; compared with the chunk count derived from `silenceThreshold`. */
    silenceCounter: number;
    /** Recent chunk history used for onset lookback (trimmed against `maxHistoryLength * 10`). */
    recentChunks: ChunkInfo[];
    /** Energies collected for the open speech segment. */
    speechEnergies: number[];
    /** Energies collected while silent or while a possible end of speech is being decided. */
    silenceEnergies: number[];
    /** History of finished speech segments (trimmed against `maxHistoryLength`). */
    speechStats: SegmentStats[];
    /** History of silence periods. */
    silenceStats: SegmentStats[];
    /** Latest statistics snapshot. */
    currentStats: CurrentStats;
    /** Initialised to 0 on reset; not otherwise used in the code visible here. */
    segmentCounter: number;
    /** Adaptive noise floor (exponential moving average of below-threshold energies); starts at 0.005. */
    noiseFloor: number;
    /** Energies of the most recent chunks (at most 50 entries). */
    recentEnergies: number[];
    /** Accumulated duration of the current run of below-threshold chunks; drives the noise floor adaptation rate. */
    silenceDuration: number;
}

/**
 * A finished speech segment returned by `AudioSegmentProcessor.processAudioData`.
 */
export interface ProcessedSegment {
    /** Timestamp at which the segment starts (may precede the detecting chunk because of onset lookback). */
    startTime: number;
    /** Timestamp of the chunk that closed the segment. */
    endTime: number;
    /** `endTime - startTime`; segments with a non-positive duration are never emitted. */
    duration: number;
}

/**
 * Processor configuration.
 * The constructor accepts a partial object; missing values come from
 * `defaultAudioParams`. Durations are compared with values derived from
 * `chunk.length / sampleRate` and with differences of the timestamps passed
 * to `processAudioData` (presumably seconds - confirm against the caller).
 */
export interface AudioSegmentProcessorConfig {
    /** Sample rate used to convert chunk and window lengths (in samples) to durations. */
    sampleRate: number;
    /** Window length in samples. Always recomputed by the constructor from the window duration and sample rate; a caller-supplied value is overridden. */
    windowSize: number;
    /** Stored and settable; not read by the processing code visible here. */
    minSpeechDuration: number;
    /** Amount of silence needed to end a speech segment; converted to a chunk count using `windowSize / sampleRate`. */
    silenceThreshold: number;
    /** A chunk counts as speech when its energy is strictly greater than this value. */
    energyThreshold: number;
    /** Stored; not read by the processing code visible here. */
    smaLength: number;
    /** Stored; not read by the processing code visible here. */
    lookbackChunks: number;
    /** Stored, logged at construction and settable; not read by the processing code visible here. */
    overlapDuration: number;
    /** Stored, logged at construction and settable; not read by the processing code visible here. */
    lookbackDuration: number;
    /** Bound for the speech statistics history; the recent-chunk history is trimmed against ten times this value. */
    maxHistoryLength: number;
    /** Exponential-moving-average rate used for the noise floor once silence has lasted `minBackgroundDuration`. */
    noiseFloorAdaptationRate: number;
    /** Adaptation rate used at the start of a silence run; blended linearly towards `noiseFloorAdaptationRate`. */
    fastAdaptationRate: number;
    /** Reported in the statistics snapshot; not used for detection decisions in the code visible here. */
    snrThreshold: number;
    /** Silence duration over which the adaptation rate blends from fast to normal. */
    minBackgroundDuration: number;
    /** SNR (dB) threshold used by the onset lookback (half of it stops the rising-energy search). */
    minSnrThreshold: number;
    /** Fractional energy increase between consecutive chunks that counts as a rising trend (e.g. 0.1 = 10%). */
    energyRiseThreshold: number;
    /** Open speech segments longer than this are split proactively. */
    maxSegmentDuration: number;
    /** Silence shorter than this inside a speech segment is treated as part of the speech. */
    maxSilenceWithinSpeech: number;
    /** Stored and settable; not read by the processing code visible here. */
    endingSpeechTolerance: number;
    /** Optional log sink; the constructor defaults it to `console.log`. Logging is skipped when this is not a function. */
    logger?: (message: string, data?: unknown) => void;
}

/**
 * AudioSegmentProcessor - energy-threshold VAD with speech onset lookback.
 *
 * Feed it one chunk at a time through {@link AudioSegmentProcessor.processAudioData}.
 * Each chunk is classified as speech or silence by comparing the caller-supplied
 * energy with `energyThreshold`. The processor tracks an adaptive noise floor,
 * moves the segment start back to the detected onset, and returns a
 * {@link ProcessedSegment} whenever a speech segment is closed, either by
 * confirmed silence or because it exceeded `maxSegmentDuration`.
 */
export class AudioSegmentProcessor {
    /** The rising-energy search stops once it has walked more than this many chunks back from the speech chunk. */
    private static readonly ONSET_LOOKBACK_LIMIT = 6;

    /** Lower bound applied to the energy before taking the logarithm, so zero energy yields a finite SNR. */
    private static readonly MIN_SNR_ENERGY = 1e-10;

    private options: AudioSegmentProcessorConfig;
    private state!: ProcessorState;

    /**
     * @param options Partial configuration; anything omitted falls back to `defaultAudioParams`.
     *                `windowSize` is always derived from the window duration and the sample rate.
     */
    constructor(options: Partial<AudioSegmentProcessorConfig> = {}) {
        const sampleRate = options.sampleRate ?? defaultAudioParams.sampleRate ?? 16000;

        this.options = {
            minSpeechDuration: defaultAudioParams.minSpeechDuration,
            silenceThreshold: defaultAudioParams.silenceLength,
            energyThreshold: defaultAudioParams.audioThreshold,
            smaLength: defaultAudioParams.smaLength,
            lookbackChunks: defaultAudioParams.lookbackChunks,
            overlapDuration: defaultAudioParams.overlapDuration,
            lookbackDuration: defaultAudioParams.lookbackDuration,
            maxHistoryLength: defaultAudioParams.maxHistoryLength,
            noiseFloorAdaptationRate: defaultAudioParams.noiseFloorAdaptationRate,
            fastAdaptationRate: defaultAudioParams.fastAdaptationRate,
            snrThreshold: defaultAudioParams.snrThreshold,
            minBackgroundDuration: defaultAudioParams.minBackgroundDuration,
            minSnrThreshold: defaultAudioParams.minSnrThreshold,
            energyRiseThreshold: defaultAudioParams.energyRiseThreshold,
            maxSegmentDuration: defaultAudioParams.maxSegmentDuration,
            maxSilenceWithinSpeech: defaultAudioParams.maxSilenceWithinSpeech,
            endingSpeechTolerance: defaultAudioParams.endingSpeechTolerance,
            logger: console.log,
            ...options,
            // Placed after the spread so an explicit `sampleRate: undefined` in
            // `options` cannot overwrite the resolved sample rate.
            sampleRate,
            // Always derived from the resolved sample rate.
            windowSize: Math.round(DEFAULT_WINDOW_DURATION * sampleRate)
        };

        this.log('Initialized AudioSegmentProcessor', {
            sampleRate: this.options.sampleRate,
            windowSize: this.options.windowSize,
            lookbackDuration: this.options.lookbackDuration,
            overlapDuration: this.options.overlapDuration,
            snrThreshold: this.options.snrThreshold,
            minSnrThreshold: this.options.minSnrThreshold
        });

        this.reset();
    }

    /** Forward a message to the configured logger, if there is one. */
    private log(message: string, data?: unknown): void {
        if (typeof this.options.logger === 'function') {
            this.options.logger(`[AudioSegmentProcessor] ${message}`, data);
        }
    }

    /**
     * Process an audio chunk and return any segments that were closed by it.
     *
     * @param chunk Audio samples; only the length is used (to measure silence duration).
     * @param currentTime Timestamp of this chunk, in the same unit as the duration options.
     * @param energy Caller-computed energy of the chunk.
     * @returns Zero or more finished segments. A proactive split and a regular
     *          segment end are handled independently, so the array is usually
     *          empty or holds a single segment.
     */
    processAudioData(
        chunk: Float32Array,
        currentTime: number,
        energy: number
    ): ProcessedSegment[] {
        if (!chunk || !chunk.length) return [];

        const segments: ProcessedSegment[] = [];
        const isSpeech = energy > this.options.energyThreshold;

        // Length of the current run of below-threshold chunks; it drives the
        // noise floor adaptation rate.
        if (isSpeech) {
            this.state.silenceDuration = 0;
        } else {
            this.state.silenceDuration += chunk.length / this.options.sampleRate;
        }

        this.updateNoiseFloor(energy, isSpeech);
        const snr = this.calculateSNR(energy);

        // Keep a bounded history for onset lookback.
        this.state.recentChunks.push({ time: currentTime, energy, isSpeech, snr });
        if (this.state.recentChunks.length > this.options.maxHistoryLength * 10) {
            this.state.recentChunks.shift();
        }

        // --- Proactive segment splitting ---
        if (this.state.inSpeech && this.state.speechStartTime !== null) {
            const currentSpeechDuration = currentTime - this.state.speechStartTime;
            if (currentSpeechDuration > this.options.maxSegmentDuration) {
                this.log('Splitting long segment', {
                    startTime: this.state.speechStartTime.toFixed(2),
                    splitTime: currentTime.toFixed(2),
                    duration: currentSpeechDuration.toFixed(2)
                });

                const splitSegment = this.createSegment(this.state.speechStartTime, currentTime);
                if (splitSegment) {
                    segments.push(splitSegment);
                }

                // The next segment starts exactly where this one was cut.
                this.startSpeech(currentTime, energy);
            }
        }

        // --- Speech state machine ---
        if (!this.state.inSpeech && isSpeech) {
            this.handleSpeechOnset(currentTime, energy, snr);
        } else if (this.state.inSpeech && !isSpeech) {
            const finished = this.handleSilenceInSpeech(currentTime, energy, snr);
            if (finished) {
                segments.push(finished);
            }
        } else if (this.state.inSpeech) {
            // Speech resumed or continues: the pause is over, so the silence
            // run restarts. Without this reset, separate short pauses would
            // add up and end the segment prematurely.
            this.state.silenceCounter = 0;
            this.state.speechEnergies.push(energy);
        } else {
            this.state.silenceEnergies.push(energy);
        }

        this.updateStats();

        return segments;
    }

    /**
     * Silence -> speech transition: locate the onset via lookback, record the
     * silence period that just ended, and open a new speech segment.
     */
    private handleSpeechOnset(currentTime: number, energy: number, snr: number): void {
        const realStartIndex = this.findSpeechStart();
        const realStartTime = realStartIndex !== -1
            ? this.state.recentChunks[realStartIndex].time
            : currentTime;

        // Must run before startSpeech(), which clears silenceStartTime.
        this.recordSilenceStats(realStartTime);
        this.startSpeech(realStartTime, energy);

        this.log('Speech start detected', {
            detectedAt: currentTime.toFixed(2),
            actualStart: realStartTime.toFixed(2),
            lookbackDiff: (currentTime - realStartTime).toFixed(2),
            snr: snr.toFixed(2),
            noiseFloor: this.state.noiseFloor.toFixed(6)
        });
    }

    /**
     * Handle a below-threshold chunk while a speech segment is open.
     *
     * @returns The finished segment when the silence is confirmed, otherwise null.
     */
    private handleSilenceInSpeech(currentTime: number, energy: number, snr: number): ProcessedSegment | null {
        this.state.silenceCounter++;

        const chunkDuration = this.options.windowSize / this.options.sampleRate;
        const chunksNeeded = Math.ceil(this.options.silenceThreshold / chunkDuration);

        if (this.state.silenceCounter % 5 === 0) {
            this.log('Silence progressing', {
                counter: this.state.silenceCounter,
                needed: chunksNeeded,
                energy: energy.toFixed(6),
                snr: snr.toFixed(2)
            });
        }

        const silenceDuration = this.state.silenceCounter * chunkDuration;

        if (silenceDuration < this.options.maxSilenceWithinSpeech) {
            // Short pause: still treated as part of the speech.
            this.state.speechEnergies.push(energy);
            return null;
        }

        if (this.state.silenceCounter < chunksNeeded) {
            // Longer than a tolerated pause but not yet confirmed silence.
            this.state.silenceEnergies.push(energy);
            return null;
        }

        // Confirmed silence: close the speech segment.
        const speechStartTime = this.state.speechStartTime;
        let segment: ProcessedSegment | null = null;

        if (speechStartTime !== null) {
            const speechDuration = currentTime - speechStartTime;
            const avgEnergy = this.average(this.state.speechEnergies);

            this.state.speechStats.push({
                startTime: speechStartTime,
                endTime: currentTime,
                duration: speechDuration,
                avgEnergy,
                energyIntegral: avgEnergy * speechDuration
            });

            if (this.state.speechStats.length > this.options.maxHistoryLength) {
                this.state.speechStats.shift();
            }

            segment = this.createSegment(speechStartTime, currentTime);
        }

        this.startSilence(currentTime);
        return segment;
    }

    /**
     * Record the silence period that ends at `endTime` so the silence summary
     * in the statistics snapshot is populated. Nothing is recorded when no
     * silence period has been started, or when the lookback places the speech
     * onset at or before the start of the silence.
     *
     * `avgEnergy` is the mean of all energies collected since the silence
     * began, which includes chunks the onset lookback later assigns to speech.
     */
    private recordSilenceStats(endTime: number): void {
        const startTime = this.state.silenceStartTime;
        if (startTime === null) return;

        const duration = endTime - startTime;
        if (duration <= 0) return;

        const avgEnergy = this.average(this.state.silenceEnergies);

        this.state.silenceStats.push({
            startTime,
            endTime,
            duration,
            avgEnergy,
            energyIntegral: avgEnergy * duration
        });

        if (this.state.silenceStats.length > this.options.maxHistoryLength) {
            this.state.silenceStats.shift();
        }
    }

    /**
     * Update the noise floor with an exponential moving average.
     * Only below-threshold chunks move the floor; every chunk is appended to
     * the short recent-energy history.
     */
    private updateNoiseFloor(energy: number, isSpeech: boolean): void {
        if (!isSpeech) {
            let adaptationRate = this.options.noiseFloorAdaptationRate;

            // Early in a silence run, adapt quickly and blend linearly towards
            // the normal rate as the run approaches minBackgroundDuration.
            if (this.state.silenceDuration < this.options.minBackgroundDuration) {
                const blendFactor = Math.min(1, this.state.silenceDuration / this.options.minBackgroundDuration);
                adaptationRate = this.options.fastAdaptationRate * (1 - blendFactor) +
                    this.options.noiseFloorAdaptationRate * blendFactor;
            }

            this.state.noiseFloor = this.state.noiseFloor * (1 - adaptationRate) + energy * adaptationRate;
            // Keep the floor strictly positive.
            this.state.noiseFloor = Math.max(0.00001, this.state.noiseFloor);
        }

        this.state.recentEnergies.push(energy);
        if (this.state.recentEnergies.length > 50) {
            this.state.recentEnergies.shift();
        }
    }

    /**
     * Calculate the signal-to-noise ratio in dB against the current noise floor.
     * The energy is clamped to a tiny positive value first, so zero or negative
     * energy gives a large negative finite SNR instead of -Infinity or NaN.
     */
    private calculateSNR(energy: number): number {
        const noiseFloor = Math.max(0.0001, this.state.noiseFloor);
        const signal = Math.max(AudioSegmentProcessor.MIN_SNR_ENERGY, energy);
        return 10 * Math.log10(signal / noiseFloor);
    }

    /**
     * Open a new speech segment starting at `time`.
     */
    private startSpeech(time: number, energy: number): void {
        this.state.inSpeech = true;
        this.state.speechStartTime = time;
        this.state.silenceCounter = 0;
        this.state.speechEnergies = [energy];
        this.state.silenceStartTime = null;
        this.state.silenceDuration = 0;

        const snr = this.calculateSNR(energy);
        this.log('Speech state started', {
            time: time.toFixed(2),
            energy: energy.toFixed(6),
            snr: snr.toFixed(2),
            noiseFloor: this.state.noiseFloor.toFixed(6)
        });
    }

    /**
     * Transition to the silence state at `time`.
     */
    private startSilence(time: number): void {
        this.state.inSpeech = false;
        this.state.silenceStartTime = time;
        this.state.speechStartTime = null;
        this.state.silenceCounter = 0;
        this.state.silenceEnergies = [];
        this.state.silenceDuration = 0.001; // Avoid division by zero

        this.log('Silence state started', {
            time: time.toFixed(2),
            noiseFloor: this.state.noiseFloor.toFixed(6)
        });
    }

    /**
     * Find the index in the recent-chunk history at which the speech really
     * started, using a rising-energy search followed by an SNR-crossing search.
     *
     * @returns An index into `state.recentChunks`.
     */
    private findSpeechStart(): number {
        const chunks = this.state.recentChunks;
        const minSnrThreshold = this.options.minSnrThreshold;

        // Index of the most recent speech chunk (0 when there is none).
        let firstSpeechIndex = 0;
        for (let i = chunks.length - 1; i >= 0; i--) {
            if (chunks[i].isSpeech) {
                firstSpeechIndex = i;
                break;
            }
        }

        // Walk backwards looking for the earliest chunk after which the energy
        // jumps by more than the configured fraction.
        const riseFactor = 1 + this.options.energyRiseThreshold;
        let earliestRisingIndex = firstSpeechIndex;
        let foundRisingTrend = false;

        for (let i = firstSpeechIndex - 1; i >= 0; i--) {
            if (chunks[i + 1].energy > chunks[i].energy * riseFactor) {
                earliestRisingIndex = i;
                foundRisingTrend = true;
            }

            // Stop once the SNR is well below the threshold.
            if (chunks[i].snr < minSnrThreshold / 2) {
                break;
            }

            // Bound the lookback distance.
            if (firstSpeechIndex - i > AudioSegmentProcessor.ONSET_LOOKBACK_LIMIT) {
                break;
            }
        }

        if (foundRisingTrend) {
            this.log('Found rising energy trend for speech onset', {
                index: earliestRisingIndex,
                time: chunks[earliestRisingIndex].time.toFixed(3),
                energy: chunks[earliestRisingIndex].energy.toFixed(6),
                snr: chunks[earliestRisingIndex].snr.toFixed(2)
            });
            return earliestRisingIndex;
        }

        // No rising trend: start just after the latest chunk whose SNR is
        // below the threshold.
        for (let i = firstSpeechIndex; i >= 0; i--) {
            if (chunks[i].snr < minSnrThreshold) {
                return Math.min(chunks.length - 1, i + 1);
            }
        }

        // Default lookback of a few chunks.
        return Math.max(0, firstSpeechIndex - 4);
    }

    /**
     * Create a segment object from start and end times.
     *
     * @returns The segment, or null when the duration is zero or negative.
     */
    private createSegment(startTime: number, endTime: number): ProcessedSegment | null {
        const duration = endTime - startTime;

        if (duration <= 0) {
            this.log('Skipping segment with zero/negative duration');
            return null;
        }

        return {
            startTime,
            endTime,
            duration
        };
    }

    /**
     * Rebuild the statistics snapshot from the current state.
     */
    private updateStats(): void {
        const chunkCount = this.state.recentChunks.length;

        const stats: CurrentStats = {
            silence: this.summarize(this.state.silenceStats),
            speech: this.summarize(this.state.speechStats),
            noiseFloor: this.state.noiseFloor,
            snr: chunkCount > 0 ? this.state.recentChunks[chunkCount - 1].snr : 0,
            snrThreshold: this.options.snrThreshold,
            minSnrThreshold: this.options.minSnrThreshold,
            energyRiseThreshold: this.options.energyRiseThreshold
        };

        this.state.currentStats = stats;
    }

    /** Average the recorded periods; every field is 0 when the list is empty. */
    private summarize(
        entries: ReadonlyArray<{ duration: number; avgEnergy: number; energyIntegral: number }>
    ): { avgDuration: number; avgEnergy: number; avgEnergyIntegral: number } {
        return {
            avgDuration: this.average(entries.map(s => s.duration)),
            avgEnergy: this.average(entries.map(s => s.avgEnergy)),
            avgEnergyIntegral: this.average(entries.map(s => s.energyIntegral))
        };
    }

    /** Arithmetic mean; 0 for an empty array. */
    private average(arr: number[]): number {
        if (arr.length === 0) return 0;
        return arr.reduce((a, b) => a + b, 0) / arr.length;
    }

    /**
     * Get the statistics snapshot built after the most recent chunk.
     */
    getStats(): CurrentStats {
        return this.state.currentStats;
    }

    /**
     * Get current state info for debugging.
     */
    getStateInfo(): { inSpeech: boolean; noiseFloor: number; snr: number; speechStartTime: number | null } {
        return {
            inSpeech: this.state.inSpeech,
            noiseFloor: this.state.noiseFloor,
            snr: this.state.currentStats.snr,
            speechStartTime: this.state.speechStartTime
        };
    }

    /**
     * Reset all state. Configuration is kept.
     */
    reset(): void {
        this.state = {
            inSpeech: false,
            speechStartTime: null,
            silenceStartTime: null,
            silenceCounter: 0,
            recentChunks: [],
            speechEnergies: [],
            silenceEnergies: [],
            speechStats: [],
            silenceStats: [],
            currentStats: {
                silence: { avgDuration: 0, avgEnergy: 0, avgEnergyIntegral: 0 },
                speech: { avgDuration: 0, avgEnergy: 0, avgEnergyIntegral: 0 },
                noiseFloor: 0.005,
                snr: 0,
                snrThreshold: this.options.snrThreshold,
                minSnrThreshold: this.options.minSnrThreshold,
                energyRiseThreshold: this.options.energyRiseThreshold
            },
            segmentCounter: 0,
            noiseFloor: 0.005,
            recentEnergies: [],
            silenceDuration: 0
        };
    }

    // ========================================================================
    // Configuration Setters
    // ========================================================================

    /** Set the energy above which a chunk counts as speech. */
    setThreshold(threshold: number): void {
        this.options.energyThreshold = threshold;
        this.log('Updated energy threshold', threshold);
    }

    /** Set the amount of silence needed to end a speech segment. */
    setSilenceLength(length: number): void {
        this.options.silenceThreshold = length;
        this.log('Updated silence threshold', length);
    }

    /** Set the stored lookback duration. */
    setLookbackDuration(duration: number): void {
        this.options.lookbackDuration = duration;
        this.log('Updated lookback duration', duration);
    }

    /** Set the stored overlap duration. */
    setOverlapDuration(duration: number): void {
        this.options.overlapDuration = duration;
        this.log('Updated overlap duration', duration);
    }

    /** Set the SNR threshold reported in the statistics snapshot. */
    setSnrThreshold(threshold: number): void {
        this.options.snrThreshold = threshold;
        this.log('Updated SNR threshold', threshold);
    }

    /** Set the minimum SNR threshold used by the onset lookback. */
    setMinSnrThreshold(threshold: number): void {
        this.options.minSnrThreshold = threshold;
        this.log('Updated minimum SNR threshold', threshold);
    }

    /** Set the normal noise floor adaptation rate. */
    setNoiseFloorAdaptationRate(rate: number): void {
        this.options.noiseFloorAdaptationRate = rate;
        this.log('Updated noise floor adaptation rate', rate);
    }

    /** Set the adaptation rate used at the start of a silence run. */
    setFastAdaptationRate(rate: number): void {
        this.options.fastAdaptationRate = rate;
        this.log('Updated fast adaptation rate', rate);
    }

    /** Set the fractional energy rise that counts as a rising trend. */
    setEnergyRiseThreshold(threshold: number): void {
        this.options.energyRiseThreshold = threshold;
        this.log('Updated energy rise threshold', threshold);
    }

    /** Set the silence duration over which the adaptation rate blends from fast to normal. */
    setMinBackgroundDuration(duration: number): void {
        this.options.minBackgroundDuration = duration;
        this.log('Updated minimum background duration', duration);
    }

    /** Set the duration beyond which an open speech segment is split. */
    setMaxSegmentDuration(duration: number): void {
        this.options.maxSegmentDuration = duration;
        this.log('Updated maximum segment duration', duration);
    }

    /** Set the stored minimum speech duration. */
    setMinSpeechDuration(duration: number): void {
        this.options.minSpeechDuration = duration;
        this.log('Updated minimum speech duration', duration);
    }

    /** Set the pause length that is still treated as part of the speech. */
    setMaxSilenceWithinSpeech(duration: number): void {
        this.options.maxSilenceWithinSpeech = duration;
        this.log('Updated max silence within speech', duration);
    }

    /** Set the stored ending speech tolerance. */
    setEndingSpeechTolerance(duration: number): void {
        this.options.endingSpeechTolerance = duration;
        this.log('Updated ending speech tolerance', duration);
    }
}