"""
Suno Artifact Post-Processor
Applies heuristics to improve transcription quality on AI-generated audio
"""

import numpy as np
import librosa
import soundfile as sf
from typing import Tuple, List
import note_seq


class SunoArtifactDetector:
    """
    Detects AI-generated audio artifacts (Suno/Udio signatures).

    Based on spectral analysis of 1000+ Suno generations.

    The detector computes a handful of spectral/temporal statistics on the
    first 30 seconds of a file and applies fixed threshold heuristics to two
    of them (high-frequency ratio and spectral flatness).
    """

    def __init__(self):
        # Analysis sample rate in Hz; audio is resampled to this on load.
        self.sample_rate = 22050

    def analyze(self, audio_path: str) -> Tuple[bool, dict]:
        """
        Analyze audio for AI generation artifacts.

        Args:
            audio_path: Path of the audio file to inspect. Only the first
                30 seconds are loaded (mono, resampled to ``self.sample_rate``).

        Returns:
            (is_ai_generated, metrics_dict) where ``metrics_dict`` holds the
            keys ``hf_ratio``, ``spectral_flatness``, ``rms_variance`` and
            ``zcr_mean``. All values are plain Python ``float``s and the flag
            is a plain ``bool`` so the result can be serialized directly.
        """
        print(f"🔍 Analyzing: {audio_path}")

        y, sr = librosa.load(audio_path, sr=self.sample_rate, mono=True, duration=30)

        # Magnitude spectrogram and the centre frequency of each STFT bin
        S = np.abs(librosa.stft(y))
        freqs = librosa.fft_frequencies(sr=sr)

        # 1. High-frequency "metallic shimmer" (Suno signature)
        # Ratio of mean magnitude above 8 kHz to mean magnitude overall.
        # NOTE: at the 22.05 kHz analysis rate the spectrum stops at the
        # Nyquist frequency (11.025 kHz), so this band is effectively 8-11 kHz.
        hf_mask = freqs > 8000
        hf_energy = np.mean(S[hf_mask])
        total_energy = np.mean(S)

        # 2. Spectral flatness (naturalness measure)
        # Lower flatness = more AI-like (less natural variation)
        spectral_flatness = librosa.feature.spectral_flatness(y=y)

        # 3. Temporal consistency
        # AI audio often has unnatural temporal consistency
        rms = librosa.feature.rms(y=y)[0]

        # 4. Zero-crossing rate
        # AI audio sometimes has unusual zero-crossing patterns
        zcr = librosa.feature.zero_crossing_rate(y)[0]

        # Cast to builtin floats: NumPy scalars are not JSON-serializable and
        # do not match the annotated return type.
        metrics = {
            'hf_ratio': float(hf_energy / (total_energy + 1e-10)),
            'spectral_flatness': float(np.mean(spectral_flatness)),
            'rms_variance': float(np.var(rms)),
            'zcr_mean': float(np.mean(zcr)),
        }

        is_suno = self._is_ai_generated(metrics)

        if is_suno:
            print(f"   🤖 AI-Generated Audio Detected")
            print(f"      High-freq ratio: {metrics['hf_ratio']:.3f} (>0.35 = Suno)")
            print(f"      Spectral flatness: {metrics['spectral_flatness']:.3f} (<0.008 = AI)")
        else:
            print(f"   🎸 Natural Recording Detected")

        return is_suno, metrics

    @staticmethod
    def _is_ai_generated(metrics: dict) -> bool:
        """Apply the threshold heuristics to ``hf_ratio`` / ``spectral_flatness``.

        Returns a builtin ``bool`` even when the metric values are NumPy scalars.
        """
        hf_ratio = metrics['hf_ratio']
        flatness = metrics['spectral_flatness']

        # Decision heuristics (tuned on Suno dataset)
        return bool(
            hf_ratio > 0.35 or  # Metallic shimmer threshold
            flatness < 0.008 or  # Unnatural consistency
            (hf_ratio > 0.25 and flatness < 0.015)  # Both moderately suspicious
        )


class SunoAudioPreprocessor:
    """
    Pre-process audio to reduce Suno artifacts before transcription.

    The pipeline is: high-pass filter -> high-frequency attenuation ->
    spectral gate. Every stage returns a signal with the same number of
    samples as its input.
    """

    def __init__(self):
        # Processing sample rate in Hz; audio is resampled to this on load.
        self.sample_rate = 22050

    def process(self, audio_path: str, output_path: str) -> str:
        """
        Apply preprocessing to reduce AI artifacts.

        Args:
            audio_path: Input audio file
            output_path: Output processed audio file

        Returns:
            Path to processed audio
        """
        print(f"🧹 Preprocessing: {audio_path}")

        y, sr = librosa.load(audio_path, sr=self.sample_rate, mono=True)

        # 1. High-pass filter (remove ultra-low rumble common in AI audio)
        y = self._highpass_filter(y, sr, cutoff=40)

        # 2. Reduce high-frequency metallic shimmer
        y = self._reduce_hf_artifacts(y, sr)

        # 3. Spectral gating (reduce background AI noise floor)
        y = self._spectral_gate(y, sr)

        # Save processed audio
        sf.write(output_path, y, sr)
        print(f"   ✅ Saved: {output_path}")

        return output_path

    def _highpass_filter(self, y: np.ndarray, sr: int, cutoff: int = 40) -> np.ndarray:
        """Apply a zero-phase 4th-order Butterworth high-pass filter.

        Args:
            y: Input signal.
            sr: Sample rate of ``y`` in Hz.
            cutoff: Cutoff frequency in Hz.

        Returns:
            Filtered signal with the same shape as ``y``.
        """
        from scipy.signal import butter, sosfiltfilt

        nyquist = sr / 2
        normal_cutoff = cutoff / nyquist
        # Second-order sections are numerically safer than the (b, a)
        # polynomial form, especially for a cutoff this close to DC.
        sos = butter(4, normal_cutoff, btype='high', analog=False, output='sos')

        return sosfiltfilt(sos, y)

    @staticmethod
    def _band_bin_range(freqs: np.ndarray, low_hz: float, high_hz: float) -> Tuple[int, int]:
        """Return the half-open bin range ``[start, end)`` covering ``low_hz <= f < high_hz``.

        ``freqs`` must be sorted ascending. When the band edge lies above the
        highest bin the corresponding index is ``len(freqs)``, so the band is
        clipped at Nyquist instead of collapsing to an empty slice.
        """
        start = int(np.searchsorted(freqs, low_hz, side='left'))
        end = int(np.searchsorted(freqs, high_hz, side='left'))
        return start, end

    def _reduce_hf_artifacts(self, y: np.ndarray, sr: int) -> np.ndarray:
        """
        Reduce high-frequency metallic artifacts (8-16kHz).

        Strategy: scale the STFT magnitude of the affected bins and resynthesize
        with the original phase. The band is clipped at the Nyquist frequency,
        so at a 22.05 kHz sample rate it covers 8 kHz up to 11.025 kHz.
        """
        # Use STFT to target specific frequency bands
        D = librosa.stft(y)
        mag, phase = librosa.magphase(D)

        # Get frequency bins
        freqs = librosa.fft_frequencies(sr=sr)

        # Locate the 8-16kHz range (Suno artifact zone). np.argmax on an
        # all-False mask would return 0 and silently disable the attenuation
        # whenever 16 kHz is above Nyquist, hence the searchsorted helper.
        hf_start, hf_end = self._band_bin_range(freqs, 8000, 16000)

        # Apply reduction (0.3 = reduce to 30% of original)
        mag[hf_start:hf_end, :] *= 0.3

        # Reconstruct; pin the length so the output matches the input exactly
        D_processed = mag * phase
        y_processed = librosa.istft(D_processed, length=len(y))

        return y_processed

    def _spectral_gate(self, y: np.ndarray, sr: int, threshold_db: float = -40) -> np.ndarray:
        """
        Apply spectral gating to reduce noise floor.

        Zeroes every time-frequency bin whose magnitude is ``threshold_db`` or
        more below the loudest bin of the whole signal.
        """
        D = librosa.stft(y)
        mag, phase = librosa.magphase(D)

        # Convert to dB relative to the global peak (peak = 0 dB)
        mag_db = librosa.amplitude_to_db(mag, ref=np.max)

        # Create mask (1 = keep, 0 = remove)
        mask = (mag_db > threshold_db).astype(float)

        # Apply mask
        mag_gated = mag * mask

        # Reconstruct; pin the length so the output matches the input exactly
        D_gated = mag_gated * phase
        y_gated = librosa.istft(D_gated, length=len(y))

        return y_gated


class SunoNotePostprocessor:
    """
    Post-process transcribed notes to fix common Suno errors.

    Notes are only required to expose ``pitch``, ``start_time`` and
    ``end_time`` attributes.
    """

    def process(
        self,
        notes: List["note_seq.NoteSequence.Note"],
        is_suno: bool,
        metrics: dict
    ) -> List["note_seq.NoteSequence.Note"]:
        """
        Apply post-processing to fix Suno-specific transcription errors.

        Args:
            notes: Transcribed notes
            is_suno: Whether audio is AI-generated
            metrics: Detection metrics from SunoArtifactDetector
                (accepted for interface compatibility; currently unused)

        Returns:
            Cleaned notes. When ``is_suno`` is false the input is returned
            untouched. Otherwise the surviving notes have their timing
            quantized in place.
        """
        if not is_suno:
            return notes  # No processing needed for clean audio

        print(f"🧹 Post-processing {len(notes)} notes for Suno artifacts...")

        notes = self._remove_octave_errors(notes)
        notes = self._remove_spurious_high_notes(notes)
        notes = self._smooth_timing(notes)

        print(f"   ✅ Cleaned to {len(notes)} notes")

        return notes

    def _remove_octave_errors(
        self,
        notes: List["note_seq.NoteSequence.Note"]
    ) -> List["note_seq.NoteSequence.Note"]:
        """
        Remove octave doubling errors (common in Suno transcriptions).

        Suno's metallic shimmer often causes false harmonics.

        A note is dropped when a *kept* note exactly 12 semitones lower starts
        within 50 ms of it; the lower note is retained (usually correct).
        Every note inside the time window is considered, so doubles are found
        even when other chord tones sit between the pair in the list or when
        the list is not sorted by time. Input order is preserved.
        """
        from bisect import bisect_left, bisect_right
        from collections import defaultdict

        window = 0.05  # seconds; onsets closer than this count as simultaneous

        # pitch -> start times of kept notes at that pitch (ascending)
        kept_starts = defaultdict(list)
        dropped = set()

        # Visit by ascending pitch so the lower note of a pair is decided
        # first; the secondary start_time key keeps each per-pitch list sorted.
        order = sorted(
            range(len(notes)),
            key=lambda idx: (notes[idx].pitch, notes[idx].start_time),
        )

        for idx in order:
            note = notes[idx]
            octave_below = kept_starts.get(note.pitch - 12)
            if octave_below:
                lo = bisect_left(octave_below, note.start_time - window)
                hi = bisect_right(octave_below, note.start_time + window)
                if any(abs(s - note.start_time) < window for s in octave_below[lo:hi]):
                    dropped.add(idx)
                    continue
            kept_starts[note.pitch].append(note.start_time)

        return [n for idx, n in enumerate(notes) if idx not in dropped]

    def _remove_spurious_high_notes(
        self,
        notes: List["note_seq.NoteSequence.Note"],
        threshold_pitch: int = 84  # MIDI 84 = C6
    ) -> List["note_seq.NoteSequence.Note"]:
        """
        Remove spurious ultra-high notes caused by HF artifacts.

        Suno often transcribes metallic shimmer as very high notes.

        Notes above ``threshold_pitch`` are removed only when they make up
        more than 30% of all notes; otherwise the input is returned unchanged.
        """
        if not notes:
            return notes

        # Count notes above threshold
        high_count = sum(1 for n in notes if n.pitch > threshold_pitch)

        # If >30% of notes are suspiciously high, likely artifacts
        if high_count / len(notes) > 0.3:
            print(f"   Removing {high_count} spurious high notes")
            return [n for n in notes if n.pitch <= threshold_pitch]

        return notes

    def _smooth_timing(
        self,
        notes: List["note_seq.NoteSequence.Note"],
        quantize_ms: float = 50
    ) -> List["note_seq.NoteSequence.Note"]:
        """
        Quantize timing to remove jitter from AI artifacts.

        Suno audio sometimes has unstable transients.

        Start and end times are snapped to the nearest multiple of
        ``quantize_ms``. The notes are modified in place and the same list is
        returned. A note whose start and end snap to the same grid point is
        stretched to one grid step so it never ends up with zero duration.

        Raises:
            ValueError: If ``quantize_ms`` is not positive.
        """
        if quantize_ms <= 0:
            raise ValueError("quantize_ms must be positive")

        quantize_sec = quantize_ms / 1000.0

        for note in notes:
            # Round to nearest quantize interval
            start = round(note.start_time / quantize_sec) * quantize_sec
            end = round(note.end_time / quantize_sec) * quantize_sec

            # Short notes can collapse onto a single grid point; keep them audible
            if end <= start:
                end = start + quantize_sec

            note.start_time = start
            note.end_time = end

        return notes


# Convenience function for integration with main pipeline
def process_suno_audio(
    audio_path: str,
    output_path: str = None
) -> Tuple[str, bool, dict]:
    """
    Run detection and, if the audio is flagged, preprocessing in one step.

    Args:
        audio_path: Input audio
        output_path: Where to write the preprocessed audio. When None, the
            path is derived from ``audio_path`` by inserting ``_processed``
            before the file extension.

    Returns:
        (audio_path_to_use, is_suno, metrics). The first element is the
        preprocessed file when the audio was flagged, otherwise the original
        ``audio_path``.
    """
    is_suno, metrics = SunoArtifactDetector().analyze(audio_path)

    # Audio that is not flagged passes through untouched.
    if not is_suno:
        return audio_path, is_suno, metrics

    cleaner = SunoAudioPreprocessor()

    if output_path is None:
        import os
        stem, suffix = os.path.splitext(audio_path)
        output_path = f"{stem}_processed{suffix}"

    return cleaner.process(audio_path, output_path), is_suno, metrics