"""Phase 17.2 — Audio Deepfake Detection.

Extracts the audio track from a video with ffmpeg, then applies signal-processing
heuristics (silence ratio, spectral centroid variance, RMS consistency) to produce
an audio_authenticity_score (0–100, higher = more natural/authentic).

AI-generated speech typically exhibits:
  - Near-zero silence between words (no natural breath pauses)
  - Very low spectral-centroid variance (monotone formant trajectory)
  - Unnaturally consistent RMS energy across voiced frames
"""
from __future__ import annotations

import os
import subprocess
import tempfile
from dataclasses import dataclass
from typing import Optional

import numpy as np
from loguru import logger


@dataclass
class AudioAnalysis:
    audio_authenticity_score: float  # 0–100
    has_audio: bool
    duration_s: float
    silence_ratio: float             # fraction of 25ms frames below RMS threshold
    spectral_variance: float         # normalised std of spectral centroid
    rms_consistency: float           # 1 – normalised std of voiced-frame RMS
    notes: str = ""
    ml_analysis: dict | None = None


# ---------------------------------------------------------------------------
# ffmpeg extraction
# ---------------------------------------------------------------------------

def _extract_audio_wav(video_path: str, out_path: str) -> bool:
    """Run ffmpeg to write the audio of *video_path* to *out_path* as WAV.

    The output is requested as mono, 16 kHz, signed 16-bit little-endian PCM,
    and any existing file at *out_path* is overwritten (``-y``).

    Returns:
        True when ffmpeg exits with status 0 and *out_path* exists and is
        non-empty; False otherwise, including when ffmpeg cannot be launched
        or does not finish within 60 seconds.
    """
    command = [
        "ffmpeg", "-y", "-i", video_path,
        "-vn", "-acodec", "pcm_s16le",
        "-ar", "16000", "-ac", "1",
        out_path,
    ]
    try:
        proc = subprocess.run(command, capture_output=True, timeout=60)

        # Success needs a clean exit status AND a non-empty output file.
        extracted = (
            proc.returncode == 0
            and os.path.exists(out_path)
            and os.path.getsize(out_path) > 0
        )
        if extracted:
            return True

        # Only the tail of stderr is logged, to keep the message short.
        stderr_tail = proc.stderr.decode(errors="replace")[-400:].strip()
        logger.warning(f"ffmpeg exited {proc.returncode} — {stderr_tail or '(no stderr)'}")
        return False
    except (FileNotFoundError, subprocess.TimeoutExpired, OSError) as exc:
        logger.warning(f"ffmpeg audio extraction failed: {exc}")
        return False


# ---------------------------------------------------------------------------
# Signal-processing analysis
# ---------------------------------------------------------------------------

def _pcm_to_unit_float(samples: np.ndarray) -> np.ndarray:
    """Scale raw WAV samples to float32 with full scale at roughly ±1.0.

    The divisor is chosen from the sample dtype so that 16-bit PCM keeps the
    historical ``/ 32768`` scaling while other WAV encodings are not
    mis-scaled.
    """
    dtype = samples.dtype
    if np.issubdtype(dtype, np.floating):
        # Floating-point WAV data is already expressed relative to full scale.
        return samples.astype(np.float32)
    if dtype == np.uint8:
        # 8-bit PCM is unsigned; its zero level sits at 128.
        return (samples.astype(np.float32) - np.float32(128.0)) / np.float32(128.0)
    if np.issubdtype(dtype, np.signedinteger):
        full_scale = float(np.iinfo(dtype).max) + 1.0
    else:
        # Unknown encoding: fall back to the 16-bit scale used previously.
        full_scale = float(np.iinfo(np.int16).max) + 1.0
    return samples.astype(np.float32) / np.float32(full_scale)


def _neutral_audio_analysis(
    notes: str, duration_s: float = 0.0, silence_ratio: float = 0.0
) -> AudioAnalysis:
    """Build the neutral (score 50) result used when the signal cannot be scored."""
    return AudioAnalysis(
        audio_authenticity_score=50.0, has_audio=True,
        duration_s=round(duration_s, 3), silence_ratio=silence_ratio,
        spectral_variance=0.0, rms_consistency=0.0,
        notes=notes,
    )


def _analyse_wav(wav_path: str) -> AudioAnalysis:
    """Compute heuristic authenticity metrics for the WAV file at *wav_path*.

    The signal is cut into 25ms frames with 50% overlap. From those frames the
    function derives the silence ratio, the normalised spread of the spectral
    centroid and the consistency of voiced-frame RMS, then combines them into
    a weighted score clamped to the range 20–100.

    Returns:
        An AudioAnalysis with ``has_audio=True``. When the file cannot be
        read, is shorter than 0.1 s, or yields no frames, a neutral score of
        50 is returned with ``notes`` set to ``"wav_read_failed"``,
        ``"too_short"`` or ``"no_frames"`` respectively.
    """
    try:
        from scipy.io import wavfile  # scipy already in requirements
        sr, data = wavfile.read(wav_path)
    except Exception as exc:  # noqa: BLE001
        logger.warning(f"WAV read failed: {exc}")
        return _neutral_audio_analysis("wav_read_failed")

    # Multi-channel input: analyse the first channel only.
    if data.ndim > 1:
        data = data[:, 0]

    data = _pcm_to_unit_float(data)
    duration_s = float(len(data) / sr)

    if duration_s < 0.1:
        return _neutral_audio_analysis("too_short", duration_s, 1.0)

    # --- 25ms framing, 50% overlap ---
    frame_len = max(1, int(sr * 0.025))
    hop_len = max(1, frame_len // 2)
    # The "+ 1" keeps the last full frame when it ends exactly at the end of
    # the signal; without it that frame is silently dropped.
    frames = [
        data[start: start + frame_len]
        for start in range(0, len(data) - frame_len + 1, hop_len)
    ]
    if not frames:
        return _neutral_audio_analysis("no_frames", duration_s, 1.0)

    rms_vals = np.array([np.sqrt(np.mean(f ** 2)) for f in frames])

    # Silence ratio: share of frames whose RMS is below the threshold.
    SILENCE_THRESH = 0.01
    silence_ratio = float(np.mean(rms_vals < SILENCE_THRESH))

    # Spectral centroid per frame (magnitude-weighted mean frequency).
    freqs = np.fft.rfftfreq(frame_len, d=1.0 / sr)
    centroids: list[float] = []
    for frame in frames:
        spec = np.abs(np.fft.rfft(frame))
        total = float(np.sum(spec))
        if total < 1e-9:
            # No spectral energy: the centroid is undefined, so skip the frame.
            continue
        centroids.append(float(np.dot(freqs, spec) / total))

    # Coefficient of variation of the centroid; epsilon guards a zero mean.
    spec_var = (
        float(np.std(centroids) / (np.mean(centroids) + 1e-6))
        if centroids else 0.0
    )

    # RMS consistency on voiced (non-silent) frames.
    voiced = rms_vals[rms_vals >= SILENCE_THRESH]
    if len(voiced) > 0:
        rms_consistency = float(
            1.0 - min(1.0, np.std(voiced) / (np.mean(voiced) + 1e-6))
        )
    else:
        # No voiced frames to judge: use a mid-range placeholder.
        rms_consistency = 0.5

    # --- Heuristic scoring ---
    # Silence score: natural speech has moderate pauses (0.1–0.6).
    #   < 0.05 → no pauses (suspicious); > 0.85 → near-silent (unclear).
    if silence_ratio < 0.05:
        silence_score = 55.0
    elif silence_ratio > 0.85:
        silence_score = 50.0
    else:
        silence_score = 100.0

    # Spectral variance score: natural formant motion gives spec_var > 0.25.
    spec_score = min(100.0, spec_var * 250.0)

    # RMS consistency: > 0.92 = unnaturally even (TTS/vocoder artifact).
    rms_score = 55.0 if rms_consistency > 0.92 else 100.0

    audio_score = float(
        0.30 * silence_score + 0.50 * spec_score + 0.20 * rms_score
    )
    audio_score = max(20.0, min(100.0, audio_score))

    logger.info(
        f"Audio: dur={duration_s:.1f}s silence={silence_ratio:.2f} "
        f"spec_var={spec_var:.4f} rms_cons={rms_consistency:.4f} "
        f"→ audio_score={audio_score:.1f}"
    )

    return AudioAnalysis(
        audio_authenticity_score=round(audio_score, 2),
        has_audio=True,
        duration_s=round(duration_s, 2),
        silence_ratio=round(silence_ratio, 4),
        spectral_variance=round(spec_var, 4),
        rms_consistency=round(rms_consistency, 4),
    )


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def analyze_audio(video_path: str) -> Optional[AudioAnalysis]:
    """Extract and analyse the audio track from *video_path*.

    The audio is extracted to a temporary WAV file, scored with the
    signal-processing heuristics, and then blended 50/50 with the ML model's
    ``fake_probability``. The ML stage is best-effort: if it cannot be
    imported, raises, or returns an unusable result, the heuristic-only
    analysis is returned instead of being discarded.

    Returns:
        An AudioAnalysis dataclass, or None if no audio track is present,
        ffmpeg is unavailable, or an unexpected error occurs outside the ML
        stage. The temporary WAV file is removed in all cases.
    """
    tmp_wav: Optional[str] = None
    try:
        # Reserve a unique path; ffmpeg overwrites the empty placeholder file.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fh:
            tmp_wav = fh.name

        if not _extract_audio_wav(video_path, tmp_wav):
            logger.info("No audio track found or ffmpeg unavailable — skipping audio analysis")
            return None

        analysis = _analyse_wav(tmp_wav)

        # --- ML stage (optional; must not invalidate the heuristic result) ---
        try:
            from services.audio_ml_service import analyze_audio_ml
            ml_score = analyze_audio_ml(tmp_wav)
        except Exception as exc:  # noqa: BLE001
            logger.warning(f"Audio ML analysis failed — using heuristic score only: {exc}")
            return analysis

        analysis.ml_analysis = ml_score

        try:
            fake_prob = float(ml_score["fake_probability"])
        except (TypeError, KeyError, ValueError) as exc:
            logger.warning(f"Audio ML result has no usable fake_probability — using heuristic score only: {exc!r}")
            return analysis

        if not np.isfinite(fake_prob):
            logger.warning("Audio ML fake_probability is not finite — using heuristic score only")
            return analysis

        # Keep the blended score inside the documented 0–100 range.
        fake_prob = min(1.0, max(0.0, fake_prob))

        heuristics_prob = 1.0 - (analysis.audio_authenticity_score / 100.0)
        final_prob = 0.5 * heuristics_prob + 0.5 * fake_prob
        analysis.audio_authenticity_score = round((1.0 - final_prob) * 100.0, 2)

        return analysis

    except Exception as exc:  # noqa: BLE001
        logger.warning(f"Audio analysis error: {exc}")
        return None

    finally:
        # Best-effort cleanup of the temporary WAV file.
        if tmp_wav and os.path.exists(tmp_wav):
            try:
                os.unlink(tmp_wav)
            except OSError:
                pass