#!/usr/bin/env python3
"""MELD English test sets — E2E server test.

Uploads 8 MELD test WAVs to the deployed HF Spaces server,
runs the full pipeline (Stage 1→2→3), checks that each result is
complete, and saves it next to the ground truth emotion labels.

Usage:
    python scripts/test_meld_e2e_server.py
    python scripts/test_meld_e2e_server.py --server http://localhost:8000
"""

from __future__ import annotations

import argparse
import json
import sys
import time
from pathlib import Path

import requests

# Paths are resolved relative to this file: the project root is the parent
# of the directory that contains this script.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
MELD_DIR = PROJECT_ROOT.joinpath("data", "meld_test")
GT_PATH = MELD_DIR.joinpath("ground_truth.json")

# Server used when --server is not given on the command line.
DEFAULT_SERVER = "https://bbbakery-ustwo-api.hf.space"
POLL_INTERVAL = 5  # seconds slept between status polls
MAX_WAIT = 5 * 60  # seconds of polling allowed per file (5 minutes)


def health_check(base: str) -> bool:
    """Return True when the server's health endpoint reports status "ok".

    Sends ``GET {base}/api/health`` with a 10-second timeout. Every failure
    mode (request error, non-JSON body, payload whose ``status`` is not
    ``"ok"``) is printed and reported as ``False`` instead of being raised.

    Args:
        base: Server base URL without a trailing slash.

    Returns:
        True if the JSON payload contains ``"status": "ok"``, else False.
    """
    try:
        r = requests.get(f"{base}/api/health", timeout=10)
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"  ❌ Health check failed: {e}")
        return False

    if isinstance(data, dict) and data.get("status") == "ok":
        print(f"  ✅ Server OK ({data.get('timestamp', '?')})")
        return True

    # Previously this path returned False silently; say why we are failing.
    print(f"  ❌ Server unhealthy (HTTP {r.status_code}): {r.text[:200]}")
    return False


def upload(base: str, wav_path: Path) -> str | None:
    """Upload a WAV file and return the ``call_id`` from the response.

    Posts the file as multipart form field ``file`` to ``{base}/api/upload``
    with a 60-second timeout.

    Args:
        base: Server base URL without a trailing slash.
        wav_path: Path of the WAV file to send.

    Returns:
        The ``call_id`` value from the JSON response, or None when the
        request fails, the status code is not 200, the body is not JSON,
        or the response carries no ``call_id``. The reason is printed.

    Raises:
        OSError: If ``wav_path`` cannot be opened for reading.
    """
    # Open the file outside the try block so local file errors still raise
    # (RequestException is itself an OSError subclass).
    with open(wav_path, "rb") as f:
        try:
            r = requests.post(
                f"{base}/api/upload",
                files={"file": (wav_path.name, f, "audio/wav")},
                timeout=60,
            )
        except requests.RequestException as e:
            # A network problem on one file must not abort the whole run.
            print(f"    ❌ Upload failed: {e}")
            return None

    if r.status_code != 200:
        print(f"    ❌ Upload failed ({r.status_code}): {r.text[:200]}")
        return None

    try:
        data = r.json()
    except ValueError:
        print(f"    ❌ Upload returned a non-JSON body: {r.text[:200]}")
        return None

    call_id = data.get("call_id") if isinstance(data, dict) else None
    if not call_id:
        print(f"    ❌ Upload response has no call_id: {r.text[:200]}")
    return call_id


def analyze_and_poll(base: str, call_id: str) -> dict | None:
    """Start analysis for ``call_id`` and poll its status until it finishes.

    Posts to ``{base}/api/analyze`` and, unless the response already has
    ``status == "done"``, polls ``{base}/api/analyze/{call_id}/status``
    every ``POLL_INTERVAL`` seconds for at most ``MAX_WAIT`` seconds of
    wall-clock time.

    Args:
        base: Server base URL without a trailing slash.
        call_id: Identifier returned by the upload step.

    Returns:
        The ``result`` field of the final ``done`` payload, or None when
        the start request fails, the server reports ``status == "error"``,
        polling keeps failing, or the wait times out. The reason is printed.
    """
    # Give up after this many polls in a row fail (network error / bad JSON).
    max_consecutive_poll_errors = 3

    # Start
    try:
        r = requests.post(f"{base}/api/analyze", params={"call_id": call_id}, timeout=30)
    except requests.RequestException as e:
        print(f"    ❌ Analyze start failed: {e}")
        return None
    if r.status_code not in (200, 202):
        print(f"    ❌ Analyze start failed ({r.status_code}): {r.text[:200]}")
        return None

    try:
        data = r.json()
    except ValueError:
        print(f"    ❌ Analyze start returned a non-JSON body: {r.text[:200]}")
        return None
    if isinstance(data, dict) and data.get("status") == "done":
        return data.get("result")

    # Poll. A monotonic deadline bounds real elapsed time, including the
    # time spent inside each status request, not just the sleep intervals.
    started = time.monotonic()
    deadline = started + MAX_WAIT
    poll_errors = 0
    while time.monotonic() < deadline:
        time.sleep(POLL_INTERVAL)

        try:
            r = requests.get(f"{base}/api/analyze/{call_id}/status", timeout=15)
            data = r.json()
        except (requests.RequestException, ValueError) as e:
            # Tolerate transient failures, but do not spin until the
            # deadline against a server that is clearly unreachable.
            poll_errors += 1
            if poll_errors >= max_consecutive_poll_errors:
                print(f"    ❌ Status polling failed: {e}")
                return None
            continue
        poll_errors = 0

        status = data.get("status") if isinstance(data, dict) else None

        if status == "done":
            return data.get("result")
        if status == "error":
            print(f"    ❌ Pipeline error: {data.get('error', '?')}")
            return None

        mins, secs = divmod(int(time.monotonic() - started), 60)
        # "\r" keeps the progress indicator on a single console line.
        print(f"    ⏳ {status}... ({mins}m{secs}s)", end="\r")

    print(f"    ❌ Timeout after {MAX_WAIT}s")
    return None


def extract_emotions(result: dict) -> dict:
    """Flatten the emotion-related fields of a pipeline result.

    Sections that are missing or explicitly ``None`` are treated as empty,
    so the placeholder defaults are used instead of raising.

    Args:
        result: Result payload returned by the analysis endpoint.

    Returns:
        A flat dict with these keys:
        ``speaker_<i>_state`` for each entry of ``character_reactions``
        (by position), ``garden_mood`` and ``garden_delta`` from
        ``garden_update``, ``recap_headline`` from ``recap_card``, and,
        when ``stage2_output.speaker_summaries`` is present,
        ``<speaker>_dominant`` / ``<speaker>_distribution`` per speaker.
    """
    info: dict = {}

    # Character reactions → per-speaker state, keyed by list position.
    reactions = result.get("character_reactions") or []
    for i, rx in enumerate(reactions):
        info[f"speaker_{i}_state"] = rx.get("solo_state", "?")

    # Garden update
    garden = result.get("garden_update") or {}
    info["garden_mood"] = garden.get("mood", "?")
    info["garden_delta"] = garden.get("growth_delta", 0)

    # Recap
    recap = result.get("recap_card") or {}
    info["recap_headline"] = recap.get("headline", "?")

    # Stage 2 emotions (only if the server exposes them in the result)
    stage2 = result.get("stage2_output") or {}
    summaries = stage2.get("speaker_summaries") or {}
    for spk, summary in summaries.items():
        info[f"{spk}_dominant"] = summary.get("dominant_emotion", "?")
        info[f"{spk}_distribution"] = summary.get("emotion_distribution", {})

    return info


def _save_results(results: dict, out_path: Path) -> None:
    """Write per-tag results to ``out_path`` as JSON, omitting ``full_result``."""
    # The raw pipeline output is dropped to keep the file manageable.
    slim = {
        tag: {k: v for k, v in entry.items() if k != "full_result"}
        for tag, entry in results.items()
    }
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(slim, f, indent=2, ensure_ascii=False)


def _print_summary(results: dict, pass_count: int, fail_count: int, out_path: Path) -> None:
    """Print the per-tag summary table followed by the pass/fail totals."""
    print("=" * 70)
    print("  SUMMARY")
    print("=" * 70)
    print(f"\n  {'Tag':<25} {'Status':<10} {'Time':>6} {'Reactions':>10} {'Garden':>8} {'Recap':>7}")
    print("  " + "-" * 70)
    for tag in sorted(results):
        r = results[tag]
        status_icon = "✅" if r["status"] == "pass" else "❌"
        # Entries that failed before analysis finished carry no timing.
        elapsed = f"{r['elapsed_sec']:.1f}s" if "elapsed_sec" in r else "—"
        react = "✅" if r.get("has_reactions") else "❌"
        garden = "✅" if r.get("has_garden") else "❌"
        recap = "✅" if r.get("has_recap") else "❌"
        print(f"  {tag:<25} {status_icon:<10} {elapsed:>6} {react:>10} {garden:>8} {recap:>7}")

    print(f"\n  Total: {pass_count} pass / {fail_count} fail / {len(results)} total")
    if not results:
        print("  ⚠️  No test sets were executed")
    print(f"  Results saved: {out_path}")
    print("=" * 70)


def main() -> bool:
    """Run the MELD end-to-end test against the configured server.

    Parses ``--server``, health-checks it (exiting with status 1 on
    failure), loads the ground truth from ``GT_PATH``, and then, for every
    tag that has a matching ``<tag>.wav`` in ``MELD_DIR``, uploads the file,
    runs the analysis and checks that the result contains character
    reactions, a garden update and a recap card. Tags without a WAV file
    are skipped and not counted. Results are written to
    ``MELD_DIR / "e2e_results.json"`` and a summary table is printed.

    Returns:
        True when at least one test set ran and none failed, else False.
    """
    parser = argparse.ArgumentParser(description="MELD E2E server test")
    parser.add_argument("--server", default=DEFAULT_SERVER, help="Server base URL")
    args = parser.parse_args()
    base = args.server.rstrip("/")

    print("=" * 70)
    print("  MELD E2E Server Test")
    print(f"  Server: {base}")
    print("=" * 70)

    # Health check
    print("\n[1] Health check")
    if not health_check(base):
        sys.exit(1)

    # Load ground truth. Read as UTF-8 explicitly so the result does not
    # depend on the platform's default encoding (the output is UTF-8 too).
    print("\n[2] Loading ground truth")
    with open(GT_PATH, encoding="utf-8") as f:
        gt = json.load(f)
    print(f"  {len(gt)} test sets loaded")

    # Process each test set
    print("\n[3] Running E2E tests\n")
    results = {}
    pass_count = 0
    fail_count = 0

    for tag in sorted(gt):
        wav_path = MELD_DIR / f"{tag}.wav"
        if not wav_path.exists():
            print(f"  ⚠️  {tag}: WAV not found, skipping")
            continue

        gt_entry = gt[tag]
        print(f"  📦 {tag} — {gt_entry['description']}")
        print(f"     Primary: {gt_entry['primary_emotion']} | Duration: {gt_entry['duration_sec']}s | Utts: {gt_entry['total_utterances']}")

        # Upload
        call_id = upload(base, wav_path)
        if not call_id:
            fail_count += 1
            results[tag] = {"status": "upload_failed"}
            continue
        print(f"     Upload OK → {call_id}")

        # Analyze + poll. Flush so the progress text is visible during the
        # potentially long wait even though no newline has been written.
        print("     Analyzing...", end="", flush=True)
        start_time = time.time()
        result = analyze_and_poll(base, call_id)
        elapsed = time.time() - start_time

        if not result:
            fail_count += 1
            results[tag] = {"status": "analyze_failed", "call_id": call_id}
            print()
            continue

        print(f"\r     ✅ Done in {elapsed:.1f}s")

        # Extract emotions
        emotions = extract_emotions(result)

        # Check pipeline completeness
        has_reactions = len(result.get("character_reactions", [])) > 0
        has_garden = "garden_update" in result
        has_recap = "recap_card" in result

        status = "pass" if (has_reactions and has_garden and has_recap) else "partial"
        if status == "pass":
            pass_count += 1
        else:
            fail_count += 1

        results[tag] = {
            "status": status,
            "call_id": call_id,
            "elapsed_sec": round(elapsed, 1),
            "has_reactions": has_reactions,
            "has_garden": has_garden,
            "has_recap": has_recap,
            "emotions": emotions,
            "full_result": result,
            "ground_truth": {
                "primary_emotion": gt_entry["primary_emotion"],
                "emotion_distribution": gt_entry["emotion_distribution"],
            },
        }

        # Print details
        print(f"     Reactions: {'✅' if has_reactions else '❌'} | Garden: {'✅' if has_garden else '❌'} | Recap: {'✅' if has_recap else '❌'}")
        for k, v in emotions.items():
            if not k.startswith("full_"):
                print(f"     {k}: {v}")
        print()

    # Save results
    out_path = MELD_DIR / "e2e_results.json"
    _save_results(results, out_path)

    # Summary
    _print_summary(results, pass_count, fail_count, out_path)

    # A run that executed nothing (e.g. every WAV missing) must not be
    # reported as a success.
    return fail_count == 0 and bool(results)


if __name__ == "__main__":
    # Exit status mirrors main()'s verdict: 0 on success, 1 otherwise.
    exit_code = 0 if main() else 1
    sys.exit(exit_code)