#!/usr/bin/env python3
"""20Hours Korean demo set — E2E server test.

Uploads 7 curated Korean demo WAVs to the deployed HF Spaces server,
runs the full pipeline (Stage 1 → 2 → 3), and compares results
against the intended demo emotion labels (data/20hours_test/ground_truth.json).

Usage:
    python scripts/test_20hours_e2e_server.py
    python scripts/test_20hours_e2e_server.py --server http://localhost:8000
"""

from __future__ import annotations

import argparse
import json
import sys
import time
from pathlib import Path

import requests

# Project root: two directory levels above this file's resolved path.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Directory holding the demo WAVs ("<tag>.wav") and where e2e_results.json is written.
TEST_DIR = PROJECT_ROOT / "data" / "20hours_test"
# JSON file keyed by clip tag with the intended-emotion metadata for each clip.
GT_PATH = TEST_DIR / "ground_truth.json"

# Server base URL used when --server is not passed on the command line.
DEFAULT_SERVER = "https://bbbakery-ustwo-api.hf.space"
# Seconds to sleep between two status polls of a running analysis.
POLL_INTERVAL = 5
# Polling budget, in seconds, before an analysis is reported as timed out.
MAX_WAIT = 300


def health_check(base: str) -> bool:
    """Return True when the server's health endpoint reports ``status == "ok"``.

    Sends ``GET {base}/api/health`` with a 10-second timeout. Every failure
    mode (connection error, non-JSON body, unexpected payload, non-"ok"
    status) is reported on stdout and yields ``False`` instead of raising, so
    the caller can simply abort the run.

    Args:
        base: Server base URL; ``/api/health`` is appended to it.

    Returns:
        True if the JSON response carries ``"status": "ok"``, False otherwise.
    """
    try:
        r = requests.get(f"{base}/api/health", timeout=10)
        data = r.json()
        if data.get("status") == "ok":
            print(f"  Server OK ({data.get('timestamp', '?')})")
            return True
        # A reachable but unhealthy server used to fail silently; say why.
        print(
            f"  Health check failed: HTTP {r.status_code}, "
            f"status={data.get('status')!r}"
        )
    except Exception as e:  # best-effort probe: any error means "not healthy"
        print(f"  Health check failed: {e}")
    return False


def upload(base: str, wav_path: Path) -> str | None:
    """Upload one WAV file and return the server-assigned call id.

    Posts the file as multipart field ``file`` to ``{base}/api/upload`` with a
    60-second timeout. All failures are reported on stdout and signalled by
    returning ``None`` so that one bad clip does not abort the whole run.

    Args:
        base: Server base URL; ``/api/upload`` is appended to it.
        wav_path: Path of the WAV file to send.

    Returns:
        The ``call_id`` value from the JSON response, or ``None`` when the
        file cannot be read, the request fails, the status code is not 200,
        or the body is not a JSON object.
    """
    try:
        with open(wav_path, "rb") as f:
            r = requests.post(
                f"{base}/api/upload",
                files={"file": (wav_path.name, f, "audio/wav")},
                timeout=60,
            )
    except (OSError, requests.RequestException) as e:
        # Network/file errors become a per-clip failure instead of a crash.
        print(f"    Upload failed: {e}")
        return None

    if r.status_code != 200:
        print(f"    Upload failed ({r.status_code}): {r.text[:200]}")
        return None

    try:
        return r.json().get("call_id")
    except (ValueError, AttributeError):
        # ValueError: body is not JSON; AttributeError: JSON is not an object.
        print(f"    Upload failed: unexpected response body: {r.text[:200]}")
        return None


def analyze_and_poll(base: str, call_id: str) -> dict | None:
    """Start the analysis for ``call_id`` and poll until it finishes.

    Posts to ``{base}/api/analyze`` and, unless the response already reports
    ``status == "done"``, polls ``{base}/api/analyze/{call_id}/status`` every
    ``POLL_INTERVAL`` seconds. The ``MAX_WAIT`` budget is measured with a
    monotonic wall clock, so time spent inside slow status requests counts
    toward the timeout (it previously counted only the sleep time).

    A failed or non-JSON status poll is treated as transient: it is reported
    and polling continues until the deadline.

    Args:
        base: Server base URL.
        call_id: Identifier returned by the upload endpoint.

    Returns:
        The ``result`` value of the "done" response, or ``None`` when the
        analysis could not be started, the server reported ``"error"``, or
        the deadline passed.
    """
    try:
        r = requests.post(
            f"{base}/api/analyze", params={"call_id": call_id}, timeout=30
        )
    except requests.RequestException as e:
        print(f"    Analyze start failed: {e}")
        return None
    if r.status_code not in (200, 202):
        print(f"    Analyze start failed ({r.status_code}): {r.text[:200]}")
        return None

    try:
        data = r.json()
    except ValueError:
        data = None
    # An accepted request with an empty/non-object body just means "poll".
    if isinstance(data, dict) and data.get("status") == "done":
        return data.get("result")

    start = time.monotonic()
    deadline = start + MAX_WAIT
    while time.monotonic() < deadline:
        time.sleep(POLL_INTERVAL)
        try:
            r = requests.get(f"{base}/api/analyze/{call_id}/status", timeout=15)
            data = r.json()
        except (requests.RequestException, ValueError) as e:
            # One flaky poll should not discard minutes of server-side work.
            print(f"    Status poll failed, retrying: {e}")
            continue
        if not isinstance(data, dict):
            continue

        status = data.get("status")
        if status == "done":
            return data.get("result")
        if status == "error":
            print(f"    Pipeline error: {data.get('error', '?')}")
            return None
        mins, secs = divmod(int(time.monotonic() - start), 60)
        # "\r" keeps the progress indicator on a single console line.
        print(f"    {status}... ({mins}m{secs}s)", end="\r")

    print(f"    Timeout after {MAX_WAIT}s")
    return None


def extract_emotions(result: dict) -> dict:
    """Flatten a pipeline result into the fields this script reports.

    Every optional section is read with an ``or {}`` / ``or []`` fallback, so
    a key that is missing *or* explicitly ``null`` in the server response is
    treated as empty instead of raising.

    Args:
        result: Pipeline result dict as returned by the server.

    Returns:
        A dict with, in insertion order:
        ``speaker_<i>_state`` for each entry of ``character_reactions``;
        ``garden_mood`` and ``garden_delta`` from ``garden_update``;
        ``recap_headline`` (``headline``, else ``title``, else ``"?"``);
        ``<speaker>_dominant`` and ``<speaker>_distribution`` for each entry of
        ``stage2_output.speaker_summaries``;
        ``segments`` (number of emotion segments) and ``segments_by_lang``
        (segment count per ``language`` value, ``"?"`` when absent).
    """
    info: dict = {}

    for i, rx in enumerate(result.get("character_reactions") or []):
        info[f"speaker_{i}_state"] = (rx or {}).get("solo_state", "?")

    garden = result.get("garden_update") or {}
    info["garden_mood"] = garden.get("mood", "?")
    info["garden_delta"] = garden.get("growth_delta", 0)

    recap = result.get("recap_card") or {}
    info["recap_headline"] = recap.get("headline") or recap.get("title") or "?"

    stage2 = result.get("stage2_output") or {}
    for spk, summary in (stage2.get("speaker_summaries") or {}).items():
        summary = summary or {}
        info[f"{spk}_dominant"] = summary.get("dominant_emotion", "?")
        info[f"{spk}_distribution"] = summary.get("emotion_distribution", {})

    # Segment-level language breakdown; top-level list wins over stage 2's.
    emotions = result.get("emotions") or stage2.get("emotions") or []
    segs_by_lang: dict[str, int] = {}
    for e in emotions:
        lang = (e or {}).get("language") or "?"
        segs_by_lang[lang] = segs_by_lang.get(lang, 0) + 1
    info["segments"] = len(emotions)
    info["segments_by_lang"] = segs_by_lang
    return info


def main() -> None:
    """Run the demo clips through the server and report intended-emotion hits.

    Steps:
      1. Parse ``--server`` and exit with status 1 if the health check fails.
      2. Load the ground-truth JSON (read as UTF-8, since it holds Korean text).
      3. For each tag (sorted) with an existing ``<tag>.wav``: upload, analyze,
         extract the reported fields and count a HIT when the intended
         ``primary_emotion`` equals any ``speaker_<i>_state`` value.
      4. Write the per-clip results to ``e2e_results.json`` (UTF-8) in the test
         directory and print a summary table plus any failed clips.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--server", default=DEFAULT_SERVER)
    args = parser.parse_args()
    base = args.server.rstrip("/")

    print("=" * 70)
    print("  20Hours Korean Demo — E2E Server Test")
    print(f"  Server: {base}")
    print("=" * 70)

    print("\n[1] Health check")
    if not health_check(base):
        sys.exit(1)

    print("\n[2] Loading intended emotion labels")
    # Explicit UTF-8: the locale default would mis-decode Korean descriptions
    # on platforms whose preferred encoding is not UTF-8.
    gt = json.loads(GT_PATH.read_text(encoding="utf-8"))
    print(f"  {len(gt)} demo clips loaded")

    print("\n[3] Running E2E tests\n")
    results: dict[str, dict] = {}
    hit = 0
    total = 0

    for tag in sorted(gt):
        wav_path = TEST_DIR / f"{tag}.wav"
        if not wav_path.exists():
            print(f"  {tag}: WAV not found, skipping")
            continue
        gt_entry = gt[tag]
        print(f"  {tag} — {gt_entry['description'][:55]}")
        print(
            f"     Intended: {gt_entry['primary_emotion']}"
            f" | Duration: {gt_entry['duration_sec']}s"
            f" | Utts: {gt_entry['total_utterances']}"
        )

        call_id = upload(base, wav_path)
        if not call_id:
            results[tag] = {"status": "upload_failed"}
            continue
        print(f"     Upload OK → {call_id}")

        print("     Analyzing...", end="")
        start_time = time.time()
        result = analyze_and_poll(base, call_id)
        elapsed = time.time() - start_time

        if not result:
            results[tag] = {"status": "analyze_failed", "call_id": call_id}
            print()
            continue

        # "\r" plus trailing spaces overwrites the in-place progress line.
        print(f"\r     Done in {elapsed:.1f}s                   ")

        emotions = extract_emotions(result)
        total += 1
        intended = gt_entry["primary_emotion"]
        speaker_states = {k: v for k, v in emotions.items() if k.endswith("_state")}
        if intended in speaker_states.values():
            hit += 1
            match = "HIT"
        else:
            match = "miss"

        # The raw pipeline result is deliberately not retained: it was never
        # written to disk and only inflated memory use.
        results[tag] = {
            "status": "pass",
            "call_id": call_id,
            "elapsed_sec": round(elapsed, 1),
            "intended_emotion": intended,
            "match": match,
            "emotions": emotions,
        }

        for k, v in emotions.items():
            if not k.endswith("_distribution"):
                print(f"     {k}: {v}")
            else:
                # Show only the three most probable emotions.
                top3 = sorted(v.items(), key=lambda x: -x[1])[:3]
                dist = ", ".join(f"{kk}:{vv:.2f}" for kk, vv in top3)
                print(f"     {k}: {dist}")
        print(f"     → {match}")
        print()

    out_path = TEST_DIR / "e2e_results.json"
    # ensure_ascii=False emits raw Korean characters, so the file encoding
    # must be pinned to UTF-8 rather than left to the locale default.
    out_path.write_text(
        json.dumps(results, indent=2, ensure_ascii=False), encoding="utf-8"
    )

    print("=" * 70)
    print("  SUMMARY")
    print("=" * 70)
    print(f"\n  {'Tag':<24} {'Intended':<12} {'Match':<6} {'Time':>6}")
    print("  " + "-" * 55)
    failed: list[str] = []
    for tag in sorted(results):
        r = results[tag]
        if r.get("status") != "pass":
            failed.append(f"{tag} ({r.get('status')})")
            continue
        intended = r["intended_emotion"]
        m = r["match"]
        t = f"{r['elapsed_sec']:.0f}s"
        print(f"  {tag:<24} {intended:<12} {m:<6} {t:>6}")
    print(f"\n  Intended-emotion match: {hit}/{total}")
    if failed:
        # Failed clips used to vanish from the summary entirely.
        print(f"  Failed clips: {', '.join(failed)}")
    print(f"  Results saved: {out_path}")


# Run the E2E test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()