"""Headless end-to-end backend test (no Gradio UI).

Runs: sample video -> audio -> Whisper -> LLM step draft -> step-aligned frames
-> assemble -> DOCX, asserting each stage produced output. The LLM step falls
back to a naive one-step-per-transcript-segment draft if the LLM call raises or
returns no steps (e.g. the HuggingFace API is unreachable), so the DOCX assembly
is always validated.
"""
from __future__ import annotations

import sys
from pathlib import Path

# Best-effort console fix: switch stdout to UTF-8 (unencodable characters are
# replaced) because this script prints non-ASCII text such as "…" and "✅".
try:  # Windows consoles default to cp1252 and choke on emoji/non-ASCII output.
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
except Exception:
    # Deliberately ignored: if stdout cannot be reconfigured, the script
    # carries on with the stream's existing settings.
    pass

# Parent of the directory containing this file (presumably the repository root).
ROOT = Path(__file__).resolve().parent.parent
# Put ROOT and ROOT/scripts at the front of the import path so the `src`
# package and `make_sample` imported below can be resolved (assumes
# `make_sample` lives in ROOT/scripts — confirm). This must run before those
# imports, which is why they carry `noqa: E402`.
sys.path.insert(0, str(ROOT))
sys.path.insert(0, str(ROOT / "scripts"))

import make_sample  # noqa: E402
from src import config, docx_export, video  # noqa: E402
from src import frames as frames_lib  # noqa: E402
from src import guide as guide_lib  # noqa: E402
from src import llm, transcribe  # noqa: E402
from src.llm import GuideDraft, StepDraft  # noqa: E402


def naive_draft(tr) -> GuideDraft:
    """Build an offline fallback draft with one step per non-blank segment.

    Args:
        tr: Transcription result. Only ``tr.segments`` is read; each segment
            must expose ``text`` and ``start``.

    Returns:
        A ``GuideDraft`` with a fixed title and intro. Its steps are numbered
        consecutively from 1 ("Step 1", "Step 2", ...), each using the
        stripped segment text and the segment start as ``approx_timestamp``.
    """
    steps = []
    for seg in tr.segments:
        text = seg.text.strip()
        if not text:
            # Skip blank segments *before* numbering so a dropped segment
            # does not leave a gap in the "Step N" headings.
            continue
        steps.append(
            StepDraft(
                heading=f"Step {len(steps) + 1}",
                text=text,
                approx_timestamp=seg.start,
            )
        )
    return GuideDraft(title="Sample Guide", intro="Generated offline.", steps=steps)


def main() -> None:
    """Run the backend pipeline on the sample video and print a report.

    Stages, in order: audio extraction + transcription, guide draft (LLM,
    with ``naive_draft`` as the fallback), step-aligned frame extraction,
    guide assembly with captions, and DOCX export. The output of each stage
    is checked with an ``assert`` before moving on.
    """
    video_file = config.WORK_DIR / "sample" / "sample.mp4"
    if not video_file.exists():
        # No sample on disk yet: let the sample generator provide one.
        video_file = make_sample.main()

    workdir = config.session_dir("smoke")

    # The app gets the token from the UI; this headless run takes it from
    # config.resolve_hf_token() (presumably the HF_TOKEN /
    # HUGGINGFACEHUB_API_TOKEN environment variables).
    hf_token = config.resolve_hf_token()
    if hf_token:
        token_state = "present"
    else:
        token_state = "MISSING (LLM step will use naive fallback)"
    print(f"HF token: {token_state}")

    # --- Stage 1: audio track -> transcript --------------------------------
    print("\n[1/5] Extract audio + transcribe…")
    audio_path = video.extract_audio(video_file, workdir / "audio.wav")
    transcript = transcribe.transcribe(audio_path)
    print(
        f"  device={transcript.device} "
        f"segments={len(transcript.segments)} "
        f"text={transcript.text[:120]!r}"
    )
    assert transcript.text.strip(), "Transcript is empty"

    # --- Stage 2: transcript -> step draft ---------------------------------
    print("[2/5] Build guide draft (LLM)…")
    try:
        draft = llm.build_guide_draft(transcript, token=hf_token)
        if not draft.steps:
            # An empty result is treated like a failed call.
            raise RuntimeError("LLM returned no steps")
        print(f"  LLM ok: '{draft.title}' ({len(draft.steps)} steps)")
    except Exception as err:
        # Any LLM failure is tolerated so the later stages still get exercised.
        print(f"  LLM unavailable ({err}); using naive fallback draft.")
        draft = naive_draft(transcript)
    assert draft.steps, "No steps in draft"

    # --- Stage 3: frames aligned to the draft's step timestamps -------------
    print("[3/5] Auto-extract frames (step-aligned)…")
    if transcript.segments:
        speech_spans = [(seg.start, seg.end) for seg in transcript.segments]
    else:
        speech_spans = None
    step_times = []
    for step in draft.steps:
        if step.approx_timestamp is not None:
            step_times.append(step.approx_timestamp)
    frame_records = frames_lib.extract_auto_frames(
        video_file,
        workdir,
        spoken_intervals=speech_spans,
        step_timestamps=step_times or None,  # empty list is passed as None
    )
    print(f"  frames={len(frame_records)} (from {len(step_times)} step timestamps)")
    assert frame_records, "No frames were extracted"

    # --- Stage 4: combine draft + frames into the final guide ---------------
    print("[4/5] Assemble (align + caption)…")
    if transcript.segments:
        earliest = min(seg.start for seg in transcript.segments)
        latest = max(seg.end for seg in transcript.segments)
        speech_bounds = (earliest, latest)
    else:
        speech_bounds = None
    guide = guide_lib.assemble_guide(
        draft,
        frame_records,
        video_path=str(video_file),
        session_dir=workdir,
        do_caption=True,
        token=hf_token,
        spoken_range=speech_bounds,
    )

    # --- Stage 5: write the document ----------------------------------------
    print("[5/5] Export DOCX…")
    docx_path = workdir / "guide.docx"
    docx_export.export_docx(guide, docx_path)
    assert docx_path.exists() and docx_path.stat().st_size > 0, "DOCX not written"

    # Per-step report; a missing timestamp is shown as 00:00.
    for step in guide.steps:
        minutes, seconds = divmod(int(step.timestamp or 0), 60)
        stamp = f"{minutes:02d}:{seconds:02d}"
        print(
            f"   - [{stamp}] {step.heading!r}: "
            f"img={'yes' if step.image_path else 'no'} cap={step.caption!r}"
        )

    image_count = len([step for step in guide.steps if step.image_path])
    caption_count = len([step for step in guide.steps if step.caption])
    totals = (
        f"{docx_path.stat().st_size} bytes, {len(guide.steps)} steps, "
        f"{image_count} images, {caption_count} captions"
    )
    print(f"\nOK ✅  {docx_path}  ({totals})")


# Run the smoke test only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()