"""Entry point. Thin by design — all logic lives in the `visualnovel` package.

Two UIs:
  - default: a custom VN frontend (frontend/index.html) served by `gradio.Server`, talking to
    @app.api endpoints via the Gradio JS client.  (Off-Brand / custom-UI bonus.)
  - GRADIO_MVP_UI=1: a plain gr.Blocks UI to de-risk the loop in Phase 0/1.

Run modes
---------
  uv run python app.py                   # use whatever's in .env (VN_MOCK default: 1)
  uv run python app.py --mode mock       # force VN_MOCK=1 (no models needed)
  uv run python app.py --mode prod       # force VN_MOCK=0 (real backends)
  uv run python app.py --mode debug      # VN_MOCK=0 + verbose logging + live monitor
"""

from __future__ import annotations

# Compatibility shim for the optional `spaces` package: when it is importable
# but does not expose `gradio_auto_wrap`, install a pass-through stand-in that
# returns the wrapped function unchanged. When `spaces` is not installed there
# is nothing to patch.
# NOTE(review): presumably gradio looks this attribute up while it is being
# imported, which is why the shim sits above every gradio import — confirm.
try:
    import spaces as _spaces

    if not hasattr(_spaces, "gradio_auto_wrap"):
        _spaces.gradio_auto_wrap = lambda fn: fn
except ImportError:
    pass

# ── Mode selection: must run BEFORE any visualnovel import ──────────────────
# config.py calls load_dotenv() and reads os.getenv() at import time, so the
# env vars must already be set by the time it is imported.
import argparse
import atexit
import logging
import os


def _apply_mode() -> str | None:
    p = argparse.ArgumentParser(add_help=False)
    p.add_argument(
        "--mode",
        choices=["mock", "prod", "debug"],
        default=None,
        help=(
            "mock → VN_MOCK=1 (no models, default)  |  "
            "prod → VN_MOCK=0 (real backends)  |  "
            "debug → VN_MOCK=0 + verbose logs + live resource monitor"
        ),
    )
    args, _ = p.parse_known_args()
    if args.mode == "mock":
        os.environ["VN_MOCK"] = "1"
    elif args.mode == "prod":
        os.environ["VN_MOCK"] = "0"
    elif args.mode == "debug":
        os.environ["VN_MOCK"] = "0"
        os.environ["VN_DEBUG"] = "1"
    return args.mode


# Resolve the run mode (and apply its env overrides) once, at import time,
# before the project imports further down.
_RUN_MODE = _apply_mode()

logging.basicConfig(
    level=logging.WARNING,  # keep third-party libs quiet
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    datefmt="%H:%M:%S",
)
if _RUN_MODE == "debug":
    # debug-level logs only for our own package; third-party stays at WARNING
    # NOTE: this is keyed on the --mode flag only — VN_DEBUG set by any other
    # means does not reach this branch.
    logging.getLogger("visualnovel").setLevel(logging.DEBUG)

# ── Silence known noisy ML dependency warnings ────────────────────────────
# transformers reads this env var at import time — set BEFORE anything imports it
# (its advisories, e.g. "CLIPImageProcessor requires torchvision", bypass stdlib logging).
# setdefault keeps any value that is already present in the environment.
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")

import warnings  # noqa: E402

# `message` is a regex that must match from the start of the warning text,
# hence the leading ".*" in the patterns below.
# huggingface_hub: deprecated symlinks arg (internal, not our call)
warnings.filterwarnings("ignore", message=".*local_dir_use_symlinks.*")
# huggingface_hub: unauthenticated rate-limit notice (surfaced as UserWarning too)
warnings.filterwarnings("ignore", message=".*unauthenticated.*")
# transformers: catch any remaining FutureWarnings we don't control (e.g. upstream renames)
warnings.filterwarnings("ignore", category=FutureWarning, module="transformers")

# Suppress WARNING-level noise from ML frameworks; errors still surface
for _lib in ("transformers", "diffusers", "huggingface_hub", "phonemizer"):
    logging.getLogger(_lib).setLevel(logging.ERROR)

# ── Project imports (after env vars are set) ─────────────────────────────────
from pathlib import Path

from visualnovel import config
from visualnovel.engine import Engine
from visualnovel.metrics import collector
from visualnovel.schemas import SetupForm

# Activate monitoring only in debug mode (no-op otherwise)
if config.DEBUG:
    collector.activate(config.RUNS_DIR)
    # Write the collected report when the interpreter exits.
    atexit.register(collector.save_report)

# Real backends only: fail fast with an actionable hint when an optional
# dependency is missing, then make sure the local model file is present.
if not config.USE_MOCK:
    import subprocess
    import sys

    # Check only the deps required by the configured backends.
    # Each entry is the name of the extra to pass to `uv sync`.
    _missing: list[str] = []

    if config.LLM_BACKEND == "llamacpp":
        try:
            import llama_cpp  # noqa: F401
        except ImportError:
            _missing.append("llamacpp")

    if config.LLM_BACKEND == "transformers":
        try:
            import transformers  # noqa: F401
        except ImportError:
            _missing.append("transformers")

    if config.IMAGE_BACKEND in ("local", "lightning"):
        try:
            import diffusers  # noqa: F401
        except ImportError:
            _missing.append("image")

    if config.TTS_BACKEND == "kokoro":
        try:
            import kokoro_onnx  # noqa: F401
            import soundfile  # noqa: F401
        except ImportError:
            _missing.append("tts")

    if config.LLM_BACKEND == "modal" or config.IMAGE_BACKEND == "modal":
        try:
            import modal  # noqa: F401
        except ImportError:
            _missing.append("modal")

    if _missing:
        # `uv sync --extra` takes ONE extra name per flag, so the flag is
        # repeated for each missing extra; a comma-joined list would be
        # treated as a single (invalid) extra name.
        extras = " ".join(f"--extra {extra_name}" for extra_name in _missing)
        print(f"[setup] Missing dependencies — run: uv sync {extras}")
        sys.exit(1)

    # Only fetch the GGUF when the llama.cpp backend is active
    if config.LLM_BACKEND == "llamacpp":
        _gguf_path = config.MODELS_DIR / config.LLM_GGUF_FILE
        if not _gguf_path.exists():
            print(f"[setup] Model not found at {_gguf_path} — running download script…")
            # check=True: a failed download aborts startup with CalledProcessError
            subprocess.run(
                [sys.executable, str(Path(__file__).parent / "scripts" / "download_models.py")],
                check=True,
            )

# Module-level singleton shared by every endpoint and UI callback below.
ENGINE = Engine()  # single-session game

if not config.USE_MOCK and config.LLM_BACKEND == "modal":
    try:
        ENGINE.llm.warmup()  # fire-and-forget: warm the GPU container before the first turn
    except Exception as exc:
        # Best-effort: a failed warmup is reported and startup continues.
        print(f"[setup] Modal warmup skipped: {exc}")

# HTML page returned by the custom server's "/" route.
FRONTEND = Path(__file__).parent / "frontend" / "index.html"

try:
    import spaces  # type: ignore

    def gpu(fn=None, **kw):
        """Decorate with `spaces.GPU`; usable as `@gpu` or `@gpu(duration=...)`."""
        decorator = spaces.GPU(**kw)
        if fn is None:
            # called with arguments only: hand back the configured decorator
            return decorator
        return decorator(fn)
except Exception:  # pragma: no cover

    def gpu(fn=None, **kw):
        """Stand-in used when `spaces` cannot be imported: leaves functions untouched."""
        if fn is None:
            return lambda f: f
        return fn


# =========================================================================== #
#  Custom frontend via gradio.Server
# =========================================================================== #
def build_server():
    """Build the custom-frontend app on top of `gradio.Server`.

    Mounts the static directories, serves the frontend page at "/", and
    registers one `@app.api` endpoint per engine operation. When
    `config.DEBUG` is set, the `/debug` routes are registered as well.

    Returns:
        The configured `Server` instance; the caller is responsible for
        calling `launch()` on it.
    """
    from fastapi.responses import HTMLResponse
    from fastapi.staticfiles import StaticFiles
    from gradio import Server

    def _setup_form(
        theme: str, tone: str, seed: int | None, player_name: str | None
    ) -> SetupForm:
        """Build the SetupForm shared by the `start` and `start_text` endpoints.

        A missing (None), empty or whitespace-only player name falls back to
        "the wanderer".
        """
        cleaned_name = (player_name or "").strip()
        return SetupForm(
            theme=theme, tone=tone, seed=seed, player_name=cleaned_name or "the wanderer"
        )

    app = Server()
    # serve generated images (backdrops/sprites) as static files at /images/<name>
    # StaticFiles checks that its directory exists when it is instantiated, so
    # create the cache dir first (same treatment as the music dir below).
    _images_dir = Path(config.CACHE_DIR)
    _images_dir.mkdir(parents=True, exist_ok=True)
    app.mount("/images", StaticFiles(directory=str(_images_dir)), name="images")
    # serve background music tracks at /music/<name>.mp3|ogg
    _music_dir = Path(__file__).parent / "frontend" / "music"
    _music_dir.mkdir(parents=True, exist_ok=True)
    app.mount("/music", StaticFiles(directory=str(_music_dir)), name="music")

    @app.get("/", response_class=HTMLResponse)
    async def home() -> str:
        """Serve the frontend page (re-read from disk on every request)."""
        return FRONTEND.read_text(encoding="utf-8")

    @app.api(name="themes")
    def themes() -> dict:
        """Return the configured themes and tones."""
        return {"themes": config.THEMES, "tones": config.TONES}

    @app.api(name="start")
    @gpu
    def start(
        theme: str = "school",
        tone: str = "romantic",
        seed: int | None = None,
        player_name: str = "",
    ) -> dict:
        """Start a new game via ENGINE.start and return the dumped view."""
        return ENGINE.start(_setup_form(theme, tone, seed, player_name)).model_dump()

    @app.api(name="start_text")
    @gpu
    def start_text(
        theme: str = "school",
        tone: str = "romantic",
        seed: int | None = None,
        player_name: str = "",
    ) -> dict:
        """Phase 1 — LLM init only. Returns text-only ViewState (no images)."""
        return ENGINE.start_text(_setup_form(theme, tone, seed, player_name)).model_dump()

    @app.api(name="start_images")
    @gpu
    def start_images() -> dict:
        """Phase 2 — paint backdrop + sprite. Call after start_text."""
        return ENGINE.start_images().model_dump()

    @app.api(name="turn")
    @gpu
    def turn(player_input: str, action: str = "talk", target: str = "") -> dict:
        """Play one turn via ENGINE.play_turn and return the dumped view."""
        return ENGINE.play_turn(player_input, action=action, target=target).model_dump()

    @app.api(name="turn_text")
    @gpu
    def turn_text(player_input: str, action: str = "talk", target: str = "") -> dict:
        """Phase 1 — STT + LLM + state. Returns text-only ViewState (dialogue first)."""
        return ENGINE.play_turn_text(player_input, action=action, target=target).model_dump()

    @app.api(name="turn_images")
    @gpu
    def turn_images() -> dict:
        """Phase 2 — paint + TTS. Call after turn_text."""
        return ENGINE.play_turn_images().model_dump()

    @app.api(name="session_info")
    def session_info() -> dict:
        """Peek at the persisted session — cheap file read, no GPU."""
        from visualnovel.engine import session_info as _info  # noqa: PLC0415

        return _info()

    @app.api(name="resume")
    @gpu
    def resume() -> dict:
        """Restore the last persisted session (paints + TTS)."""
        view = ENGINE.resume()
        if view is None:
            # reported as a payload rather than raised
            return {"error": "no session to resume"}
        return view.model_dump()

    @app.api(name="save_data")
    def save_data() -> dict:
        """Return current game state as JSON string for client-side download."""
        return {"json": ENGINE.save_data()}

    @app.api(name="load_file")
    @gpu
    def load_file(data: str) -> dict:
        """Restore game from a JSON string uploaded by the client."""
        return ENGINE.load_data(data).model_dump()

    @app.api(name="transcribe")
    @gpu
    def transcribe(audio: dict) -> dict:
        """Transcribe an uploaded recording via ENGINE.transcribe."""
        # `audio` is a Gradio FileData-like dict with a "path" key.
        # Anything that is not a dict is passed through as the path itself.
        path = audio["path"] if isinstance(audio, dict) else audio
        return {"text": ENGINE.transcribe(path)}

    # ── Debug dashboard — only registered when VN_DEBUG=1 ────────────────
    if config.DEBUG:
        import asyncio as _asyncio
        import json as _json

        from fastapi.responses import StreamingResponse

        _debug_html = Path(__file__).parent / "frontend" / "debug.html"

        @app.get("/debug", response_class=HTMLResponse)
        async def debug_dashboard() -> str:
            """Serve the debug dashboard page."""
            return _debug_html.read_text(encoding="utf-8")

        @app.get("/debug/stream")
        async def debug_stream() -> StreamingResponse:
            """Stream one collector snapshot per second as server-sent events."""

            async def _gen():
                try:
                    while True:
                        yield f"data: {_json.dumps(collector.snapshot())}\n\n"
                        await _asyncio.sleep(1.0)
                except _asyncio.CancelledError:
                    # cancellation simply ends the stream
                    pass

            return StreamingResponse(
                _gen(),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
            )

        @app.get("/debug/report")
        async def debug_report() -> dict:
            """Write the collector report on demand."""
            collector.save_report()
            return {"status": "ok"}

    return app


# =========================================================================== #
#  MVP fallback: plain gr.Blocks
# =========================================================================== #
def build_mvp():
    """Assemble the plain gr.Blocks fallback UI around the shared ENGINE.

    Returns:
        The `gr.Blocks` app, not yet launched.
    """
    import gradio as gr

    def _present(view):
        """Map a view to (scene image path or None, markdown dialogue line)."""
        url = view.backdrop_url
        # only the file name of the URL is used; the image is looked up in the cache dir
        image = str(config.CACHE_DIR / Path(url).name) if url else None
        return image, f"**{view.speaker}** ({view.emotion}): {view.dialogue}"

    def on_start(theme, tone):
        return _present(ENGINE.start(SetupForm(theme=theme, tone=tone)))

    def on_turn(msg):
        # the trailing "" is written back to the textbox, clearing it
        return (*_present(ENGINE.play_turn(msg)), "")

    with gr.Blocks(title="Ephemeral Hearts (MVP)") as blocks:
        gr.Markdown("## 💕 Ephemeral Hearts — MVP loop")
        with gr.Row():
            theme_choice = gr.Dropdown(list(config.THEMES), value="school", label="Theme")
            tone_choice = gr.Dropdown(config.TONES, value="romantic", label="Tone")
            enter_button = gr.Button("Enter the story", variant="primary")
        scene_image = gr.Image(label="Scene", height=420)
        dialogue_md = gr.Markdown()
        with gr.Row():
            message_box = gr.Textbox(placeholder="Say something…", scale=4, label="")
            speak_button = gr.Button("Speak", scale=1)

        enter_button.click(on_start, [theme_choice, tone_choice], [scene_image, dialogue_md])
        # the button click and Enter in the textbox both play a turn
        for register in (speak_button.click, message_box.submit):
            register(on_turn, [message_box], [scene_image, dialogue_md, message_box])
    return blocks


if __name__ == "__main__":
    # config.MVP_UI selects the plain gr.Blocks fallback; otherwise the custom
    # frontend server is launched.
    if not config.MVP_UI:
        build_server().launch(show_error=True)
    else:
        build_mvp().launch()