"""Case Zero entrypoint - one ``gradio.Server`` for a Hugging Face (CPU) Space. The pixel-art frontend (built Preact bundle in ``web/dist``) and the game's JSON/SSE API are both served by a single ``gradio.Server`` (a FastAPI subclass). The LLM and TTS run in-process on the CPU via llama.cpp / Supertonic - no GPU, no inference API, fully local. Gradio's own frontend and Node SSR proxy are disabled (``_frontend=False``, ``ssr_mode=False``) so our SPA owns ``/`` and we don't pay the node-proxy CPU cost on the 2-vCPU Space. """ from __future__ import annotations import os import sys from pathlib import Path # Make src importable whether run from the repo root or as a Space. sys.path.insert(0, str(Path(__file__).resolve().parent / "src")) os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False") def _ensure_weights() -> None: """Download the LLM GGUF once if it is not already on disk (Spaces have no baked weights). A no-op locally where the file already exists. Invoked lazily on first real generation (M1+), not at boot, so the server starts instantly.""" from case_zero.config import get_settings settings = get_settings() if settings.llm_model_path.exists(): return try: import shutil from huggingface_hub import hf_hub_download dest = settings.llm_model_path dest.parent.mkdir(parents=True, exist_ok=True) cached = hf_hub_download( repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF", filename="qwen2.5-1.5b-instruct-q4_k_m.gguf", ) shutil.copy(cached, dest) print(f"[startup] fetched LLM weights -> {dest}") except Exception as exc: # pragma: no cover print(f"[startup] weight fetch failed: {exc}", file=sys.stderr) def main() -> None: from case_zero.api import build_server from case_zero.api.runtime import RUNTIME # Fetch weights if needed, then prebuild one case in the background so the first # "New Case" is ready (or nearly) by the time a detective connects. _ensure_weights() RUNTIME.start_buffer() server = build_server() server.launch( server_name="0.0.0.0", server_port=int(os.environ.get("CASE0_PORT", "7860")), share=False, ssr_mode=False, _frontend=False, ) if __name__ == "__main__": main()