# Spaces: Running
# Case Zero - initial public release (fully local: Qwen2.5-1.5B via llama.cpp + Supertonic, custom pixel-noir SPA via gradio.Server)
# 414dc55
"""Case Zero entrypoint - one ``gradio.Server`` for a Hugging Face (CPU) Space.

The pixel-art frontend (built Preact bundle in ``web/dist``) and the game's JSON/SSE API
are both served by a single ``gradio.Server`` (a FastAPI subclass). The LLM and TTS run
in-process on the CPU via llama.cpp / Supertonic - no GPU, no inference API, fully local.
Gradio's own frontend and Node SSR proxy are disabled (``_frontend=False``,
``ssr_mode=False``) so our SPA owns ``/`` and we don't pay the node-proxy CPU cost on the
2-vCPU Space.
"""
| from __future__ import annotations | |
| import os | |
| import sys | |
| from pathlib import Path | |
# Put the sibling ``src`` directory at the front of the import path so modules under it
# can be imported whether this file is run from the repo root or as a Space.
sys.path.insert(0, str(Path(__file__).resolve().parent.joinpath("src")))

# Leave an already-set value alone; otherwise default Gradio analytics to "False".
if "GRADIO_ANALYTICS_ENABLED" not in os.environ:
    os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
def _ensure_weights() -> None:
    """Fetch the LLM GGUF once if it is not already on disk.

    Spaces have no baked-in weights, so the file is downloaded through the Hugging Face
    hub cache and copied to ``settings.llm_model_path``. When that path already exists
    this is a no-op (the usual case locally). ``main()`` calls this before launching the
    server.

    The copy goes to a ``.part`` staging file next to the destination and is moved into
    place with ``os.replace`` only after it completes, so an interrupted copy can never
    leave a truncated file that the ``exists()`` check would accept on the next start.

    Best-effort: any failure is reported on stderr and swallowed; nothing is raised to
    the caller.
    """
    from case_zero.config import get_settings

    settings = get_settings()
    dest = settings.llm_model_path
    if dest.exists():
        return

    # Staging path in the same directory so the final rename stays on one filesystem.
    staging = dest.with_name(dest.name + ".part")
    try:
        import shutil

        from huggingface_hub import hf_hub_download

        dest.parent.mkdir(parents=True, exist_ok=True)
        cached = hf_hub_download(
            repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
            filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
        )
        shutil.copy(cached, staging)
        # Publish the finished file in a single step; readers never see a partial copy.
        os.replace(staging, dest)
        print(f"[startup] fetched LLM weights -> {dest}")
    except Exception as exc:  # pragma: no cover
        print(f"[startup] weight fetch failed: {exc}", file=sys.stderr)
        # Drop a half-written staging file; it may not exist if the failure came earlier.
        try:
            staging.unlink()
        except OSError:
            pass
def main() -> None:
    """Prepare weights, start the runtime buffer, then build and launch the server.

    The listening port is read from ``CASE0_PORT`` (default ``7860``); the server binds
    to ``0.0.0.0`` with sharing, SSR mode, and Gradio's own frontend all switched off.
    """
    from case_zero.api import build_server
    from case_zero.api.runtime import RUNTIME

    # Weights first, then start the buffer so one case is being prebuilt in the
    # background and the first "New Case" is ready (or nearly) when a detective connects.
    _ensure_weights()
    RUNTIME.start_buffer()

    app = build_server()
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": int(os.environ.get("CASE0_PORT", "7860")),
        "share": False,
        "ssr_mode": False,
        "_frontend": False,
    }
    app.launch(**launch_options)
# Run the server only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()