WillHbx's picture
feat(app, frontend): split-phase endpoints, resume, player name, main-menu button
797862a
Raw
History Blame Contribute Delete
13 kB
"""Entry point. Thin by design β€” all logic lives in the `visualnovel` package.
Two UIs:
- default: a custom VN frontend (frontend/index.html) served by `gradio.Server`, talking to
@app.api endpoints via the Gradio JS client. (Off-Brand / custom-UI bonus.)
- GRADIO_MVP_UI=1: a plain gr.Blocks UI to de-risk the loop in Phase 0/1.
Run modes
---------
uv run python app.py # use whatever's in .env (VN_MOCK default: 1)
uv run python app.py --mode mock # force VN_MOCK=1 (no models needed)
uv run python app.py --mode prod # force VN_MOCK=0 (real backends)
uv run python app.py --mode debug # VN_MOCK=0 + verbose logging + live monitor
"""
from __future__ import annotations
# Compatibility shim for the optional `spaces` package. It is deliberately kept
# ahead of the gradio import (original author's note; presumably gradio looks
# the attribute up while importing — unverified).
try:
    import spaces as _spaces

    # Only fill the gap: an existing `gradio_auto_wrap` is left untouched,
    # otherwise an identity decorator is installed under that name.
    if not hasattr(_spaces, "gradio_auto_wrap"):
        _spaces.gradio_auto_wrap = lambda fn: fn
except ImportError:
    # `spaces` is not installed — nothing to patch.
    pass
# ── Mode selection: must run BEFORE any visualnovel import ──────────────────
# config.py reads os.getenv() at import time via load_dotenv(), so we must
# set the env vars first.
import argparse
import atexit
import logging
import os
def _apply_mode() -> str | None:
    """Read ``--mode`` from the command line and export the matching env vars.

    Only ``--mode`` is parsed; every other argument is ignored
    (``parse_known_args``) and no ``-h/--help`` option is registered.

    Environment changes per mode:
        mock  -> ``VN_MOCK=1``
        prod  -> ``VN_MOCK=0``
        debug -> ``VN_MOCK=0`` and ``VN_DEBUG=1``

    Returns:
        The selected mode, or ``None`` when ``--mode`` was not given (in which
        case the environment is left untouched).
    """
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument(
        "--mode",
        choices=["mock", "prod", "debug"],
        default=None,
        help=(
            "mock → VN_MOCK=1 (no models, default) | "
            "prod → VN_MOCK=0 (real backends) | "
            "debug → VN_MOCK=0 + verbose logs + live resource monitor"
        ),
    )
    args, _ = parser.parse_known_args()

    # One table instead of an if/elif chain: mode -> env vars to force.
    env_by_mode = {
        "mock": {"VN_MOCK": "1"},
        "prod": {"VN_MOCK": "0"},
        "debug": {"VN_MOCK": "0", "VN_DEBUG": "1"},
    }
    os.environ.update(env_by_mode.get(args.mode, {}))
    return args.mode
# Resolve the run mode first: this exports VN_MOCK / VN_DEBUG before any
# project module gets imported further down.
_RUN_MODE = _apply_mode()

# Root logger stays at WARNING so third-party libraries remain quiet.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    datefmt="%H:%M:%S",
    level=logging.WARNING,
)

if _RUN_MODE == "debug":
    # Verbose output is limited to our own package's logger.
    logging.getLogger("visualnovel").setLevel(logging.DEBUG)
# ── Silence known noisy ML dependency warnings ────────────────────────────
# transformers reads this env var at import time — it has to be set BEFORE anything
# imports it (its advisories, e.g. "CLIPImageProcessor requires torchvision",
# bypass stdlib logging). `setdefault` keeps a value the user already exported.
os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")

import warnings  # noqa: E402

# huggingface_hub: deprecated symlinks argument (internal, not our call)
warnings.filterwarnings(action="ignore", message=".*local_dir_use_symlinks.*")
# huggingface_hub: unauthenticated rate-limit notice (surfaced as UserWarning too)
warnings.filterwarnings(action="ignore", message=".*unauthenticated.*")
# transformers: any remaining FutureWarnings we don't control (e.g. upstream renames)
warnings.filterwarnings(action="ignore", module="transformers", category=FutureWarning)

# Raise the threshold of the ML frameworks' loggers: WARNING noise is dropped,
# errors still surface.
for _lib in ("transformers", "diffusers", "huggingface_hub", "phonemizer"):
    logging.getLogger(_lib).setLevel(logging.ERROR)
# ── Project imports (after env vars are set) ─────────────────────────────────
from pathlib import Path
from visualnovel import config
from visualnovel.engine import Engine
from visualnovel.metrics import collector
from visualnovel.schemas import SetupForm
# Resource monitoring is switched on only when config.DEBUG is truthy;
# nothing is activated or registered otherwise.
if config.DEBUG:
    collector.activate(config.RUNS_DIR)
    # Write the collector's report when the interpreter shuts down.
    atexit.register(collector.save_report)
# Real-backend preflight: verify that the optional dependencies needed by the
# configured backends are importable and that the llama.cpp model file exists.
# Skipped entirely when config.USE_MOCK is truthy.
if not config.USE_MOCK:
    import subprocess
    import sys

    # Check only the deps required by the configured backends. Each entry added
    # to `_missing` is the name passed to `uv sync --extra` in the hint below.
    _missing: list[str] = []
    if config.LLM_BACKEND == "llamacpp":
        try:
            import llama_cpp  # noqa: F401
        except ImportError:
            _missing.append("llamacpp")
    if config.LLM_BACKEND == "transformers":
        try:
            import transformers  # noqa: F401
        except ImportError:
            _missing.append("transformers")
    if config.IMAGE_BACKEND in ("local", "lightning"):
        try:
            import diffusers  # noqa: F401
        except ImportError:
            _missing.append("image")
    if config.TTS_BACKEND == "kokoro":
        try:
            import kokoro_onnx  # noqa: F401
            import soundfile  # noqa: F401
        except ImportError:
            _missing.append("tts")
    if config.LLM_BACKEND == "modal" or config.IMAGE_BACKEND == "modal":
        try:
            import modal  # noqa: F401
        except ImportError:
            _missing.append("modal")

    if _missing:
        # Abort with an actionable hint instead of failing later inside a backend.
        extras = ",".join(_missing)
        print(f"[setup] Missing dependencies — run: uv sync --extra {extras}")
        sys.exit(1)

    # Only fetch the GGUF when the llama.cpp backend is active
    if config.LLM_BACKEND == "llamacpp":
        _gguf_path = config.MODELS_DIR / config.LLM_GGUF_FILE
        if not _gguf_path.exists():
            print(f"[setup] Model not found at {_gguf_path} — running download script…")
            # Run the download script with the current interpreter;
            # check=True raises CalledProcessError if the script fails.
            subprocess.run(
                [sys.executable, str(Path(__file__).parent / "scripts" / "download_models.py")],
                check=True,
            )
# One engine instance for the whole process (single-session game).
ENGINE = Engine()

if not config.USE_MOCK:
    if config.LLM_BACKEND == "modal":
        # Best-effort warmup of the remote LLM before the first turn; any
        # failure is reported and startup simply continues.
        try:
            ENGINE.llm.warmup()
        except Exception as err:
            print(f"[setup] Modal warmup skipped: {err}")

# HTML page returned by the custom frontend's "/" route.
FRONTEND = Path(__file__).parent.joinpath("frontend", "index.html")
try:
    import spaces  # type: ignore
except Exception:  # pragma: no cover
    def gpu(fn=None, **kw):
        """No-op stand-in used when `spaces` cannot be imported.

        Works both as ``@gpu`` (returns `fn` unchanged) and as
        ``@gpu(duration=...)`` (returns an identity decorator); the keyword
        arguments are accepted and ignored.
        """
        if fn is None:
            return lambda f: f
        return fn
else:
    def gpu(fn=None, **kw):
        """Decorate with ``spaces.GPU``; supports ``@gpu`` and ``@gpu(duration=...)``.

        Keyword arguments are forwarded to ``spaces.GPU``. Without `fn` the
        configured decorator itself is returned.
        """
        decorator = spaces.GPU(**kw)
        if fn is None:
            return decorator
        return decorator(fn)
# =========================================================================== #
# Custom frontend via gradio.Server
# =========================================================================== #
def build_server():
    """Build the custom-frontend application on top of ``gradio.Server``.

    Mounts the static directories, serves ``frontend/index.html`` at ``/`` and
    registers one ``@app.api`` endpoint per engine operation. When
    ``config.DEBUG`` is truthy the ``/debug`` routes are registered as well.

    Returns:
        The configured ``Server`` instance (not yet launched).
    """
    from fastapi.responses import HTMLResponse
    from fastapi.staticfiles import StaticFiles
    from gradio import Server

    app = Server()

    # serve generated images (backdrops/sprites) as static files at /images/<name>
    app.mount("/images", StaticFiles(directory=str(config.CACHE_DIR)), name="images")

    # serve background music tracks at /music/<name>.mp3|ogg
    _music_dir = Path(__file__).parent / "frontend" / "music"
    _music_dir.mkdir(exist_ok=True)  # the mount needs an existing directory
    app.mount("/music", StaticFiles(directory=str(_music_dir)), name="music")

    def _setup_form(theme: str, tone: str, seed: int | None, player_name: str | None) -> SetupForm:
        """Build the SetupForm shared by `start` and `start_text`.

        A missing, empty or whitespace-only player name falls back to
        "the wanderer"; ``None`` is tolerated instead of raising.
        """
        name = (player_name or "").strip() or "the wanderer"
        return SetupForm(theme=theme, tone=tone, seed=seed, player_name=name)

    @app.get("/", response_class=HTMLResponse)
    async def home() -> str:
        """Return the frontend page, read from disk on every request."""
        return FRONTEND.read_text(encoding="utf-8")

    @app.api(name="themes")
    def themes() -> dict:
        """Expose the configured themes and tones to the client."""
        return {"themes": config.THEMES, "tones": config.TONES}

    @app.api(name="start")
    @gpu
    def start(
        theme: str = "school",
        tone: str = "romantic",
        seed: int | None = None,
        player_name: str = "",
    ) -> dict:
        """Start a new game from the setup form and return the view as a dict."""
        form = _setup_form(theme, tone, seed, player_name)
        return ENGINE.start(form).model_dump()

    @app.api(name="start_text")
    @gpu
    def start_text(
        theme: str = "school",
        tone: str = "romantic",
        seed: int | None = None,
        player_name: str = "",
    ) -> dict:
        """Phase 1 — LLM init only. Returns text-only ViewState (no images)."""
        form = _setup_form(theme, tone, seed, player_name)
        return ENGINE.start_text(form).model_dump()

    @app.api(name="start_images")
    @gpu
    def start_images() -> dict:
        """Phase 2 — paint backdrop + sprite. Call after start_text."""
        return ENGINE.start_images().model_dump()

    @app.api(name="turn")
    @gpu
    def turn(player_input: str, action: str = "talk", target: str = "") -> dict:
        """Play one full turn and return the resulting view as a dict."""
        return ENGINE.play_turn(player_input, action=action, target=target).model_dump()

    @app.api(name="turn_text")
    @gpu
    def turn_text(player_input: str, action: str = "talk", target: str = "") -> dict:
        """Phase 1 — STT + LLM + state. Returns text-only ViewState (dialogue first)."""
        return ENGINE.play_turn_text(player_input, action=action, target=target).model_dump()

    @app.api(name="turn_images")
    @gpu
    def turn_images() -> dict:
        """Phase 2 — paint + TTS. Call after turn_text."""
        return ENGINE.play_turn_images().model_dump()

    @app.api(name="session_info")
    def session_info() -> dict:
        """Peek at the persisted session — cheap file read, no GPU."""
        from visualnovel.engine import session_info as _info  # noqa: PLC0415

        return _info()

    @app.api(name="resume")
    @gpu
    def resume() -> dict:
        """Restore the last persisted session (paints + TTS).

        Returns ``{"error": "no session to resume"}`` when the engine has
        nothing to restore.
        """
        view = ENGINE.resume()
        if view is None:
            return {"error": "no session to resume"}
        return view.model_dump()

    @app.api(name="save_data")
    def save_data() -> dict:
        """Return current game state as JSON string for client-side download."""
        return {"json": ENGINE.save_data()}

    @app.api(name="load_file")
    @gpu
    def load_file(data: str) -> dict:
        """Restore game from a JSON string uploaded by the client."""
        return ENGINE.load_data(data).model_dump()

    @app.api(name="transcribe")
    @gpu
    def transcribe(audio: dict) -> dict:
        """Transcribe an uploaded audio clip and return ``{"text": ...}``."""
        # `audio` is a Gradio FileData-like dict with a "path" key; anything
        # that is not a dict is passed to the engine as-is.
        path = audio["path"] if isinstance(audio, dict) else audio
        return {"text": ENGINE.transcribe(path)}

    # ── Debug dashboard — only registered when VN_DEBUG=1 ────────────────
    if config.DEBUG:
        import asyncio as _asyncio
        import json as _json

        from fastapi.responses import StreamingResponse

        _debug_html = Path(__file__).parent / "frontend" / "debug.html"

        @app.get("/debug", response_class=HTMLResponse)
        async def debug_dashboard() -> str:
            """Return the debug dashboard page."""
            return _debug_html.read_text(encoding="utf-8")

        @app.get("/debug/stream")
        async def debug_stream() -> StreamingResponse:
            """Stream a collector snapshot once per second as server-sent events."""

            async def _gen():
                try:
                    while True:
                        yield f"data: {_json.dumps(collector.snapshot())}\n\n"
                        await _asyncio.sleep(1.0)
                except _asyncio.CancelledError:
                    # End the stream quietly when the task is cancelled
                    # (typically on client disconnect).
                    pass

            return StreamingResponse(
                _gen(),
                media_type="text/event-stream",
                headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
            )

        @app.get("/debug/report")
        async def debug_report() -> dict:
            """Write the collector report on demand."""
            collector.save_report()
            return {"status": "ok"}

    return app
# =========================================================================== #
# MVP fallback: plain gr.Blocks
# =========================================================================== #
def build_mvp():
    """Build the plain ``gr.Blocks`` fallback UI.

    The page has theme/tone dropdowns with a start button, a scene image, a
    Markdown dialogue line, and a text box with a send button.

    Returns:
        The ``gr.Blocks`` app (not yet launched).
    """
    import gradio as gr

    def _render(view):
        """Turn an engine view into ``(image path or None, markdown line)``."""
        image = None
        if view.backdrop_url:
            # Only the file name of the URL is used; the file itself is
            # looked up in the cache directory.
            image = str(config.CACHE_DIR / Path(view.backdrop_url).name)
        return image, f"**{view.speaker}** ({view.emotion}): {view.dialogue}"

    def on_start(theme, tone):
        """Start a new game and return the scene and dialogue outputs."""
        return _render(ENGINE.start(SetupForm(theme=theme, tone=tone)))

    def on_turn(msg):
        """Play one turn; the trailing empty string clears the input box."""
        image, line = _render(ENGINE.play_turn(msg))
        return image, line, ""

    with gr.Blocks(title="Ephemeral Hearts (MVP)") as demo:
        gr.Markdown("## 💕 Ephemeral Hearts — MVP loop")
        with gr.Row():
            theme = gr.Dropdown(list(config.THEMES), value="school", label="Theme")
            tone = gr.Dropdown(config.TONES, value="romantic", label="Tone")
            start_btn = gr.Button("Enter the story", variant="primary")
        scene = gr.Image(label="Scene", height=420)
        dialogue = gr.Markdown()
        with gr.Row():
            box = gr.Textbox(placeholder="Say something…", scale=4, label="")
            send = gr.Button("Speak", scale=1)

        start_btn.click(on_start, [theme, tone], [scene, dialogue])
        # The button and Enter in the text box trigger the same handler.
        send.click(on_turn, [box], [scene, dialogue, box])
        box.submit(on_turn, [box], [scene, dialogue, box])
    return demo
if __name__ == "__main__":
    # The custom frontend is the default; config.MVP_UI switches to the
    # plain gr.Blocks fallback.
    if not config.MVP_UI:
        build_server().launch(show_error=True)
    else:
        build_mvp().launch()