"""Chalchitra — a film for this moment. Entry point for the Hugging Face Space.

The Space runtime expects a Gradio object named `demo`; defining and launching
it also initializes ZeroGPU. We launch it non-blocking (SSR off so Python serves
routes directly), then mount our own React frontend and the /api/interpret
endpoint onto the underlying FastAPI app. The model answering is whichever one
we point at — no hosted API in the loop.

The frontend POSTs images + a fragment; the oracle returns the frozen contract:

    {"interpretation": str, "films": [{"title", "year", "rationale"}]}
"""
from __future__ import annotations

import os
import traceback
from functools import lru_cache
from pathlib import Path

from dotenv import load_dotenv
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field

import gradio as gr

from backend import InputError, OracleError, interpret
from backend.providers import get_provider

load_dotenv()

# Defaults applied only when running on the Space (SPACE_ID is set). Locally
# they stay unset, so we keep talking to LM Studio. They are applied with
# setdefault, so a Space Variable set in the Settings UI still wins.
_SPACE_DEFAULTS = (
    # Run the model in-process on ZeroGPU (Qwen2.5-VL).
    ("CHALCHITRA_PROVIDER", "hf_local"),
    # 7B: 3B (≤4B, Tiny Titan) was verified and rejected — it returned <3 films
    # and described the photo rather than reading it. 7B holds the quality bar.
    ("CHALCHITRA_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct"),
    # Refined prompt on the Space: on Qwen it picks markedly more atmospheric
    # films (and avoids describing the photo); the bounded retry covers its
    # occasional JSON wobble. Local dev stays on the baseline default.
    ("CHALCHITRA_REFINED_PROMPT", "1"),
)

if os.environ.get("SPACE_ID"):
    for _name, _value in _SPACE_DEFAULTS:
        os.environ.setdefault(_name, _value)

HERE = Path(__file__).parent
DIST = HERE.joinpath("frontend", "dist")

# Exception class names that mean "couldn't reach / hear back from the model
# server" rather than a real bug. Matched by name to avoid importing
# openai/httpx.
_CONNECTIVITY_ERRORS = {
    "APIConnectionError",
    "APITimeoutError",
    "InternalServerError",
    "ConnectError",
    "ConnectTimeout",
    "ReadTimeout",
    "TimeoutException",
}

# ── ZeroGPU needs at least one @spaces.GPU function at module level. `spaces`
# only exists on the Space, so this is a no-op locally. ────────────────────────
try:
    import spaces

    @spaces.GPU
    def _warmup():
        pass

except ImportError:
    pass

# ── The Space runtime looks for a Gradio object named `demo`; launching it also
# initializes ZeroGPU. Our real interface is the React app mounted below. ──────
with gr.Blocks() as demo:
    pass

# PORT takes precedence, then GRADIO_SERVER_PORT, then Gradio's usual 7860.
_port = int(os.environ.get("PORT", os.environ.get("GRADIO_SERVER_PORT", 7860)))

# Launch non-blocking, then take the underlying FastAPI app. ssr_mode=False so
# Python serves all our routes directly (no Node proxy in front of them).
demo.launch(
    server_name="0.0.0.0",
    server_port=_port,
    prevent_thread_lock=True,
    show_error=True,
    ssr_mode=False,
)
app = demo.app


def _is_root_get(route) -> bool:
    """Return True when *route* is a GET handler registered at exactly "/"."""
    if getattr(route, "path", None) != "/":
        return False
    return "GET" in (getattr(route, "methods", None) or set())


# Gradio registers GET "/" for its own (empty) Blocks UI — drop it so our React
# index.html can own "/".
app.router.routes = [route for route in app.router.routes if not _is_root_get(route)]

# No CORS needed: production is same-origin (React served by this app) and the
# Vite dev server proxies /api here, so the browser always sees one origin.
# (Gradio has already started the app by now, so middleware can't be added anyway.)
@lru_cache(maxsize=1)
def _provider():
    """Build the configured provider once and reuse it (matters for hf_local)."""
    return get_provider()


class Moment(BaseModel):
    """Request body accepted by POST /api/interpret."""

    images: list[str] = Field(default_factory=list)  # data URLs or raw base64 JPEG
    fragment: str = ""
    exclude: list[str] = Field(default_factory=list)  # titles already shown this session
    refined: bool | None = None  # optional A/B override of the prompt variant


def _error(message: str, status_code: int) -> JSONResponse:
    """Wrap *message* in the ``{"error": ...}`` envelope with the given status."""
    return JSONResponse({"error": message}, status_code=status_code)


@app.post("/api/interpret")
async def api_interpret(moment: Moment):
    """Interpret the posted moment and return whatever ``interpret`` produces.

    Failures are reported as ``{"error": <message>}`` with:

    * 400 — no images supplied, or ``InputError`` raised for the input;
    * 422 — ``OracleError`` (a model/parse problem the user can retry past);
    * 503 — an exception whose class name is listed in ``_CONNECTIVITY_ERRORS``;
    * 500 — any other exception (traceback printed, detail never returned).
    """
    if not moment.images:
        return _error("Bring at least one image.", 400)

    # NOTE(review): interpret() is called directly (not awaited) inside this
    # coroutine, so the event loop does nothing else until it returns. If
    # inference is slow, consider offloading it to a worker thread — left as-is
    # here because that would also allow concurrent model calls. TODO confirm.
    try:
        return interpret(
            moment.images,
            moment.fragment,
            provider=_provider(),
            exclude=moment.exclude,
            refined=moment.refined,
        )
    except InputError as exc:
        # Bad/oversized/unsupported input — rejected before reaching the model.
        return _error(str(exc), 400)
    except OracleError as exc:
        # The reel slipped — a model/parse problem the user can retry past.
        return _error(str(exc), 422)
    except Exception as exc:  # noqa: BLE001 — log the detail, never leak it
        traceback.print_exc()
        if type(exc).__name__ in _CONNECTIVITY_ERRORS:
            return _error(
                "Chalchitra can't reach the projector right now. Try again in a moment.",
                503,
            )
        return _error("Something slipped through the reel. Try again.", 500)


@app.get("/api/health")
async def health():
    """Report liveness plus the name and model of the configured provider."""
    p = _provider()
    return {"ok": True, "provider": p.name, "model": getattr(p, "model", None)}


_PLACEHOLDER = """ Chalchitra

Chalchitra

a film for this moment

Frontend not built yet — run npm run build in frontend/. The API is live at /api/interpret.

"""

# Serve the built SPA: our JS/CSS under /reel (Gradio owns /assets), index.html
# at "/". Explicit routes rather than a greedy "/" mount so Gradio's own routes
# (/assets, /config, /gradio_api/*) keep working.
_INDEX_HTML = DIST / "index.html"
_REEL_DIR = DIST / "reel"

# Gate on the files we actually serve rather than on dist/ merely existing:
# StaticFiles raises at construction when its directory is missing (which would
# abort the import), and a missing index.html would fail every GET "/". A
# partial build therefore falls back to the placeholder instead of crashing.
if _INDEX_HTML.is_file():
    if _REEL_DIR.is_dir():
        app.mount("/reel", StaticFiles(directory=str(_REEL_DIR)), name="reel")

    @app.get("/", response_class=HTMLResponse)
    async def index():
        """Serve the built React app's index.html."""
        return FileResponse(str(_INDEX_HTML))

else:

    @app.get("/", response_class=HTMLResponse)
    async def placeholder():
        """Serve the "frontend not built yet" notice."""
        return _PLACEHOLDER


# launch() was non-blocking; keep the process alive when run directly or under
# the Space runtime (which imports this module with SPACE_ID set).
if __name__ == "__main__" or os.environ.get("SPACE_ID"):
    demo.block_thread()