chalchitra / app.py
ajit3259's picture
feat: in-app example moments + refined prompt on the Space
447230d
Raw
History Blame Contribute Delete
6.9 kB
"""Chalchitra β€” a film for this moment.
Entry point for the Hugging Face Space. The Space runtime expects a Gradio object
named `demo`; defining and launching it also initializes ZeroGPU. We launch it
non-blocking (SSR off so Python serves routes directly), then mount our own React
frontend and the /api/interpret endpoint onto the underlying FastAPI app. The
model answering is whichever one we point at — no hosted API in the loop.
The frontend POSTs images + a fragment; the oracle returns the frozen contract:
{"interpretation": str, "films": [{"title", "year", "rationale"}]}
"""
from __future__ import annotations
import os
import traceback
from functools import lru_cache
from pathlib import Path
from dotenv import load_dotenv
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field
import gradio as gr
from backend import InputError, OracleError, interpret
from backend.providers import get_provider
load_dotenv()
# On the Space, run the model in-process on ZeroGPU (Qwen2.5-VL) by default.
# Locally SPACE_ID is unset, so none of this applies and we keep talking to
# LM Studio.
def _apply_space_defaults() -> None:
    """Fill in Space-only environment defaults without clobbering overrides.

    Does nothing unless ``SPACE_ID`` is set to a non-empty value. Every value
    goes through ``setdefault``, so a Space Variable configured in the Settings
    UI still wins over the defaults below.
    """
    if not os.environ.get("SPACE_ID"):
        return

    os.environ.setdefault("CHALCHITRA_PROVIDER", "hf_local")

    # 7B: 3B (≤4B, Tiny Titan) was verified and rejected — it returned <3 films
    # and described the photo rather than reading it. 7B holds the quality bar.
    os.environ.setdefault("CHALCHITRA_MODEL", "Qwen/Qwen2.5-VL-7B-Instruct")

    # Refined prompt on the Space: on Qwen it picks markedly more atmospheric
    # films (and avoids describing the photo); the bounded retry covers its
    # occasional JSON wobble. Local dev stays on the baseline default.
    os.environ.setdefault("CHALCHITRA_REFINED_PROMPT", "1")


_apply_space_defaults()
# HERE is the directory holding this file; DIST is where the frontend build
# output is expected (frontend/dist beneath it).
HERE = Path(__file__).parent
DIST = HERE.joinpath("frontend", "dist")
# Exception class names treated as "the model server could not be reached or
# did not answer" rather than as a genuine bug. Comparison is by class *name*,
# so this module never has to import openai or httpx just to recognise them.
_CONNECTIVITY_ERRORS = {
    # API-client style names (presumably raised by the openai client).
    "APIConnectionError",
    "APITimeoutError",
    "InternalServerError",
    # Transport-level names (presumably raised by httpx).
    "ConnectError",
    "ConnectTimeout",
    "ReadTimeout",
    "TimeoutException",
}
# ZeroGPU needs at least one @spaces.GPU function defined at module level.
# The `spaces` package only exists on the Space, so locally the import fails
# and this whole section is skipped.
try:
    import spaces

    @spaces.GPU
    def _warmup():
        """Do nothing; exists only so a @spaces.GPU function is registered."""

except ImportError:
    # Not on a Space: there is nothing to register.
    pass
# The Space runtime looks for a module-level Gradio object named `demo`, and
# launching it is also what initializes ZeroGPU. The Blocks UI itself is left
# empty on purpose: the real interface is the React app served by the routes
# registered later in this module.
with gr.Blocks() as demo:
    ...

# prevent_thread_lock=True makes launch() return instead of blocking, so the
# rest of the module can attach routes to the FastAPI app Gradio created.
# ssr_mode=False keeps Python serving every route directly (no Node proxy in
# front of them). The port comes from PORT, then GRADIO_SERVER_PORT, then 7860.
demo.launch(
    ssr_mode=False,
    show_error=True,
    prevent_thread_lock=True,
    server_port=int(
        os.environ.get("PORT", os.environ.get("GRADIO_SERVER_PORT", 7860))
    ),
    server_name="0.0.0.0",
)

# The FastAPI application underneath the Gradio server.
app = demo.app
# Gradio has already registered GET "/" for its own (empty) Blocks UI. Rebuild
# the route table without that entry so our React index.html can own "/".
# A route is kept when its path is not "/" or when it does not answer GET;
# routes lacking a `path` or `methods` attribute are therefore always kept.
app.router.routes = [
    route
    for route in app.router.routes
    if getattr(route, "path", None) != "/"
    or "GET" not in (getattr(route, "methods", None) or ())
]
# No CORS needed: production is same-origin (React served by this app) and the
# Vite dev server proxies /api here, so the browser always sees one origin.
# (Gradio has already started the app by now, so middleware can't be added anyway.)
@lru_cache(maxsize=1)
def _provider():
    """Return the configured provider, building it on first use only.

    The single-slot cache hands the same object back on every later call
    instead of calling ``get_provider()`` again (this matters for hf_local).
    """
    provider = get_provider()
    return provider
# Request body accepted by POST /api/interpret. Every field has a default, so
# an empty JSON object still validates; the handler rejects an empty `images`.
class Moment(BaseModel):
    # Images as data URLs or raw base64 JPEG.
    images: list[str] = Field(default_factory=list)

    # Free-text fragment sent along with the images; empty when omitted.
    fragment: str = ""

    # Titles already shown this session.
    exclude: list[str] = Field(default_factory=list)

    # Optional A/B override of the prompt variant; None when not supplied.
    refined: bool | None = None
@app.post("/api/interpret")
async def api_interpret(moment: Moment):
    """Interpret the submitted moment and return the oracle's reading.

    On success the value returned by ``interpret`` is passed through unchanged.
    Failures become ``{"error": <message>}`` JSON responses:

    * 400 - no images were sent, or ``interpret`` raised ``InputError``.
    * 422 - ``interpret`` raised ``OracleError``.
    * 503 - any other exception whose class name is in ``_CONNECTIVITY_ERRORS``.
    * 500 - any other exception.

    For 503 and 500 the traceback is printed server-side and the client only
    ever sees a fixed, generic message.
    """
    if not moment.images:
        return JSONResponse({"error": "Bring at least one image."}, status_code=400)

    # NOTE(review): interpret() is called without await inside an async handler,
    # so it presumably runs synchronously on the event loop for the duration of
    # the model call. Confirm whether the backend tolerates concurrent calls
    # before considering a threadpool.
    try:
        reading = interpret(
            moment.images,
            moment.fragment,
            provider=_provider(),
            exclude=moment.exclude,
            refined=moment.refined,
        )
    except InputError as exc:
        # Bad/oversized/unsupported input — rejected before reaching the model.
        status, message = 400, str(exc)
    except OracleError as exc:
        # The reel slipped — a model/parse problem the user can retry past.
        status, message = 422, str(exc)
    except Exception as exc:  # noqa: BLE001 — log the detail, never leak it
        traceback.print_exc()
        if type(exc).__name__ in _CONNECTIVITY_ERRORS:
            status = 503
            message = "Chalchitra can't reach the projector right now. Try again in a moment."
        else:
            status = 500
            message = "Something slipped through the reel. Try again."
    else:
        return reading

    return JSONResponse({"error": message}, status_code=status)
@app.get("/api/health")
async def health():
    """Report that the app is up, plus the active provider's name and model.

    ``model`` is None when the provider object has no ``model`` attribute.
    """
    provider = _provider()
    provider_name = provider.name
    model_name = getattr(provider, "model", None)
    return {"ok": True, "provider": provider_name, "model": model_name}
# Self-contained HTML page returned at "/" when the built frontend is absent.
# The dash in the "not built yet" sentence is a real em dash (U+2014); the page
# declares charset utf-8, so it renders as intended rather than as mojibake.
_PLACEHOLDER = """<!doctype html><html><head><meta charset="utf-8">
<title>Chalchitra</title><style>body{background:#0C0907;color:#EDE6D9;
font-family:Georgia,serif;display:grid;place-items:center;height:100vh;margin:0}
em{color:#C9923E}</style></head><body><div style="text-align:center">
<h1 style="font-weight:300;font-style:italic">Chalchitra</h1>
<p><em>a film for this moment</em></p>
<p style="opacity:.6">Frontend not built yet — run <code>npm run build</code> in
<code>frontend/</code>. The API is live at <code>/api/interpret</code>.</p>
</div></body></html>"""
# Serve the built SPA: our JS/CSS under /reel (Gradio owns /assets), index.html
# at "/". Explicit routes rather than a greedy "/" mount so Gradio's own routes
# (/assets, /config, /gradio_api/*) keep working.
#
# The guards look at what is actually served, not just at the dist/ directory.
# StaticFiles raises when constructed on a missing directory, and FileResponse
# fails per request when its file is missing, so a partial or interrupted
# build would otherwise crash the import or leave "/" erroring. Without an
# index.html we fall back to the placeholder page instead.
_INDEX_HTML = DIST / "index.html"
_REEL_DIR = DIST / "reel"

if _INDEX_HTML.is_file():
    # Only mount the asset directory when the build actually produced it.
    if _REEL_DIR.is_dir():
        app.mount("/reel", StaticFiles(directory=str(_REEL_DIR)), name="reel")

    @app.get("/", response_class=HTMLResponse)
    async def index():
        """Serve the built frontend's index.html."""
        return FileResponse(str(_INDEX_HTML))

else:

    @app.get("/", response_class=HTMLResponse)
    async def placeholder():
        """Serve the static "frontend not built yet" page."""
        return _PLACEHOLDER
# launch() above returned immediately, so nothing is keeping the process alive
# yet. Block here when this file is run directly, or when SPACE_ID is set (the
# Space runtime imports this module rather than executing it as __main__).
if os.environ.get("SPACE_ID") or __name__ == "__main__":
    demo.block_thread()