ready-to-submit / app.py
marinarosa's picture
Use a writable runtime HF cache; drop preload_from_hub
b7281ef
Raw
History Blame Contribute Delete
3.47 kB
"""Submit Eval — is your Build Small hackathon Space ready to submit?
gr.Server app: plain FastAPI routes serve the HTMX shell and fragments;
the LLM review runs as a Gradio-queued endpoint (ZeroGPU) streaming HTML
chunks that bridge.js swaps into the page.
"""
import os
# Writable HF cache BEFORE anything imports huggingface_hub (gradio does).
# preload_from_hub bakes a cache owned by the build user into the image;
# any runtime download into it dies with EACCES (xet_get, os error 13) —
# so models download at startup/first-use into an app-local cache instead.
# App-local cache root: a ".cache" directory next to this file.
_CACHE_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".cache")

# Environment variable -> sub-directory of _CACHE_ROOT it is pointed at.
_CACHE_DIRS = {"HF_HOME": "hf", "HF_XET_CACHE": "xet", "TMPDIR": "tmp"}

for _var, _sub in _CACHE_DIRS.items():
    _path = os.path.join(_CACHE_ROOT, _sub)
    # Create the directory first so the variable never names a missing path.
    os.makedirs(_path, exist_ok=True)
    os.environ[_var] = _path
import time
from collections.abc import Iterator
import gradio as gr
import markdown
from fastapi import Form
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
import checks
import llm
import presenters
# Directory next to this file that holds index.html and the files served
# under /static.
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
FRONTEND_DIR = os.path.join(_APP_DIR, "frontend")

# The server object every route and API endpoint in this module registers on.
app = gr.Server()
# checks already ran when the review starts — cache them briefly so the
# review endpoint doesn't redo a dozen Hub API calls
# Lifetime of a cached evaluation, in seconds (15 minutes).
EVAL_TTL_S = 15 * 60
# Normalized space name -> (time.monotonic() when stored, evaluation).
_eval_cache: dict[str, tuple[float, checks.Evaluation]] = {}


def _evaluate_cached(space: str) -> checks.Evaluation:
    """Return the evaluation for *space*, reusing a recent cached result.

    The name is normalized with ``checks.normalize_space_name`` and used as
    the cache key. A cached evaluation younger than ``EVAL_TTL_S`` seconds is
    returned as-is; otherwise ``checks.evaluate_space`` runs again and its
    result replaces the cached one.

    Args:
        space: Space name as supplied by the caller (not yet normalized).

    Returns:
        The cached or freshly computed ``checks.Evaluation``.
    """
    name = checks.normalize_space_name(space)
    now = time.monotonic()

    hit = _eval_cache.get(name)
    if hit is not None and now - hit[0] < EVAL_TTL_S:
        return hit[1]

    # Keys come from caller-supplied space names, so without eviction the
    # dict would keep every name ever checked for the life of the process.
    # Iterate over a snapshot so removing entries (or an insert made while
    # we scan) cannot invalidate the iteration.
    for key, (stored_at, _) in list(_eval_cache.items()):
        if now - stored_at >= EVAL_TTL_S:
            _eval_cache.pop(key, None)

    ev = checks.evaluate_space(name)
    # Timestamp after the evaluation so the TTL counts from when the result
    # became available, not from when the request arrived.
    _eval_cache[name] = (time.monotonic(), ev)
    return ev
@app.get("/", response_class=HTMLResponse)
def index() -> FileResponse:
    """Serve ``index.html`` from the frontend directory at the site root."""
    shell_path = os.path.join(FRONTEND_DIR, "index.html")
    return FileResponse(shell_path)
@app.get("/ui/spaces", response_class=HTMLResponse)
def ui_spaces() -> HTMLResponse:
    """Return the space-options fragment as an HTML response.

    The list comes from ``checks.list_org_spaces()`` and is rendered by
    ``presenters.render_space_options``.
    """
    spaces = checks.list_org_spaces()
    options_html = presenters.render_space_options(spaces)
    return HTMLResponse(options_html)
@app.post("/ui/check", response_class=HTMLResponse)
def ui_check(space: str = Form(""), model: str = Form(llm.DEFAULT_MODEL)) -> HTMLResponse:
    """Run the checks for the submitted space and return the results fragment.

    Args:
        space: Space name from the form; blank or whitespace-only input
            yields an error fragment without running any check.
        model: Model key from the form; any value not present in
            ``llm.MODELS`` falls back to ``llm.DEFAULT_MODEL``.

    Returns:
        An HTML response holding either the rendered results or a rendered
        error (blank input, or an evaluation whose ``exists`` is false).
    """
    if space.strip() == "":
        prompt_html = presenters.render_error(
            "Pick a space from the dropdown (or paste its name) first.")
        return HTMLResponse(prompt_html)

    chosen = model if model in llm.MODELS else llm.DEFAULT_MODEL
    ev = _evaluate_cached(space)

    if ev.exists:
        fragment = presenters.render_results(ev, chosen, llm.MODELS[chosen])
    else:
        fragment = presenters.render_error(ev.error)
    return HTMLResponse(fragment)
@app.api(name="review")
def review_api(space: str, model_id: str) -> Iterator[str]:
    """Stream the review block body as rendered-markdown HTML chunks.

    The Iterator[str] annotation is load-bearing: gr.Server infers the
    endpoint's output count from it — without it every yield is dropped.

    Args:
        space: Space name to review; evaluated through ``_evaluate_cached``.
        model_id: Model key; any value not present in ``llm.MODELS`` falls
            back to ``llm.DEFAULT_MODEL``.

    Yields:
        One chunk per value produced by ``llm.generate_review`` (rendered
        with ``done=False``), then the last text once more with
        ``done=True``. If the evaluation reports that the space does not
        exist, a single ``done=True`` chunk carrying the rendered error is
        yielded instead and no review is generated.
    """
    if model_id not in llm.MODELS:
        model_id = llm.DEFAULT_MODEL
    ev = _evaluate_cached(space)

    # Same guard as /ui/check: this endpoint can be called on its own, and
    # without the check a failed evaluation would still be sent to the model.
    if not ev.exists:
        yield presenters.render_review_chunk(
            presenters.render_error(ev.error), done=True)
        return

    # Stays "" when the generator yields nothing, so the closing chunk below
    # is still emitted.
    text = ""
    for text in llm.generate_review(model_id, ev.to_dict()):
        yield presenters.render_review_chunk(_md(text), done=False)
    yield presenters.render_review_chunk(_md(text), done=True)
def _md(text: str) -> str:
    """Convert markdown *text* to HTML with ``sane_lists`` and ``tables`` on.

    NOTE(review): Python-Markdown passes raw HTML in its input through by
    default — confirm the text given here is trusted or sanitized elsewhere.
    """
    enabled = ["sane_lists", "tables"]
    return markdown.markdown(text, extensions=enabled)
# Serve the files in the frontend directory under the /static URL prefix.
_static_files = StaticFiles(directory=FRONTEND_DIR)
app.mount("/static", _static_files, name="static")

if __name__ == "__main__":
    # Running as a script: listen on every interface.
    app.launch(server_name="0.0.0.0")