"""Submit Eval — is your Build Small hackathon Space ready to submit?

gr.Server app: plain FastAPI routes serve the HTMX shell and fragments;
the LLM review runs as a Gradio-queued endpoint (ZeroGPU) streaming HTML
chunks that bridge.js swaps into the page.
"""

import os

# Writable HF cache BEFORE anything imports huggingface_hub (gradio does).
# preload_from_hub bakes a cache owned by the build user into the image;
# any runtime download into it dies with EACCES (xet_get, os error 13) —
# so models download at startup/first-use into an app-local cache instead.
_CACHE_ROOT = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".cache")
for _var, _sub in (("HF_HOME", "hf"), ("HF_XET_CACHE", "xet"), ("TMPDIR", "tmp")):
    _path = os.path.join(_CACHE_ROOT, _sub)
    os.makedirs(_path, exist_ok=True)
    os.environ[_var] = _path

# These imports must stay below the environment setup above.
import threading
import time
from collections.abc import Iterator

import gradio as gr
import markdown
from fastapi import Form
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles

import checks
import llm
import presenters

FRONTEND_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "frontend")

app = gr.Server()

# checks already ran when the review starts — cache them briefly so the
# review endpoint doesn't redo a dozen Hub API calls
EVAL_TTL_S = 15 * 60
_eval_cache: dict[str, tuple[float, checks.Evaluation]] = {}
# Guards _eval_cache: the eviction pass iterates the dict, which is not safe
# while another request thread inserts into it.
_eval_cache_lock = threading.Lock()


def _evaluate_cached(space: str) -> checks.Evaluation:
    """Return the evaluation for *space*, reusing a recent one when possible.

    The cache key is ``checks.normalize_space_name(space)``. An entry younger
    than ``EVAL_TTL_S`` seconds is returned as-is, so a repeated check of the
    same Space inside that window does not re-run the checks.

    Only evaluations whose ``exists`` flag is truthy are stored: a failed
    lookup is never pinned for the TTL, so the next request retries it.
    Expired entries are evicted on every insert so the cache cannot grow
    without bound.
    """
    name = checks.normalize_space_name(space)

    with _eval_cache_lock:
        hit = _eval_cache.get(name)
    if hit is not None and time.monotonic() - hit[0] < EVAL_TTL_S:
        return hit[1]

    # Run the checks outside the lock so one evaluation does not block
    # cache lookups made by other requests.
    ev = checks.evaluate_space(name)
    if not ev.exists:
        return ev

    now = time.monotonic()
    with _eval_cache_lock:
        expired = [
            key
            for key, (stamp, _) in _eval_cache.items()
            if now - stamp >= EVAL_TTL_S
        ]
        for key in expired:
            del _eval_cache[key]
        _eval_cache[name] = (now, ev)
    return ev


@app.get("/", response_class=HTMLResponse)
def index() -> FileResponse:
    """Serve ``index.html`` from the frontend directory."""
    return FileResponse(os.path.join(FRONTEND_DIR, "index.html"))


@app.get("/ui/spaces", response_class=HTMLResponse)
def ui_spaces() -> HTMLResponse:
    """Return the space-options fragment rendered from the org's spaces."""
    return HTMLResponse(presenters.render_space_options(checks.list_org_spaces()))


@app.post("/ui/check", response_class=HTMLResponse)
def ui_check(
    space: str = Form(""),
    model: str = Form(llm.DEFAULT_MODEL),
) -> HTMLResponse:
    """Evaluate *space* and return the results fragment.

    A blank *space* or an evaluation whose ``exists`` flag is falsy yields
    the error fragment instead. A *model* that is not a key of
    ``llm.MODELS`` is replaced by ``llm.DEFAULT_MODEL``.
    """
    if not space.strip():
        return HTMLResponse(presenters.render_error(
            "Pick a space from the dropdown (or paste its name) first."))
    if model not in llm.MODELS:
        model = llm.DEFAULT_MODEL
    ev = _evaluate_cached(space)
    if not ev.exists:
        return HTMLResponse(presenters.render_error(ev.error))
    return HTMLResponse(presenters.render_results(ev, model, llm.MODELS[model]))


@app.api(name="review")
def review_api(space: str, model_id: str) -> Iterator[str]:
    """Stream the review block body as rendered-markdown HTML chunks.

    The Iterator[str] annotation is load-bearing: gr.Server infers the
    endpoint's output count from it — without it every yield is dropped.

    Each value produced by ``llm.generate_review`` is rendered and yielded
    with ``done=False``; the last value (or the empty string when nothing
    was generated) is then yielded once more with ``done=True``.
    """
    if model_id not in llm.MODELS:
        model_id = llm.DEFAULT_MODEL
    ev = _evaluate_cached(space)
    # Pre-bind so the closing chunk is well-defined even if the generator
    # yields nothing.
    text = ""
    for text in llm.generate_review(model_id, ev.to_dict()):
        yield presenters.render_review_chunk(_md(text), done=False)
    yield presenters.render_review_chunk(_md(text), done=True)


def _md(text: str) -> str:
    """Convert markdown *text* to HTML with sane_lists and tables enabled."""
    return markdown.markdown(text, extensions=["sane_lists", "tables"])


app.mount("/static", StaticFiles(directory=FRONTEND_DIR), name="static")

if __name__ == "__main__":
    app.launch(server_name="0.0.0.0")