Spaces:

build-small-hackathon
/

ready-to-submit

Running on Zero

App Files Files Community

ready-to-submit / app.py

marinarosa

Use a writable runtime HF cache; drop preload_from_hub

b7281ef 7 days ago

Raw

History Blame Contribute Delete

3.47 kB

	"""Submit Eval — is your Build Small hackathon Space ready to submit?

	gr.Server app: plain FastAPI routes serve the HTMX shell and fragments;
	the LLM review runs as a Gradio-queued endpoint (ZeroGPU) streaming HTML
	chunks that bridge.js swaps into the page.
	"""

	import os

	# The Hugging Face cache variables have to point at a writable location
	# BEFORE anything imports huggingface_hub (gradio does).
	# A cache baked into the image by preload_from_hub is owned by the build
	# user, so any runtime download into it dies with EACCES (xet_get, os
	# error 13). Models are therefore downloaded at startup/first use into an
	# app-local ".cache" directory next to this file.
	_APP_DIR = os.path.dirname(os.path.abspath(__file__))
	_CACHE_ROOT = os.path.join(_APP_DIR, ".cache")
	# environment variable -> sub-directory of _CACHE_ROOT it is pointed at
	_CACHE_ENV_DIRS = {"HF_HOME": "hf", "HF_XET_CACHE": "xet", "TMPDIR": "tmp"}
	for _var, _sub in _CACHE_ENV_DIRS.items():
	    _path = os.path.join(_CACHE_ROOT, _sub)
	    os.makedirs(_path, exist_ok=True)
	    os.environ[_var] = _path

	import time
	from collections.abc import Iterator

	import gradio as gr
	import markdown
	from fastapi import Form
	from fastapi.responses import FileResponse, HTMLResponse
	from fastapi.staticfiles import StaticFiles

	import checks
	import llm
	import presenters

	# The HTML shell and the static assets live in "frontend/" next to this file.
	FRONTEND_DIR = os.path.join(
	    os.path.dirname(os.path.abspath(__file__)),
	    "frontend",
	)

	app = gr.Server()

	# The checks have already run by the time a review starts, so their result
	# is kept for a short while and the review endpoint does not redo a dozen
	# Hub API calls.
	EVAL_TTL_S = 900  # 15 minutes, in seconds
	# normalized space name -> (time.monotonic() when stored, evaluation)
	_eval_cache: dict[str, tuple[float, checks.Evaluation]] = {}


	def _evaluate_cached(space: str) -> checks.Evaluation:
	    """Return the evaluation for *space*, reusing a recent result if any.

	    The name is normalized with ``checks.normalize_space_name`` and the
	    normalized form is the cache key. A stored result younger than
	    ``EVAL_TTL_S`` seconds is returned as-is; otherwise the space is
	    evaluated again and the fresh result replaces the stored one.

	    Expired entries are removed whenever a new result is about to be
	    stored, so entries for names nobody requests any more do not pile up
	    for the lifetime of the process.

	    Args:
	        space: Space name as supplied by the caller (not yet normalized).

	    Returns:
	        The cached or freshly computed ``checks.Evaluation``.
	    """
	    name = checks.normalize_space_name(space)
	    now = time.monotonic()
	    hit = _eval_cache.get(name)
	    if hit is not None and now - hit[0] < EVAL_TTL_S:
	        return hit[1]

	    # Iterate over a snapshot: popping from the live dict while iterating
	    # it would raise RuntimeError. pop(..., None) tolerates a key that is
	    # already gone.
	    for key, (stored_at, _) in list(_eval_cache.items()):
	        if now - stored_at >= EVAL_TTL_S:
	            _eval_cache.pop(key, None)

	    ev = checks.evaluate_space(name)
	    # Timestamp after the evaluation so the TTL counts from completion.
	    _eval_cache[name] = (time.monotonic(), ev)
	    return ev


	@app.get("/", response_class=HTMLResponse)
	def index() -> FileResponse:
	    """Serve ``index.html`` from ``FRONTEND_DIR`` as the page shell."""
	    shell_path = os.path.join(FRONTEND_DIR, "index.html")
	    return FileResponse(shell_path)


	@app.get("/ui/spaces", response_class=HTMLResponse)
	def ui_spaces() -> HTMLResponse:
	    """Return the HTML rendered from ``checks.list_org_spaces()``."""
	    org_spaces = checks.list_org_spaces()
	    options_html = presenters.render_space_options(org_spaces)
	    return HTMLResponse(options_html)


	@app.post("/ui/check", response_class=HTMLResponse)
	def ui_check(space: str = Form(""), model: str = Form(llm.DEFAULT_MODEL)) -> HTMLResponse:
	    """Evaluate *space* and answer with rendered results or an error.

	    Args:
	        space: Form field naming the space. Blank or whitespace-only input
	            is answered with an error without evaluating anything.
	        model: Form field with the model key. A value that is not a key of
	            ``llm.MODELS`` is replaced by ``llm.DEFAULT_MODEL``.

	    Returns:
	        The rendered results, or a rendered error when no space was given
	        or the evaluation's ``exists`` flag is falsy.
	    """
	    if space.strip() == "":
	        message = "Pick a space from the dropdown (or paste its name) first."
	        return HTMLResponse(presenters.render_error(message))

	    chosen = model if model in llm.MODELS else llm.DEFAULT_MODEL
	    evaluation = _evaluate_cached(space)
	    if evaluation.exists:
	        html = presenters.render_results(evaluation, chosen, llm.MODELS[chosen])
	    else:
	        html = presenters.render_error(evaluation.error)
	    return HTMLResponse(html)


	@app.api(name="review")
	def review_api(space: str, model_id: str) -> Iterator[str]:
	    """Stream the review block body as rendered-markdown HTML chunks.

	    The Iterator[str] annotation is load-bearing: gr.Server infers the
	    endpoint's output count from it — without it every yield is dropped.

	    Args:
	        space: Name of the space to review; evaluated through the same
	            cache that ui_check uses.
	        model_id: Model key; a value that is not a key of ``llm.MODELS``
	            is replaced by ``llm.DEFAULT_MODEL``.

	    Yields:
	        One chunk with ``done=False`` for every text produced by
	        ``llm.generate_review``, then a closing chunk with ``done=True``
	        built from the last text (empty if nothing was generated). When
	        the evaluation's ``exists`` flag is falsy, a single ``done=True``
	        chunk carrying the rendered error is yielded instead.
	    """
	    if model_id not in llm.MODELS:
	        model_id = llm.DEFAULT_MODEL
	    ev = _evaluate_cached(space)
	    if not ev.exists:
	        # Same guard as ui_check: do not start a generation for a space
	        # whose evaluation failed to resolve it.
	        # NOTE(review): assumes render_review_chunk accepts the error
	        # fragment as its body just like the markdown HTML — confirm
	        # against presenters.
	        yield presenters.render_review_chunk(
	            presenters.render_error(ev.error), done=True)
	        return
	    # Pre-set so the closing chunk is well defined even when the generator
	    # produces nothing.
	    text = ""
	    for text in llm.generate_review(model_id, ev.to_dict()):
	        yield presenters.render_review_chunk(_md(text), done=False)
	    yield presenters.render_review_chunk(_md(text), done=True)


	def _md(text: str) -> str:
	    """Convert markdown *text* to HTML with sane_lists and tables enabled."""
	    enabled_extensions = ["sane_lists", "tables"]
	    return markdown.markdown(text, extensions=enabled_extensions)


	# Expose the files under FRONTEND_DIR at the /static path.
	_static_files = StaticFiles(directory=FRONTEND_DIR)
	app.mount("/static", _static_files, name="static")

	if __name__ == "__main__":
	    # Only launch when executed as a script, not when imported.
	    app.launch(server_name="0.0.0.0")