Spaces:

build-small-hackathon
/

her

Running on Zero

App Files Files Community

her / app.py

geekwrestler

Deploy Her (Gradio Server / ZeroGPU + bucket + per-client isolation + enrichment)

c6bf731 verified 3 days ago

raw

history blame contribute delete

20.4 kB

	#!/usr/bin/env python3
	"""Her · हेर — Hugging Face ZeroGPU Space entrypoint (Gradio Server mode).

	ZeroGPU is Gradio-SDK-only and its GPU quota requires the HF iframe auth headers to
	be forwarded on GPU-invoking calls — a plain `fetch` to a custom route that triggers
	`@spaces.GPU` bypasses that and fails. So this app uses Gradio Server mode
	(`gradio.Server`, a FastAPI server with Gradio's API engine):

	* DETERMINISTIC engine endpoints (no GPU) are plain FastAPI routes the React app
	  calls with `fetch`:
	      GET      /api/health
	      GET      /api/sessions
	      POST     /api/upload
	      GET      /api/analyze?path=
	      GET      /api/project?cwd=
	      POST     /api/clear
	      GET/POST /api/consent
	      GET      /api/demo-video
	* GPU narration endpoints are Gradio API endpoints (`@app.api`) the browser calls
	  via `@gradio/client` (which forwards the auth headers ZeroGPU needs):
	      overview · advice · chat · project_chat · project_narrative

	STORAGE & PRIVACY (the hosted Space):
	* Uploaded sessions are stored on an HF storage bucket mounted read-write at
	`HER_DATA_DIR` (`/data`), namespaced per client: `/data/<ns>/<project>/<file>.jsonl`
	where `ns = sha256(client-token)`. The client token is generated in the browser
	(localStorage) and sent as the `X-Her-Client` header (REST) / `client` arg (Gradio),
	so every user only ever SEES and ANALYZES their own sessions — public-safe.
	* Trace content is auto-deleted: a background sweeper removes anything older than
	`HER_RETENTION_HOURS` (24h) — the hard guarantee — and `POST /api/clear` wipes the
	caller's namespace immediately (the UI calls it on a "Clear" click and on tab-close).

	The deterministic ENGINE is reused unchanged from the local product; only the transport
	and the model backend differ. server/app.py stays the single source of truth.
	"""
	from __future__ import annotations

	import hashlib
	import os
	import re
	import shutil
	import sys
	import threading
	import time
	import uuid
	from pathlib import Path

	# Select the HF/ZeroGPU narrator backend BEFORE importing server helpers, so every
	# get_narrator() call in server/app.py resolves to the transformers model.
	os.environ.setdefault("HER_BACKEND", "hf")
	# No usage telemetry to gradio.app from a privacy-focused app (set before importing gradio).
	os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")

	import spaces # noqa: F401 (ZeroGPU runtime hook; effect-free off-Space)

	# Force the model to load at MODULE level (ZeroGPU requirement: cuda placement under
	# CUDA-emulation at import; real GPU only inside @spaces.GPU). Safe if it fails — the
	# narrator reports not-ready and callers fall back to the deterministic prose.
	import narrator.hf_narrator # noqa: F401,E402

	import gradio as gr # noqa: E402
	from fastapi import File, Form, Header, UploadFile # noqa: E402
	from fastapi.responses import FileResponse, JSONResponse # noqa: E402
	from fastapi.staticfiles import StaticFiles # noqa: E402

	import server.app as srv # noqa: E402 (the engine request logic — reused as-is)

	# Repository root (the directory containing this file) and the built React bundle.
	REPO = Path(__file__).resolve().parent
	DIST = REPO / "ui" / "dist"

	# Storage root: the HF bucket mount on the Space (HER_DATA_DIR=/data), else a local dir.
	# server/app.py is told HER_EXTRA_ROOT=/data so _safe_session_path permits paths here.
	# The path is resolved once here, so later containment checks compare against an
	# absolute, symlink-free root; the directory is created at import if it is missing.
	DATA_DIR = Path(os.environ.get("HER_DATA_DIR", str(REPO / ".uploads"))).resolve()
	DATA_DIR.mkdir(parents=True, exist_ok=True)
	# Retention window in hours (parsed as float, so fractions are accepted) and the pause,
	# in seconds, between sweeper passes. Both can be overridden through the environment.
	RETENTION_HOURS = float(os.environ.get("HER_RETENTION_HOURS", "24"))
	SWEEP_INTERVAL = int(os.environ.get("HER_SWEEP_INTERVAL", "1800")) # 30 min

	# Public-safe budgets — one client must not be able to exhaust memory or the bucket.
	# All three are enforced in the upload route.
	MAX_UPLOAD_BYTES = 70 * 1024 * 1024 # 70 MB per uploaded session file
	MAX_PROJECTS_PER_NS = 50 # projects (subdirs) per client namespace
	MAX_SESSIONS_PER_PROJECT = 50 # .jsonl sessions per project subdir


	def _log_err(where: str, e: Exception) -> None:
	    """Write a one-line error record to stderr.

	    The detail stays in the server log so that responses sent to the browser can
	    carry only a generic message — no internal paths or tracebacks (info-disclosure).

	    Args:
	        where: Short label for the call site (e.g. the endpoint name).
	        e: The exception that was caught.
	    """
	    kind = type(e).__name__
	    print(f"[her] {where}: {kind}: {e}", file=sys.stderr, flush=True)

	# The shared, persistent binary registry the enricher writes lives OUTSIDE every user
	# namespace (`/data/_registry/...` via HER_LEARNED_PATH). Users can never reach it:
	# uploads only ever land under `/data/<sha256(token)>/`, and the sweeper skips it.
	REGISTRY_DIRNAME = "_registry"
	# The recorded product demo (mp4) is a shared, non-user asset on the bucket at
	# `/data/_assets/her-demo.mp4` (uploaded out-of-band, served read-only by /api/demo-video).
	# Like the registry it is never a user upload and must never be swept.
	ASSETS_DIRNAME = "_assets"
	DEMO_VIDEO_NAME = "her-demo.mp4"
	# Bucket dirs that hold shared state, not per-user trace content — the sweeper skips them.
	# NOTE(review): `_safe_subdir` permits a leading underscore, so a caller can name a
	# project subdir `_registry` or `_assets` inside their own namespace. These names are
	# therefore only meaningful as direct children of DATA_DIR, not at any deeper level.
	PROTECTED_DIRNAMES = (REGISTRY_DIRNAME, ASSETS_DIRNAME)
	# Location of the shared learned-registry file, if configured. Its parent directory is
	# created up front so the first write to it does not fail on a fresh bucket.
	_LEARNED = os.environ.get("HER_LEARNED_PATH")
	if _LEARNED:
	    try:
	        Path(_LEARNED).parent.mkdir(parents=True, exist_ok=True)
	    except OSError as e:
	        # Still non-fatal (the app must boot without the registry), but leave a trace:
	        # a silent failure here would only surface later as unexplained write errors.
	        _log_err("learned-registry dir", e)

	# The Gradio Server instance. Plain REST routes are registered on it with
	# @app.get / @app.post, Gradio API endpoints with @app.api, and it is started by the
	# app.launch(...) call at the end of this file.
	app = gr.Server()


	# --------------------------------------------------------------------------- #
	# per-client namespace — isolates each browser's uploads (public-safe). The token
	# is opaque to us; we only hash it to a directory name.
	# --------------------------------------------------------------------------- #
	def _ns(client: str) -> str:
	    """Map an opaque client token to its namespace directory name.

	    The name is the first 16 hex characters of the SHA-256 digest of the UTF-8
	    encoded token.
	    """
	    # NOTE(review): every caller that sends no token hashes the same literal, so all
	    # token-less requests share one "anon" namespace.
	    token = client if client else "anon"
	    digest = hashlib.sha256(token.encode("utf-8"))
	    return digest.hexdigest()[:16]


	def _ns_dir(client: str) -> Path:
	    """Return the storage directory for `client`: `DATA_DIR/<namespace name>`.

	    The directory is not created here and may not exist yet.
	    """
	    return DATA_DIR.joinpath(_ns(client))


	def _safe_subdir(name: str) -> str:
	    """Turn a caller-supplied project name into a single safe directory name.

	    Surrounding whitespace is stripped, then every character outside
	    `[A-Za-z0-9_-]` — including '.', '/' and '\\' — is replaced with '_', so
	    dot-segments and separators cannot survive to escape the namespace dir. The
	    result is cut to 80 characters; an empty result becomes 'uploads'.
	    """
	    cleaned = (name or "").strip()
	    cleaned = re.sub(r"[^A-Za-z0-9_-]", "_", cleaned)[:80]
	    return cleaned if cleaned else "uploads"


	def _client_owns(p: Path, client: str) -> bool:
	    """Decide whether `client` may read the session file at `p`.

	    A path inside DATA_DIR must sit inside the requesting client's namespace
	    directory. Paths outside DATA_DIR (the bundled fixture / local sessions) are
	    unaffected and allowed.

	    `is_relative_to` is a purely lexical comparison, so a `..` segment or a symlink
	    could make a path look like it is under the caller's directory while naming
	    another client's file. Both the path as given and its resolved form are
	    therefore checked; this is never more permissive than the lexical check alone.

	    Args:
	        p: Candidate session path.
	        client: The caller's opaque client token.

	    Returns:
	        True when access is allowed; False when it is not or when the check fails.
	    """
	    try:
	        candidates = (p, p.resolve())
	        if not any(c.is_relative_to(DATA_DIR) for c in candidates):
	            return True
	        home = _ns_dir(client)
	        return all(c.is_relative_to(home) for c in candidates)
	    except Exception:
	        return False  # fail CLOSED — a security predicate must never default to "allow"


	# --------------------------------------------------------------------------- #
	# DETERMINISTIC engine endpoints — plain FastAPI routes, no GPU (React `fetch`).
	# --------------------------------------------------------------------------- #
	@app.get("/api/health")
	def api_health():
	    """Report liveness plus the flags the UI uses to choose its transport.

	    Returns a dict with:
	      * `ok`    — always True.
	      * `llama` — the UI's flag for "model reachable": whether the narrator reported
	                  ready within a 0.1 s wait. Any error counts as not ready.
	      * `gpu`   — always True here; tells the UI to route narration through
	                  @gradio/client (auth forwards for ZeroGPU quota).
	      * `space` — the SPACE_ID environment value (HF sets it to "owner/name" in the
	                  container) so the UI can build a download command that points at
	                  THIS Space; empty when unset.
	    """
	    try:
	        narrator_ready = srv.get_narrator().wait_until_ready(max_wait=0.1, interval=0.1)
	    except Exception:
	        narrator_ready = False
	    space_id = os.environ.get("SPACE_ID", "")
	    return {"ok": True, "llama": bool(narrator_ready), "gpu": True, "space": space_id}


	@app.get("/api/sessions")
	def api_sessions(x_her_client: str = Header(default="")):
	    """List the sessions stored in the caller's own namespace.

	    The listing is scoped to the directory derived from the `X-Her-Client` header,
	    so a caller only ever sees their own uploads. On any failure the error is
	    logged server-side and an empty listing with a generic message is returned —
	    the browser never gets a 500 from this route.
	    """
	    try:
	        scope = str(_ns_dir(x_her_client))
	        return srv._sessions_payload(projects_dir=scope)
	    except Exception as e:
	        _log_err("sessions", e)
	    return {"error": "could not list sessions", "projects": [], "total": 0}


	@app.post("/api/upload")
	async def api_upload(
	    file: UploadFile = File(...),
	    project: str = Form(default="uploads"),
	    x_her_client: str = Header(default=""),
	):
	    """Store an uploaded .jsonl under the caller's namespace.

	    The file is written to `/data/<ns>/<project>/<uuid>.jsonl`. `project` (the bulk
	    script passes the encoded project dir) is sanitized and becomes the subdir, so
	    that session discovery groups uploads by project (presumably via a
	    `<ns>/*/*.jsonl` glob — the original wording here was garbled; confirm against
	    server/app.py).

	    Guards, in order:
	      * 400 — filename does not end in `.jsonl`
	      * 413 — body larger than MAX_UPLOAD_BYTES
	      * 400 — body empty / whitespace only
	      * 400 — destination would fall outside the caller's namespace dir
	      * 409 — the namespace already holds MAX_PROJECTS_PER_NS project dirs and this
	              upload would create a new one
	      * 409 — the project dir already holds MAX_SESSIONS_PER_PROJECT sessions

	    Returns:
	        `{"path": <absolute stored path>, "name": <original filename>}` on success.
	    """
	    name = (file.filename or "").lower()
	    if not name.endswith(".jsonl"):
	        return JSONResponse({"error": "only .jsonl files are accepted"}, status_code=400)

	    # Bounded read: pull at most the cap (+1 sentinel) into memory — a multi-GB upload
	    # can't OOM the box. read(N) returns ≤N bytes; cap+1 back means it's over budget.
	    data = await file.read(MAX_UPLOAD_BYTES + 1)
	    if len(data) > MAX_UPLOAD_BYTES:
	        # Derive the figure from the constant so the message cannot drift from the cap.
	        limit_mb = MAX_UPLOAD_BYTES // (1024 * 1024)
	        return JSONResponse(
	            {"error": f"file too large (max {limit_mb} MB per session)"}, status_code=413
	        )
	    if not data.strip():
	        return JSONResponse({"error": "empty file"}, status_code=400)

	    nsd = _ns_dir(x_her_client)
	    dest_dir = nsd / _safe_subdir(project)
	    # belt + braces: the destination must stay inside the caller's namespace dir.
	    try:
	        if not dest_dir.resolve().is_relative_to(nsd.resolve()):
	            return JSONResponse({"error": "bad project"}, status_code=400)
	    except Exception:
	        return JSONResponse({"error": "bad project"}, status_code=400)

	    # per-namespace budgets — keep one client from filling the bucket (public-safe).
	    if not dest_dir.exists() and nsd.is_dir():
	        if sum(1 for d in nsd.iterdir() if d.is_dir()) >= MAX_PROJECTS_PER_NS:
	            return JSONResponse(
	                {"error": f"project limit reached (max {MAX_PROJECTS_PER_NS} per user)"},
	                status_code=409,
	            )
	    if dest_dir.is_dir() and sum(1 for _ in dest_dir.glob("*.jsonl")) >= MAX_SESSIONS_PER_PROJECT:
	        return JSONResponse(
	            {"error": f"session limit reached for this project (max {MAX_SESSIONS_PER_PROJECT})"},
	            status_code=409,
	        )

	    dest_dir.mkdir(parents=True, exist_ok=True)
	    # The stored name is a fresh UUID — the caller's filename is never used on disk.
	    dest = dest_dir / f"{uuid.uuid4().hex}.jsonl"
	    dest.write_bytes(data)
	    return {"path": str(dest.resolve()), "name": file.filename}


	@app.get("/api/analyze")
	def api_analyze(path: str = "", x_her_client: str = Header(default="")):
	    """Run the deterministic analysis for one session file.

	    The path must be accepted by the engine's `_safe_session_path` and, when it is a
	    bucket path, belong to the caller's namespace; otherwise the route answers 400.
	    Analysis failures are logged server-side and answered with a generic 500.
	    """
	    p = srv._safe_session_path(path or None)
	    allowed = p is not None and _client_owns(p, x_her_client)
	    if not allowed:
	        return JSONResponse({"error": "path not allowed"}, status_code=400)
	    try:
	        result = srv._analyze_cached(p)
	    except Exception as e:
	        _log_err("analyze", e)
	        return JSONResponse({"error": "analyze failed"}, status_code=500)
	    return result


	@app.get("/api/project")
	def api_project(cwd: str = "", x_her_client: str = Header(default="")):
	    """Return the deterministic project view for `cwd`, scoped to the caller.

	    Only the deterministic part is produced here (`with_narrative=False`); the prose
	    narrative comes from the GPU `project_narrative` Gradio endpoint
	    (auth-forwarded), not this plain-REST route. A missing `cwd` answers 400; any
	    engine failure is logged and answered with a generic 500.
	    """
	    if cwd == "":
	        return JSONResponse({"error": "cwd required"}, status_code=400)
	    try:
	        scope = str(_ns_dir(x_her_client))
	        return srv._project(cwd, with_narrative=False, projects_dir=scope)
	    except Exception as e:
	        _log_err("project", e)
	    return JSONResponse({"error": "could not load project"}, status_code=500)


	@app.post("/api/clear")
	async def api_clear(client: str = "", x_her_client: str = Header(default="")):
	    """Wipe the caller's namespace (their uploaded sessions).

	    `client` is also read from the query string so navigator.sendBeacon (which
	    can't set headers) works on tab-close; it takes precedence over the
	    `X-Her-Client` header. Per-client: never touches anyone else's data. A request
	    that carries no token at all is a no-op.

	    Returns:
	        `{"ok": True, "cleared": <number of .jsonl files found before deletion>}`.
	        The response shape is the same on failure; failures are logged server-side.
	    """
	    cid = client or x_her_client
	    removed = 0
	    if not cid:
	        return {"ok": True, "cleared": removed}
	    nsd = _ns_dir(cid)
	    try:
	        if nsd.is_dir():
	            removed = sum(1 for _ in nsd.rglob("*.jsonl"))
	            shutil.rmtree(nsd, ignore_errors=True)
	            srv._CACHE.clear()  # drop any cached analysis for the wiped files
	            if nsd.exists():
	                # rmtree(ignore_errors=True) never raises, so an incomplete wipe
	                # would otherwise be invisible. The TTL sweeper remains the backstop.
	                _log_err("clear", OSError("namespace directory not fully removed"))
	    except Exception as e:
	        # Stay best-effort for the browser (this often runs on tab-close), but leave
	        # a server-side trace instead of swallowing the failure.
	        _log_err("clear", e)
	    return {"ok": True, "cleared": removed}


	@app.get("/api/consent")
	def api_consent_get():
	    """Return the engine's current consent state.

	    NOTE(review): this route takes no client token and returns `srv._CONSENT`
	    as-is, so the state looks process-wide rather than per-client — confirm
	    against server/app.py.
	    """
	    consent = srv._CONSENT
	    return consent


	@app.post("/api/consent")
	async def api_consent_post(request_body: dict | None = None):
	    """Persist the consent flags from the JSON body and return the stored state.

	    Args:
	        request_body: Optional JSON object; the keys read are `accepted` and `share`.

	    Both flags default to False when missing, so a malformed or empty body cannot
	    opt anyone in.
	    """
	    body = request_body or {}
	    accepted = bool(body.get("accepted", False))
	    share = bool(body.get("share", False))
	    srv._save_consent(accepted, share)
	    return srv._CONSENT


	@app.get("/api/demo-video")
	def api_demo_video():
	    """Serve the recorded product demo as `video/mp4`.

	    Two locations are tried in order: the bucket copy at
	    `DATA_DIR/_assets/her-demo.mp4` (uploaded out-of-band — never a user upload),
	    then the repo's `demo/Her Demo.mp4` so the button also works in local dev.
	    Answers 404 (the UI handles it) when neither file exists.
	    """
	    candidates = (
	        DATA_DIR / ASSETS_DIRNAME / DEMO_VIDEO_NAME,
	        REPO / "demo" / "Her Demo.mp4",
	    )
	    found = next((c for c in candidates if c.is_file()), None)
	    if found is None:
	        return JSONResponse({"error": "demo video not available"}, status_code=404)
	    return FileResponse(str(found), media_type="video/mp4")


	# --------------------------------------------------------------------------- #
	# GPU narration endpoints — Gradio API (@app.api), called via @gradio/client so the
	# HF iframe auth headers forward for ZeroGPU quota. `client` scopes to the caller's
	# namespace. The only @spaces.GPU code is inside narrator.hf_narrator._generate.
	# --------------------------------------------------------------------------- #
	@app.api(name="overview")
	def overview(path: str = "", client: str = "") -> dict:
	    """Gradio endpoint: narrated overview of one session.

	    `path` must pass the engine's path check and belong to `client`'s namespace.
	    Every failure is reported in-band as a dict with an `error` key and empty
	    content; the detail is logged server-side only.
	    """
	    blank = {"overview": "", "model": None}
	    p = srv._safe_session_path(path or None)
	    if p is None or not _client_owns(p, client):
	        return {**blank, "error": "path not allowed"}
	    try:
	        analysis = srv._analyze_cached(p)
	        return srv._overview(analysis)
	    except Exception as e:
	        _log_err("overview", e)
	    return {**blank, "error": "overview failed"}


	@app.api(name="advice")
	def advice(path: str = "", client: str = "") -> dict:
	    """Gradio endpoint: recommendations for one session.

	    `path` must pass the engine's path check and belong to `client`'s namespace.
	    Failures come back in-band as an empty recommendation list plus an `error`
	    key; the detail is logged server-side only.
	    """
	    p = srv._safe_session_path(path or None)
	    permitted = p is not None and _client_owns(p, client)
	    if not permitted:
	        return {"recommendations": [], "model": None, "error": "path not allowed"}
	    try:
	        analysis = srv._analyze_cached(p)
	        return srv._advice(analysis)
	    except Exception as e:
	        _log_err("advice", e)
	    return {"recommendations": [], "model": None, "error": "advice failed"}


	@app.api(name="chat")
	def chat(question: str = "", path: str = "", client: str = "") -> dict:
	    """Gradio endpoint: answer a question about one session.

	    The question is stripped and must be non-empty; `path` must pass the engine's
	    path check and belong to `client`'s namespace. Failures come back in-band as
	    an empty answer plus an `error` key; the detail is logged server-side only.
	    """
	    blank = {"answer": "", "citedTurns": []}
	    question = (question or "").strip()
	    if question == "":
	        return {**blank, "error": "empty question"}
	    p = srv._safe_session_path(path or None)
	    if p is None or not _client_owns(p, client):
	        return {**blank, "error": "path not allowed"}
	    try:
	        return srv._chat(question, p)
	    except Exception as e:
	        _log_err("chat", e)
	    return {**blank, "error": "chat failed"}


	@app.api(name="project_chat")
	def project_chat(question: str = "", cwd: str = "", client: str = "") -> dict:
	    """Gradio endpoint: answer a question across a project's sessions.

	    The question is stripped and must be non-empty, and `cwd` is required. The
	    lookup is scoped to `client`'s namespace directory. Failures come back in-band
	    as an empty answer plus an `error` key; the detail is logged server-side only.
	    """
	    blank = {"answer": "", "sessionHits": []}
	    question = (question or "").strip()
	    if question == "":
	        return {**blank, "error": "empty question"}
	    if not cwd:
	        return {**blank, "error": "cwd required"}
	    try:
	        scope = str(_ns_dir(client))
	        return srv._project_chat(question, cwd, projects_dir=scope)
	    except Exception as e:
	        _log_err("project_chat", e)
	    return {**blank, "error": "project chat failed"}


	@app.api(name="project_narrative")
	def project_narrative(cwd: str = "", client: str = "") -> dict:
	    """Gradio endpoint: prose narrative for a project.

	    Session references are looked up inside `client`'s namespace directory, capped
	    at `srv._PROJECT_CAP`, and each is reduced to a brief; a session whose brief
	    fails is skipped rather than failing the whole request. A missing `cwd` yields
	    an empty narrative with no `error` key; any other failure is logged and
	    reported with `error: "narrative failed"`.
	    """
	    if cwd == "":
	        return {"narrative": "", "model": None}
	    try:
	        scope = str(_ns_dir(client))
	        selected = srv._project_sessions(cwd, scope)[: srv._PROJECT_CAP]
	        briefs = []
	        for ref in selected:
	            try:
	                brief = srv._brief(Path(ref.path))
	            except Exception:
	                continue  # one unreadable session must not sink the narrative
	            briefs.append(brief)
	        return srv._project_narrative(cwd, briefs)
	    except Exception as e:
	        _log_err("project_narrative", e)
	    return {"narrative": "", "model": None, "error": "narrative failed"}


	# --------------------------------------------------------------------------- #
	# TTL sweeper — the hard privacy guarantee. Deletes any uploaded session older than
	# HER_RETENTION_HOURS and prunes empty namespace dirs. Runs at startup + on a timer.
	# --------------------------------------------------------------------------- #
	def _top_level_entry(root: str) -> str:
	    """Return the name of the direct child of DATA_DIR that contains `root`.

	    Returns "" when `root` is DATA_DIR itself.
	    """
	    parts = Path(root).relative_to(DATA_DIR).parts
	    return parts[0] if parts else ""


	def _sweep_once() -> int:
	    """Delete expired uploaded sessions and prune the directories left empty.

	    A `.jsonl` file under DATA_DIR is removed when its mtime is older than
	    RETENTION_HOURS. Only `.jsonl` files are ever deleted. The shared-state
	    directories named in PROTECTED_DIRNAMES are skipped, and they are recognised by
	    POSITION — a direct child of DATA_DIR — not by name anywhere in the path.
	    Matching by name alone would let a user project subdir called `_registry` or
	    `_assets` (both valid sanitized project names) escape the sweep forever, and
	    would disable the sweep entirely if DATA_DIR's own path contained such a name.

	    Returns:
	        The number of session files removed.
	    """
	    if not DATA_DIR.exists():
	        return 0
	    cutoff = time.time() - RETENTION_HOURS * 3600
	    removed = 0

	    for root, dirs, files in os.walk(DATA_DIR):
	        if _top_level_entry(root) in PROTECTED_DIRNAMES:
	            dirs[:] = []  # shared state: do not descend, never delete
	            continue
	        for fn in files:
	            if not fn.endswith(".jsonl"):
	                continue  # only ever delete uploaded sessions, never state json
	            fp = os.path.join(root, fn)
	            try:
	                if os.path.getmtime(fp) < cutoff:
	                    os.remove(fp)
	                    removed += 1
	            except OSError:
	                pass  # e.g. removed concurrently; the next pass retries

	    # Prune now-empty dirs bottom-up; keep DATA_DIR itself and the shared dirs.
	    for root, _dirs, _files in os.walk(DATA_DIR, topdown=False):
	        top = _top_level_entry(root)
	        if not top or top in PROTECTED_DIRNAMES:
	            continue
	        try:
	            if not os.listdir(root):
	                os.rmdir(root)
	        except OSError:
	            pass

	    if removed:
	        # Best-effort: drop cached analyses that may refer to deleted files.
	        try:
	            srv._CACHE.clear()
	        except Exception:
	            pass
	    return removed


	def _sweeper_loop():
	    """Run the TTL sweep forever, pausing SWEEP_INTERVAL seconds between passes.

	    Intended as the target of a daemon thread; it never returns. A failing pass
	    must not kill the loop, but it is logged: the sweep is the retention guarantee,
	    so a sweeper that fails silently is indistinguishable from one that works.
	    """
	    while True:
	        try:
	            _sweep_once()
	        except Exception as e:
	            _log_err("sweeper", e)
	        time.sleep(SWEEP_INTERVAL)


	def _start_sweeper():
	    """Sweep once synchronously, then start the background sweeper thread.

	    The boot-time pass clears anything stale before the app starts serving. Its
	    failure is logged but never blocks startup. The thread is a daemon so it does
	    not keep the process alive on shutdown.
	    """
	    try:
	        _sweep_once()  # clear anything stale at boot
	    except Exception as e:
	        _log_err("sweeper (boot)", e)
	    threading.Thread(target=_sweeper_loop, daemon=True, name="her-ttl-sweeper").start()


	# --------------------------------------------------------------------------- #
	# Static: serve the built React SPA (ui/dist). The app has NO client-side router
	# (navigation is state-based), so we serve index.html at "/", the hashed bundles
	# under /assets, the pulled logos under /binary-logos, and the few root images by
	# EXACT path. We deliberately avoid any wildcard/catch-all: Gradio registers its own
	# /gradio_api/* and /config routes at launch() — AFTER these — so a greedy route here
	# would shadow them and break @gradio/client + ZeroGPU (and Gradio's startup check).
	# --------------------------------------------------------------------------- #
	# Static sub-trees of the build, each mounted at "/<name>" only when present:
	#   assets        hashed JS/CSS bundles
	#   binary-logos  the pulled logos
	#   brand         "built on" logos
	#   fonts         self-hosted webfonts
	for _static in ("assets", "binary-logos", "brand", "fonts"):
	    if (DIST / _static).is_dir():
	        app.mount(f"/{_static}", StaticFiles(directory=str(DIST / _static)), name=_static)

	# Files served from the site root by EXACT path (no wildcard route).
	_ROOT_STATIC = [
	    "favicon.png",
	    "her-logo-light.png",
	    "her-logo.png",
	    "her-mark-light.png",
	    "her-mark.png",
	    "fonts.css",
	]


	def _root_route(fname: str):
	    """Build the GET handler that serves `DIST/<fname>`.

	    A factory is used so that each handler captures its own file; the returned
	    coroutine answers with the file when it exists and a JSON 404 otherwise.
	    """
	    target = DIST / fname

	    async def _route():
	        if not target.is_file():
	            return JSONResponse({"error": "not found"}, status_code=404)
	        return FileResponse(str(target))

	    return _route


	# Register one exact-path GET route per root-level static file.
	for _fn in _ROOT_STATIC:
	    app.add_api_route(
	        f"/{_fn}",
	        _root_route(_fn),
	        methods=["GET"],
	    )


	@app.get("/")
	def index():
	    """Serve the SPA entry page, or a 503 with a build hint when the UI is not built."""
	    idx = DIST / "index.html"
	    if not idx.is_file():
	        hint = {"error": "UI not built — run `cd ui && npm run build` before deploying."}
	        return JSONResponse(hint, status_code=503)
	    return FileResponse(str(idx))


	# Gradio Server mode: HF Spaces (Gradio SDK) runs this file and serves `app` on 7860.
	# Startup order: TTL sweeper first (stale uploads are cleared before serving), then the
	# enricher, then the blocking launch.
	_start_sweeper()
	# Background binary enricher: drains unknown tool-names discovered during analysis and
	# resolves them (local bundled DB → Nemotron → public registries), writing the shared
	# learned registry on the bucket so later users get better detection. server/app.py owns
	# the daemon + queue; it shares to R2 only on explicit consent (off by default here).
	try:
	    srv._start_enricher()
	except Exception as e:
	    # The enricher is optional — the app must still boot — but record why it is absent
	    # instead of failing silently.
	    _log_err("enricher start", e)
	app.launch(
	    server_name="0.0.0.0",
	    # PORT wins over GRADIO_SERVER_PORT; 7860 is the fallback when neither is set.
	    server_port=int(os.environ.get("PORT", os.environ.get("GRADIO_SERVER_PORT", 7860))),
	    show_error=False,  # don't surface server tracebacks to clients (info-disclosure)
	)