#!/usr/bin/env python3
"""Her · हेर — Hugging Face ZeroGPU Space entrypoint (Gradio Server mode).

ZeroGPU is Gradio-SDK-only and its GPU quota requires the HF iframe auth headers to be
forwarded on GPU-invoking calls — a plain `fetch` to a custom route that triggers
`@spaces.GPU` bypasses that and fails. So this app uses **Gradio Server mode**
(`gradio.Server`, a FastAPI server with Gradio's API engine):

  * DETERMINISTIC engine endpoints (no GPU) are plain FastAPI routes the React app
    calls with `fetch`:
        GET  /api/health          GET  /api/sessions       POST /api/upload
        GET  /api/analyze?path=   GET  /api/project?cwd=   POST /api/clear
        GET/POST /api/consent     GET  /api/demo-video
  * GPU narration endpoints are Gradio API endpoints (`@app.api`) the browser calls
    via `@gradio/client` (which forwards the auth headers ZeroGPU needs):
        overview · advice · chat · project_chat · project_narrative

STORAGE & PRIVACY (the hosted Space):
  * Uploaded sessions are stored on an HF **storage bucket** mounted read-write at
    `HER_DATA_DIR` (`/data`), namespaced per client:
    `/data/<ns>/<project>/<uuid>.jsonl`, where `ns` is the first 16 hex characters of
    `sha256(client-token)`. The client token is generated in the browser
    (localStorage) and sent as the `X-Her-Client` header (REST) / `client` arg
    (Gradio), so every user only ever SEES and ANALYZES their own sessions —
    public-safe.
  * Trace content is auto-deleted: a background sweeper removes anything older than
    `HER_RETENTION_HOURS` (default 24h) — the hard guarantee — and `POST /api/clear`
    wipes the caller's namespace immediately (the UI calls it on a "Clear" click and
    on tab-close).

The deterministic ENGINE is reused unchanged from the local product; only the transport
and the model backend differ. server/app.py stays the single source of truth.
"""
from __future__ import annotations

import hashlib
import os
import re
import shutil
import sys
import threading
import time
import uuid
from pathlib import Path

# Select the HF/ZeroGPU narrator backend BEFORE importing server helpers, so every
# get_narrator() call in server/app.py resolves to the transformers model.
os.environ.setdefault("HER_BACKEND", "hf")
# No usage telemetry to gradio.app from a privacy-focused app (set before importing gradio).
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")

import spaces  # noqa: F401 (ZeroGPU runtime hook; effect-free off-Space)

# Force the model to load at MODULE level (ZeroGPU requirement: cuda placement under
# CUDA-emulation at import; real GPU only inside @spaces.GPU). Safe if it fails — the
# narrator reports not-ready and callers fall back to the deterministic prose.
import narrator.hf_narrator  # noqa: F401,E402

import gradio as gr  # noqa: E402
from fastapi import File, Form, Header, UploadFile  # noqa: E402
from fastapi.responses import FileResponse, JSONResponse  # noqa: E402
from fastapi.staticfiles import StaticFiles  # noqa: E402

import server.app as srv  # noqa: E402 (the engine request logic — reused as-is)

REPO = Path(__file__).resolve().parent
DIST = REPO / "ui" / "dist"

# Storage root: the HF bucket mount on the Space (HER_DATA_DIR=/data), else a local dir.
# server/app.py is told HER_EXTRA_ROOT=/data so _safe_session_path permits paths here.
# Resolved once here so later containment checks compare against a canonical path.
DATA_DIR = Path(os.environ.get("HER_DATA_DIR", str(REPO / ".uploads"))).resolve()
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Retention window (hours) and sweeper period (seconds); both overridable via env.
RETENTION_HOURS = float(os.environ.get("HER_RETENTION_HOURS", "24"))
SWEEP_INTERVAL = int(os.environ.get("HER_SWEEP_INTERVAL", "1800"))  # 30 min

# Public-safe budgets — one client must not be able to exhaust memory or the bucket.
MAX_UPLOAD_BYTES = 70 * 1024 * 1024  # 70 MB per uploaded session file
MAX_PROJECTS_PER_NS = 50  # projects (subdirs) per client namespace
MAX_SESSIONS_PER_PROJECT = 50  # .jsonl sessions per project subdir


def _log_err(where: str, e: Exception) -> None:
    """Write error detail to stderr so client responses can stay generic.

    We never hand internal paths / tracebacks back to the browser (info-disclosure).

    Args:
        where: short label of the failing endpoint or subsystem.
        e: the exception that was caught.
    """
    print(f"[her] {where}: {type(e).__name__}: {e}", file=sys.stderr, flush=True)


# The shared, persistent binary registry the enricher writes lives OUTSIDE every user
# namespace (`/data/_registry/...` via HER_LEARNED_PATH). Users can never reach it:
# uploads only ever land under `/data/<ns>/`, and the sweeper skips it.
REGISTRY_DIRNAME = "_registry"
# The recorded product demo (mp4) is a shared, non-user asset on the bucket at
# `/data/_assets/her-demo.mp4` (uploaded out-of-band, served read-only by /api/demo-video).
# Like the registry it is never a user upload and must never be swept.
ASSETS_DIRNAME = "_assets"
DEMO_VIDEO_NAME = "her-demo.mp4"
# Bucket dirs that hold shared state, not per-user trace content — the sweeper skips them.
PROTECTED_DIRNAMES = (REGISTRY_DIRNAME, ASSETS_DIRNAME)

_LEARNED = os.environ.get("HER_LEARNED_PATH")
if _LEARNED:
    try:
        Path(_LEARNED).parent.mkdir(parents=True, exist_ok=True)
    except OSError:
        pass  # best-effort: a missing registry dir must not stop the app from booting

app = gr.Server()


# --------------------------------------------------------------------------- #
# per-client namespace — isolates each browser's uploads (public-safe). The token
# is opaque to us; we only hash it to a directory name.
# --------------------------------------------------------------------------- #
def _ns(client: str) -> str:
    """Map a client token to its namespace name: the first 16 hex chars of its SHA-256.

    NOTE(review): an empty token hashes the literal "anon", so every caller that sends
    no token shares one namespace — confirm the UI always sends a token on the Space.
    """
    return hashlib.sha256((client or "anon").encode("utf-8")).hexdigest()[:16]


def _ns_dir(client: str) -> Path:
    """Return the caller's namespace directory, `DATA_DIR/<ns>` (not created here)."""
    return DATA_DIR / _ns(client)


def _safe_subdir(name: str) -> str:
    """Sanitize a caller-supplied project subdir name (no traversal).

    Every character outside `[A-Za-z0-9_-]` — including '.' and both path separators —
    is replaced with '_', so '..'/dot-segments can never escape the namespace dir. The
    result is capped at 80 characters; an empty result falls back to 'uploads'.
    """
    s = re.sub(r"[^A-Za-z0-9_-]", "_", (name or "").strip())
    return s[:80] or "uploads"


def _client_owns(p: Path, client: str) -> bool:
    """Return True when `client` may read `p`.

    A bucket-stored path must belong to the requesting client's namespace. Paths
    outside DATA_DIR (the bundled fixture / local sessions) are unaffected.

    `is_relative_to` is a purely lexical comparison, so the path is resolved first:
    otherwise `<DATA_DIR>/<own-ns>/../<other-ns>/x.jsonl` would pass the check. For an
    already-resolved path this is a no-op.
    """
    try:
        real = p.resolve()
        if not real.is_relative_to(DATA_DIR):
            return True
        return real.is_relative_to(_ns_dir(client))
    except Exception:
        return False  # fail CLOSED — a security predicate must never default to "allow"


# --------------------------------------------------------------------------- #
# DETERMINISTIC engine endpoints — plain FastAPI routes, no GPU (React `fetch`).
# --------------------------------------------------------------------------- #
@app.get("/api/health")
def api_health():
    """Report liveness plus the flags the UI uses to pick its narration transport."""
    try:
        ready = srv.get_narrator().wait_until_ready(max_wait=0.1, interval=0.1)
    except Exception:
        ready = False
    # `llama` is the UI's flag for "model reachable"; `gpu` tells the UI to route
    # narration through @gradio/client (auth forwards for ZeroGPU quota).
    # `space` (HF sets SPACE_ID="owner/name" in the container) lets the UI build a
    # download command that points at THIS Space, not the author's. Empty locally.
    return {"ok": True, "llama": bool(ready), "gpu": True,
            "space": os.environ.get("SPACE_ID", "")}


@app.get("/api/sessions")
def api_sessions(x_her_client: str = Header(default="")):
    """List the sessions in the caller's namespace; never raises to the browser."""
    try:
        # Scoped to THIS client's namespace — you only ever see your own uploads.
        return srv._sessions_payload(projects_dir=str(_ns_dir(x_her_client)))
    except Exception as e:  # never 500 the browser
        _log_err("sessions", e)
        return {"error": "could not list sessions", "projects": [], "total": 0}


@app.post("/api/upload")
async def api_upload(
    file: UploadFile = File(...),
    project: str = Form(default="uploads"),
    x_her_client: str = Header(default=""),
):
    """Store an uploaded .jsonl under the caller's namespace.

    The file lands at `/data/<ns>/<project>/<uuid>.jsonl`. `project` (the bulk script
    passes the encoded project dir) becomes the subdir so discovery's `<ns>/*/*.jsonl`
    glob groups them. Guarded: .jsonl only, a hard size cap, and per-namespace
    project/session budgets.

    Returns:
        `{"path", "name"}` on success, otherwise a JSON `{"error"}` with status
        400 (bad input), 409 (budget reached), 413 (too large) or 500 (storage failure).
    """
    name = (file.filename or "").lower()
    if not name.endswith(".jsonl"):
        return JSONResponse({"error": "only .jsonl files are accepted"}, status_code=400)
    # Bounded read: pull at most the cap (+1 sentinel) into memory — a multi-GB upload
    # can't OOM the box. read(N) returns ≤N bytes; cap+1 back means it's over budget.
    data = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(data) > MAX_UPLOAD_BYTES:
        max_mb = MAX_UPLOAD_BYTES // (1024 * 1024)  # keep the message in sync with the cap
        return JSONResponse({"error": f"file too large (max {max_mb} MB per session)"},
                            status_code=413)
    if not data.strip():
        return JSONResponse({"error": "empty file"}, status_code=400)

    nsd = _ns_dir(x_her_client)
    dest_dir = nsd / _safe_subdir(project)
    # belt + braces: the destination must stay inside the caller's namespace dir.
    try:
        if not dest_dir.resolve().is_relative_to(nsd.resolve()):
            return JSONResponse({"error": "bad project"}, status_code=400)
    except Exception:
        return JSONResponse({"error": "bad project"}, status_code=400)

    dest = dest_dir / f"{uuid.uuid4().hex}.jsonl"
    try:
        # per-namespace budgets — keep one client from filling the bucket (public-safe).
        if not dest_dir.exists() and nsd.is_dir():
            if sum(1 for d in nsd.iterdir() if d.is_dir()) >= MAX_PROJECTS_PER_NS:
                return JSONResponse(
                    {"error": f"project limit reached (max {MAX_PROJECTS_PER_NS} per user)"},
                    status_code=409)
        if dest_dir.is_dir() and sum(1 for _ in dest_dir.glob("*.jsonl")) >= MAX_SESSIONS_PER_PROJECT:
            return JSONResponse(
                {"error": f"session limit reached for this project (max {MAX_SESSIONS_PER_PROJECT})"},
                status_code=409)
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest.write_bytes(data)
    except OSError as e:
        # Storage failure (bucket full / unavailable): log the detail server-side and
        # answer generically, like every other route, instead of an unhandled 500.
        _log_err("upload", e)
        try:
            dest.unlink(missing_ok=True)  # don't leave a truncated session behind
        except OSError:
            pass
        return JSONResponse({"error": "could not store upload"}, status_code=500)
    return {"path": str(dest.resolve()), "name": file.filename}


@app.get("/api/analyze")
def api_analyze(path: str = "", x_her_client: str = Header(default="")):
    """Run the deterministic analysis for one session the caller is allowed to read."""
    p = srv._safe_session_path(path or None)
    if p is None or not _client_owns(p, x_her_client):
        return JSONResponse({"error": "path not allowed"}, status_code=400)
    try:
        return srv._analyze_cached(p)
    except Exception as e:
        _log_err("analyze", e)
        return JSONResponse({"error": "analyze failed"}, status_code=500)


@app.get("/api/project")
def api_project(cwd: str = "", x_her_client: str = Header(default="")):
    """Return the deterministic project view for `cwd`, scoped to the caller's namespace."""
    if not cwd:
        return JSONResponse({"error": "cwd required"}, status_code=400)
    try:
        # Deterministic only; the prose narrative comes from the GPU `project_narrative`
        # Gradio endpoint (auth-forwarded), not this plain-REST route.
        return srv._project(cwd, with_narrative=False,
                            projects_dir=str(_ns_dir(x_her_client)))
    except Exception as e:
        _log_err("project", e)
        return JSONResponse({"error": "could not load project"}, status_code=500)


@app.post("/api/clear")
async def api_clear(client: str = "", x_her_client: str = Header(default="")):
    """Wipe the caller's namespace (their uploaded sessions).

    `client` is also read from the query string so navigator.sendBeacon (which can't
    set headers) works on tab-close; it takes precedence over the header. A request
    with no token at all removes nothing. Per-client: never touches anyone else's data.

    Returns:
        `{"ok": True, "cleared": <number of .jsonl files counted before removal>}`.
    """
    cid = client or x_her_client
    nsd = _ns_dir(cid)
    removed = 0
    try:
        if cid and nsd.is_dir():
            removed = sum(1 for _ in nsd.rglob("*.jsonl"))
            shutil.rmtree(nsd, ignore_errors=True)
            srv._CACHE.clear()  # drop any cached analysis for the wiped files
    except Exception as e:
        # Still answer ok (the beacon caller can't act on an error), but leave a trace
        # server-side: a failed wipe is a privacy-relevant event.
        _log_err("clear", e)
    return {"ok": True, "cleared": removed}


@app.get("/api/consent")
def api_consent_get():
    """Return the current consent state held by server/app.py."""
    # NOTE(review): this route is not scoped by client token — confirm a process-wide
    # consent state is what is intended on a multi-user Space.
    return srv._CONSENT


@app.post("/api/consent")
async def api_consent_post(request_body: dict | None = None):
    """Persist the `accepted` / `share` consent flags from the JSON body."""
    body = request_body or {}
    # default to False when missing so a malformed/empty body cannot opt anyone in.
    # NOTE(review): bool() treats any non-empty string (even "false") as True —
    # presumably the UI only ever sends JSON booleans; verify.
    srv._save_consent(bool(body.get("accepted", False)), bool(body.get("share", False)))
    return srv._CONSENT


@app.get("/api/demo-video")
def api_demo_video():
    """Stream the recorded product demo.

    On the Space it lives on the bucket at `/data/_assets/her-demo.mp4` (uploaded
    out-of-band — never a user upload, never swept); locally we fall back to the repo's
    `demo/` copy so the button works in dev. FileResponse honours Range requests, so
    the player can seek. 404 (the UI handles it) when absent.
    """
    for p in (DATA_DIR / ASSETS_DIRNAME / DEMO_VIDEO_NAME, REPO / "demo" / "Her Demo.mp4"):
        if p.is_file():
            return FileResponse(str(p), media_type="video/mp4")
    return JSONResponse({"error": "demo video not available"}, status_code=404)


# --------------------------------------------------------------------------- #
# GPU narration endpoints — Gradio API (@app.api), called via @gradio/client so the
# HF iframe auth headers forward for ZeroGPU quota. `client` scopes to the caller's
# namespace. The only @spaces.GPU code is inside narrator.hf_narrator._generate.
# --------------------------------------------------------------------------- #
@app.api(name="overview")
def overview(path: str = "", client: str = "") -> dict:
    """Narrate an overview of one session the caller is allowed to read.

    On a refused path or any failure, returns an empty `overview` with an `error` key.
    """
    p = srv._safe_session_path(path or None)
    if p is None or not _client_owns(p, client):
        return {"overview": "", "model": None, "error": "path not allowed"}
    try:
        return srv._overview(srv._analyze_cached(p))
    except Exception as e:
        _log_err("overview", e)
        return {"overview": "", "model": None, "error": "overview failed"}


@app.api(name="advice")
def advice(path: str = "", client: str = "") -> dict:
    """Produce recommendations for one session the caller is allowed to read.

    On a refused path or any failure, returns empty `recommendations` with an `error` key.
    """
    p = srv._safe_session_path(path or None)
    if p is None or not _client_owns(p, client):
        return {"recommendations": [], "model": None, "error": "path not allowed"}
    try:
        return srv._advice(srv._analyze_cached(p))
    except Exception as e:
        _log_err("advice", e)
        return {"recommendations": [], "model": None, "error": "advice failed"}


@app.api(name="chat")
def chat(question: str = "", path: str = "", client: str = "") -> dict:
    """Answer a question about one session the caller is allowed to read.

    A blank question, a refused path or any failure yields an empty `answer` with an
    `error` key.
    """
    question = (question or "").strip()
    if not question:
        return {"answer": "", "citedTurns": [], "error": "empty question"}
    p = srv._safe_session_path(path or None)
    if p is None or not _client_owns(p, client):
        return {"answer": "", "citedTurns": [], "error": "path not allowed"}
    try:
        return srv._chat(question, p)
    except Exception as e:
        _log_err("chat", e)
        return {"answer": "", "citedTurns": [], "error": "chat failed"}


@app.api(name="project_chat")
def project_chat(question: str = "", cwd: str = "", client: str = "") -> dict:
    """Answer a question across the project `cwd`, scoped to the caller's namespace."""
    question = (question or "").strip()
    if not question:
        return {"answer": "", "sessionHits": [], "error": "empty question"}
    if not cwd:
        return {"answer": "", "sessionHits": [], "error": "cwd required"}
    try:
        return srv._project_chat(question, cwd, projects_dir=str(_ns_dir(client)))
    except Exception as e:
        _log_err("project_chat", e)
        return {"answer": "", "sessionHits": [], "error": "project chat failed"}


@app.api(name="project_narrative")
def project_narrative(cwd: str = "", client: str = "") -> dict:
    """Narrate the project `cwd` from briefs of the caller's own sessions.

    At most `srv._PROJECT_CAP` sessions are briefed; a session whose brief fails is
    skipped rather than failing the whole narrative. An empty `cwd` returns an empty
    narrative without an `error` key.
    """
    if not cwd:
        return {"narrative": "", "model": None}
    try:
        refs = srv._project_sessions(cwd, str(_ns_dir(client)))
        briefs = []
        for s in refs[: srv._PROJECT_CAP]:
            try:
                briefs.append(srv._brief(Path(s.path)))
            except Exception:
                continue  # one unreadable session must not sink the narrative
        return srv._project_narrative(cwd, briefs)
    except Exception as e:
        _log_err("project_narrative", e)
        return {"narrative": "", "model": None, "error": "narrative failed"}


# --------------------------------------------------------------------------- #
# TTL sweeper — the hard privacy guarantee. Deletes any uploaded session older than
# HER_RETENTION_HOURS and prunes empty namespace dirs. Runs at startup + on a timer.
# --------------------------------------------------------------------------- #
def _is_protected(root: str) -> bool:
    """Return True when `root` is, or lies inside, a shared top-level dir of DATA_DIR.

    Only the FIRST path component below DATA_DIR counts. Matching the name anywhere in
    the path would also exempt a user project that happens to be called `_registry` or
    `_assets` (`/data/<ns>/_registry/...`), letting its sessions outlive the retention
    window, and would misfire if an ancestor of DATA_DIR carried one of those names.
    """
    rel = os.path.relpath(root, str(DATA_DIR))
    return rel.split(os.sep, 1)[0] in PROTECTED_DIRNAMES


def _sweep_once() -> int:
    """Delete expired uploaded sessions and prune the directories they leave empty.

    Only `.jsonl` files whose mtime is older than RETENTION_HOURS are removed; the
    shared top-level dirs named in PROTECTED_DIRNAMES are never touched. The analysis
    cache is cleared when anything was removed.

    Returns:
        The number of session files deleted.
    """
    cutoff = time.time() - RETENTION_HOURS * 3600
    removed = 0
    if not DATA_DIR.exists():
        return 0
    for root, dirs, files in os.walk(DATA_DIR):
        if _is_protected(root):
            dirs[:] = []  # NEVER sweep shared state — and don't bother descending into it
            continue
        for fn in files:
            if not fn.endswith(".jsonl"):
                continue  # only ever delete uploaded sessions, never registry/state json
            fp = os.path.join(root, fn)
            try:
                if os.path.getmtime(fp) < cutoff:
                    os.remove(fp)
                    removed += 1
            except FileNotFoundError:
                pass  # already gone (e.g. a concurrent /api/clear) — nothing to do
            except OSError as e:
                # A session we cannot delete breaks the retention promise: make it visible.
                _log_err("sweep", e)
    # prune now-empty dirs bottom-up (keep DATA_DIR itself and the shared dirs)
    for root, _dirs, _files in os.walk(DATA_DIR, topdown=False):
        if os.path.abspath(root) == str(DATA_DIR) or _is_protected(root):
            continue
        try:
            if not os.listdir(root):
                os.rmdir(root)
        except OSError:
            pass  # raced with an upload or another sweep; retried on the next pass
    if removed:
        try:
            srv._CACHE.clear()
        except Exception:
            pass
    return removed


def _sweeper_loop():
    """Run `_sweep_once` forever, sleeping SWEEP_INTERVAL seconds between passes."""
    while True:
        try:
            _sweep_once()
        except Exception as e:
            # Keep the thread alive, but never fail silently: this loop IS the guarantee.
            _log_err("sweeper", e)
        time.sleep(SWEEP_INTERVAL)


def _start_sweeper():
    """Sweep once synchronously, then start the periodic sweeper as a daemon thread."""
    try:
        _sweep_once()  # clear anything stale at boot
    except Exception as e:
        _log_err("sweeper", e)
    threading.Thread(target=_sweeper_loop, daemon=True, name="her-ttl-sweeper").start()
# --------------------------------------------------------------------------- #
# Static: serve the built React SPA (ui/dist). The app has NO client-side router
# (navigation is state-based), so we serve index.html at "/", the hashed bundles
# under /assets, the pulled logos under /binary-logos, and the few root images by
# EXACT path. We deliberately avoid any wildcard/catch-all: Gradio registers its own
# /gradio_api/* and /config routes at launch() — AFTER these — so a greedy route here
# would shadow them and break @gradio/client + ZeroGPU (and Gradio's startup check).
# --------------------------------------------------------------------------- #
# Each directory is mounted at the same-named URL prefix, only when the build has it:
# hashed bundles, pulled logos, "built on" logos (brand), self-hosted webfonts (fonts).
for _mount in ("assets", "binary-logos", "brand", "fonts"):
    if (DIST / _mount).is_dir():
        app.mount(f"/{_mount}", StaticFiles(directory=str(DIST / _mount)), name=_mount)

# Root-level files served by exact path (no wildcard — see the note above).
_ROOT_STATIC = [
    "favicon.png",
    "her-logo-light.png",
    "her-logo.png",
    "her-mark-light.png",
    "her-mark.png",
    "fonts.css",
]


def _root_route(fname: str):
    """Build a GET handler that serves `DIST/fname`, or a JSON 404 when it is absent.

    A factory (rather than a handler defined inside the loop) so each route captures
    its own `fname` instead of the loop variable's final value.
    """
    async def _route():
        p = DIST / fname
        if p.is_file():
            return FileResponse(str(p))
        return JSONResponse({"error": "not found"}, status_code=404)
    return _route


for _fn in _ROOT_STATIC:
    app.add_api_route(f"/{_fn}", _root_route(_fn), methods=["GET"])


@app.get("/")
def index():
    """Serve the SPA shell, or a 503 explaining that the UI has not been built."""
    idx = DIST / "index.html"
    if idx.is_file():
        return FileResponse(str(idx))
    return JSONResponse(
        {"error": "UI not built — run `cd ui && npm run build` before deploying."},
        status_code=503,
    )


# Gradio Server mode: HF Spaces (Gradio SDK) runs this file and serves `app` on 7860.
_start_sweeper()
# Background binary enricher: drains unknown tool-names discovered during analysis and
# resolves them (local bundled DB → Nemotron → public registries), writing the shared
# learned registry on the bucket so later users get better detection. server/app.py owns
# the daemon + queue; it shares to R2 only on explicit consent (off by default here).
try:
    srv._start_enricher()
except Exception as e:
    # Non-fatal (the app works without enrichment), but leave a trace for operators.
    _log_err("enricher", e)

app.launch(
    server_name="0.0.0.0",
    server_port=int(os.environ.get("PORT", os.environ.get("GRADIO_SERVER_PORT", 7860))),
    show_error=False,  # don't surface server tracebacks to clients (info-disclosure)
)