Spaces:
Running on Zero
Running on Zero
| #!/usr/bin/env python3 | |
| """Her · हेर — Hugging Face ZeroGPU Space entrypoint (Gradio Server mode). | |
| ZeroGPU is Gradio-SDK-only and its GPU quota requires the HF iframe auth headers to | |
| be forwarded on GPU-invoking calls — a plain `fetch` to a custom route that triggers | |
| `@spaces.GPU` bypasses that and fails. So this app uses **Gradio Server mode** | |
| (`gradio.Server`, a FastAPI server with Gradio's API engine): | |
| * DETERMINISTIC engine endpoints (no GPU) are plain FastAPI routes the React app | |
| calls with `fetch`: | |
| GET /api/health GET /api/sessions | |
| POST /api/upload GET /api/analyze?path= | |
| GET /api/project?cwd= POST /api/clear GET/POST /api/consent | |
| * GPU narration endpoints are Gradio API endpoints (`@app.api`) the browser calls | |
| via `@gradio/client` (which forwards the auth headers ZeroGPU needs): | |
| overview · advice · chat · project_chat · project_narrative | |
| STORAGE & PRIVACY (the hosted Space): | |
| * Uploaded sessions are stored on an HF **storage bucket** mounted read-write at | |
| `HER_DATA_DIR` (`/data`), namespaced per client: `/data/<ns>/<project>/<file>.jsonl` | |
| where `ns` is the first 16 hex characters of `sha256(client-token)`. The client token is generated in the browser | |
| (localStorage) and sent as the `X-Her-Client` header (REST) / `client` arg (Gradio), | |
| so every user only ever SEES and ANALYZES their own sessions — public-safe. | |
| * Trace content is auto-deleted: a background sweeper removes anything older than | |
| `HER_RETENTION_HOURS` (24h) — the hard guarantee — and `POST /api/clear` wipes the | |
| caller's namespace immediately (the UI calls it on a "Clear" click and on tab-close). | |
| The deterministic ENGINE is reused unchanged from the local product; only the transport | |
| and the model backend differ. server/app.py stays the single source of truth. | |
| """ | |
| from __future__ import annotations | |
| import hashlib | |
| import os | |
| import re | |
| import shutil | |
| import sys | |
| import threading | |
| import time | |
| import uuid | |
| from pathlib import Path | |
| # Select the HF/ZeroGPU narrator backend BEFORE importing server helpers, so every | |
| # get_narrator() call in server/app.py resolves to the transformers model. | |
| os.environ.setdefault("HER_BACKEND", "hf") | |
| # No usage telemetry to gradio.app from a privacy-focused app (set before importing gradio). | |
| os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False") | |
| import spaces # noqa: F401 (ZeroGPU runtime hook; effect-free off-Space) | |
| # Force the model to load at MODULE level (ZeroGPU requirement: cuda placement under | |
| # CUDA-emulation at import; real GPU only inside @spaces.GPU). Safe if it fails — the | |
| # narrator reports not-ready and callers fall back to the deterministic prose. | |
| import narrator.hf_narrator # noqa: F401,E402 | |
| import gradio as gr # noqa: E402 | |
| from fastapi import File, Form, Header, UploadFile # noqa: E402 | |
| from fastapi.responses import FileResponse, JSONResponse # noqa: E402 | |
| from fastapi.staticfiles import StaticFiles # noqa: E402 | |
| import server.app as srv # noqa: E402 (the engine request logic — reused as-is) | |
# Filesystem anchors: the directory holding this file, and the built React bundle in it.
REPO = Path(__file__).resolve().parent
DIST = REPO.joinpath("ui", "dist")

# Storage root: the HF bucket mount on the Space (HER_DATA_DIR=/data), else a local dir.
# server/app.py is told HER_EXTRA_ROOT=/data so _safe_session_path permits paths here.
DATA_DIR = Path(os.getenv("HER_DATA_DIR", str(REPO / ".uploads"))).resolve()
DATA_DIR.mkdir(parents=True, exist_ok=True)  # created eagerly at import time

# Retention window (hours) and the pause between sweeps (seconds; default 30 min).
RETENTION_HOURS = float(os.getenv("HER_RETENTION_HOURS", "24"))
SWEEP_INTERVAL = int(os.getenv("HER_SWEEP_INTERVAL", "1800"))

# Public-safe budgets — one client must not be able to exhaust memory or the bucket.
MAX_UPLOAD_BYTES = 70 * 1024 ** 2  # 70 MB per uploaded session file
MAX_PROJECTS_PER_NS = 50  # projects (subdirs) per client namespace
MAX_SESSIONS_PER_PROJECT = 50  # .jsonl sessions per project subdir
def _log_err(where: str, e: Exception) -> None:
    """Write one diagnostic line about `e` to stderr, tagged with `where`.

    Detail is kept server-side so that responses sent to the browser can stay
    generic (no internal paths or tracebacks — info-disclosure).
    """
    kind = type(e).__name__
    sys.stderr.write(f"[her] {where}: {kind}: {e}\n")
    sys.stderr.flush()
# The shared, persistent binary registry the enricher writes lives OUTSIDE every user
# namespace (`/data/_registry/...` via HER_LEARNED_PATH). Uploads only ever land under
# a per-client namespace directory (`/data/<ns>/`), and the sweeper skips the registry.
REGISTRY_DIRNAME = "_registry"
# The recorded product demo (mp4) is a shared, non-user asset on the bucket at
# `/data/_assets/her-demo.mp4` (uploaded out-of-band, served read-only by /api/demo-video).
# Like the registry it is never a user upload and must never be swept.
ASSETS_DIRNAME = "_assets"
DEMO_VIDEO_NAME = "her-demo.mp4"
# Bucket dirs that hold shared state, not per-user trace content — the sweeper skips them.
PROTECTED_DIRNAMES = (REGISTRY_DIRNAME, ASSETS_DIRNAME)

# Make sure the directory that will hold the learned registry exists. This stays
# best-effort (startup must not crash), but a failure is logged rather than swallowed,
# so a registry that can never be written is visible in the Space logs.
_LEARNED = os.environ.get("HER_LEARNED_PATH")
if _LEARNED:
    try:
        Path(_LEARNED).parent.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        _log_err("learned-registry dir", e)

app = gr.Server()
| # --------------------------------------------------------------------------- # | |
| # per-client namespace — isolates each browser's uploads (public-safe). The token | |
| # is opaque to us; we only hash it to a directory name. | |
| # --------------------------------------------------------------------------- # | |
def _ns(client: str) -> str:
    """Map an opaque client token to its namespace name.

    The name is the first 16 hex characters of the token's SHA-256 digest. A falsy
    token (empty string / None) is hashed as the literal "anon".
    """
    token = client if client else "anon"
    digest = hashlib.sha256(token.encode("utf-8"))
    return digest.hexdigest()[:16]
def _ns_dir(client: str) -> Path:
    """Return the storage directory for `client`: DATA_DIR joined with its namespace name."""
    return DATA_DIR.joinpath(_ns(client))
def _safe_subdir(name: str) -> str:
    """Sanitize a caller-supplied project subdir name (no traversal); default 'uploads'.

    After stripping surrounding whitespace, every character outside ASCII letters,
    digits, '_' and '-' is replaced by '_'. That includes '.' and path separators,
    so dot-segments cannot survive. The result is cut to 80 characters; an empty
    result becomes 'uploads'.
    """
    allowed = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-"
    raw = (name or "").strip()
    cleaned = "".join(ch if ch in allowed else "_" for ch in raw)
    truncated = cleaned[:80]
    if not truncated:
        return "uploads"
    return truncated
def _client_owns(p: Path, client: str) -> bool:
    """Decide whether `client` may access the path `p`.

    Paths outside DATA_DIR (the bundled fixture / local sessions) are always allowed.
    Paths under DATA_DIR are allowed only when they sit inside the client's own
    namespace directory. Any exception during the check yields False.
    """
    try:
        inside_bucket = p.is_relative_to(DATA_DIR)
        return p.is_relative_to(_ns_dir(client)) if inside_bucket else True
    except Exception:
        # fail CLOSED — a security predicate must never default to "allow"
        return False
| # --------------------------------------------------------------------------- # | |
| # DETERMINISTIC engine endpoints — plain FastAPI routes, no GPU (React `fetch`). | |
| # --------------------------------------------------------------------------- # | |
def api_health():
    """Report liveness plus the flags the UI uses to pick its transport.

    * `llama` — whether the narrator reported ready within a 0.1 s wait (False if the
      readiness probe raised).
    * `gpu`   — always True here; tells the UI to route narration through
      @gradio/client (auth forwards for ZeroGPU quota).
    * `space` — the SPACE_ID environment variable (HF sets it to "owner/name" in the
      container) so the UI can build a download command pointing at THIS Space;
      empty when unset.
    """
    try:
        narrator = srv.get_narrator()
        ready = narrator.wait_until_ready(max_wait=0.1, interval=0.1)
    except Exception:
        ready = False
    payload = {
        "ok": True,
        "llama": bool(ready),
        "gpu": True,
        "space": os.environ.get("SPACE_ID", ""),
    }
    return payload
def api_sessions(x_her_client: str = Header(default="")):
    """List sessions found under the calling client's namespace directory.

    The client is identified by the `X-Her-Client` header. On any failure the error
    is logged server-side and an empty listing with a generic message is returned,
    so the browser never sees a 500.
    """
    try:
        # Scoped to THIS client's namespace — you only ever see your own uploads.
        namespace_root = str(_ns_dir(x_her_client))
        return srv._sessions_payload(projects_dir=namespace_root)
    except Exception as exc:  # never 500 the browser
        _log_err("sessions", exc)
    return {"error": "could not list sessions", "projects": [], "total": 0}
async def api_upload(
    file: UploadFile = File(...),
    project: str = Form(default="uploads"),
    x_her_client: str = Header(default=""),
):
    """Store an uploaded .jsonl under the caller's namespace.

    The file lands at `<DATA_DIR>/<ns>/<project>/<uuid>.jsonl`. `project` (the bulk
    script passes the encoded project dir) is sanitized and becomes the subdir, so
    discovery's `<ns>/*/*.jsonl` glob groups sessions by project.

    Responses:
      * 400 — not a `.jsonl` filename, empty content, or a bad project name
      * 413 — content exceeds MAX_UPLOAD_BYTES
      * 409 — per-namespace project budget or per-project session budget reached
      * 500 — the file could not be stored (detail is logged server-side only)
      * 200 — `{"path": <absolute stored path>, "name": <original filename>}`
    """
    import asyncio  # local import keeps this block self-contained

    name = (file.filename or "").lower()
    if not name.endswith(".jsonl"):
        return JSONResponse({"error": "only .jsonl files are accepted"}, status_code=400)

    # Bounded read: pull at most the cap (+1 sentinel) into memory. read(N) returns
    # at most N bytes, so getting cap+1 back means the upload is over budget.
    data = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(data) > MAX_UPLOAD_BYTES:
        # Derive the figure from the constant so the message cannot drift from the cap.
        limit_mb = MAX_UPLOAD_BYTES // (1024 * 1024)
        return JSONResponse(
            {"error": f"file too large (max {limit_mb} MB per session)"}, status_code=413
        )
    if not data.strip():
        return JSONResponse({"error": "empty file"}, status_code=400)

    nsd = _ns_dir(x_her_client)
    dest_dir = nsd / _safe_subdir(project)
    # belt + braces: the destination must stay inside the caller's namespace dir.
    try:
        if not dest_dir.resolve().is_relative_to(nsd.resolve()):
            return JSONResponse({"error": "bad project"}, status_code=400)
    except Exception:
        return JSONResponse({"error": "bad project"}, status_code=400)

    dest = dest_dir / f"{uuid.uuid4().hex}.jsonl"
    try:
        # per-namespace budgets — keep one client from filling the bucket (public-safe).
        if not dest_dir.exists() and nsd.is_dir():
            if sum(1 for d in nsd.iterdir() if d.is_dir()) >= MAX_PROJECTS_PER_NS:
                return JSONResponse(
                    {"error": f"project limit reached (max {MAX_PROJECTS_PER_NS} per user)"},
                    status_code=409,
                )
        if dest_dir.is_dir() and sum(1 for _ in dest_dir.glob("*.jsonl")) >= MAX_SESSIONS_PER_PROJECT:
            return JSONResponse(
                {"error": f"session limit reached for this project (max {MAX_SESSIONS_PER_PROJECT})"},
                status_code=409,
            )
        dest_dir.mkdir(parents=True, exist_ok=True)
        # The payload can be tens of MB; write it on a worker thread so the event
        # loop keeps serving other requests meanwhile.
        await asyncio.to_thread(dest.write_bytes, data)
    except OSError as e:
        # Storage failure (disk full, mount problem, directory removed mid-request):
        # log the detail, drop any partial file, and answer with a generic error.
        _log_err("upload", e)
        try:
            dest.unlink(missing_ok=True)
        except OSError:
            pass
        return JSONResponse({"error": "could not store upload"}, status_code=500)
    return {"path": str(dest.resolve()), "name": file.filename}
def api_analyze(path: str = "", x_her_client: str = Header(default="")):
    """Return the cached analysis for one session file.

    `path` is validated by `srv._safe_session_path` and must also belong to the
    caller (`X-Her-Client` header) per `_client_owns`; otherwise a 400 is returned.
    An analysis failure is logged and reported as a generic 500.
    """
    session = srv._safe_session_path(path or None)
    permitted = session is not None and _client_owns(session, x_her_client)
    if not permitted:
        return JSONResponse({"error": "path not allowed"}, status_code=400)
    try:
        result = srv._analyze_cached(session)
    except Exception as exc:
        _log_err("analyze", exc)
        return JSONResponse({"error": "analyze failed"}, status_code=500)
    return result
def api_project(cwd: str = "", x_her_client: str = Header(default="")):
    """Return the deterministic project view for `cwd`, scoped to the caller's namespace.

    A missing `cwd` yields a 400. Failures are logged and reported as a generic 500.
    """
    if cwd == "" or not cwd:
        return JSONResponse({"error": "cwd required"}, status_code=400)
    try:
        # Deterministic only; the prose narrative comes from the GPU `project_narrative`
        # Gradio endpoint (auth-forwarded), not this plain-REST route.
        namespace_root = str(_ns_dir(x_her_client))
        return srv._project(cwd, with_narrative=False, projects_dir=namespace_root)
    except Exception as exc:
        _log_err("project", exc)
    return JSONResponse({"error": "could not load project"}, status_code=500)
async def api_clear(client: str = "", x_her_client: str = Header(default="")):
    """Wipe the caller's namespace (their uploaded sessions).

    `client` is also read from the query string so navigator.sendBeacon (which can't
    set headers) works on tab-close; it takes precedence over the `X-Her-Client`
    header. Only the caller's own namespace directory is removed. With no client id
    at all, nothing is deleted.

    Returns `{"ok": True, "cleared": <number of .jsonl files counted before removal>}`.
    Failures are logged server-side; the response shape does not change.
    """
    cid = client or x_her_client
    removed = 0
    try:
        nsd = _ns_dir(cid)
        if cid and nsd.is_dir():
            # Counting is informational only: a failure here must not stop the wipe.
            try:
                removed = sum(1 for _ in nsd.rglob("*.jsonl"))
            except OSError as e:
                _log_err("clear (count)", e)
            shutil.rmtree(nsd, ignore_errors=True)
            srv._CACHE.clear()  # drop any cached analysis for the wiped files
            if nsd.exists():
                # rmtree ignores errors, so make an incomplete wipe visible in the logs.
                _log_err("clear", OSError("namespace directory still present after wipe"))
    except Exception as e:
        # Stay best-effort for the browser, but never hide a failed privacy wipe.
        _log_err("clear", e)
    return {"ok": True, "cleared": removed}
def api_consent_get():
    """Return the consent state currently held by the server module (`srv._CONSENT`)."""
    current = srv._CONSENT
    return current
async def api_consent_post(request_body: dict | None = None):
    """Persist the consent flags from the request body and return the stored state.

    Only a literal JSON `true` counts as consent. Missing keys, an empty body, and
    truthy non-boolean values (for example the string "false", which `bool()` would
    treat as True) are all read as False, so a malformed body cannot opt anyone in.
    """
    body = request_body or {}
    accepted = body.get("accepted") is True
    share = body.get("share") is True
    srv._save_consent(accepted, share)
    return srv._CONSENT
def api_demo_video():
    """Serve the recorded product demo as `video/mp4`.

    Two locations are tried in order: the bucket copy at
    `<DATA_DIR>/_assets/her-demo.mp4` (uploaded out-of-band — never a user upload,
    never swept), then the repo's `demo/Her Demo.mp4` so the button works in dev.
    When neither file exists a 404 JSON body is returned (the UI handles it).
    """
    candidates = (
        DATA_DIR / ASSETS_DIRNAME / DEMO_VIDEO_NAME,
        REPO / "demo" / "Her Demo.mp4",
    )
    found = next((c for c in candidates if c.is_file()), None)
    if found is None:
        return JSONResponse({"error": "demo video not available"}, status_code=404)
    return FileResponse(str(found), media_type="video/mp4")
| # --------------------------------------------------------------------------- # | |
| # GPU narration endpoints — Gradio API (@app.api), called via @gradio/client so the | |
| # HF iframe auth headers forward for ZeroGPU quota. `client` scopes to the caller's | |
| # namespace. The only @spaces.GPU code is inside narrator.hf_narrator._generate. | |
| # --------------------------------------------------------------------------- # | |
def overview(path: str = "", client: str = "") -> dict:
    """Produce the overview for one session the caller owns.

    `path` must pass `srv._safe_session_path` and `_client_owns`. On rejection or
    failure the same dict shape is returned with an empty overview and an `error`
    message; failure detail is logged server-side only.
    """

    def _failed(reason: str) -> dict:
        return {"overview": "", "model": None, "error": reason}

    session = srv._safe_session_path(path or None)
    if session is None or not _client_owns(session, client):
        return _failed("path not allowed")
    try:
        analysis = srv._analyze_cached(session)
        return srv._overview(analysis)
    except Exception as exc:
        _log_err("overview", exc)
        return _failed("overview failed")
def advice(path: str = "", client: str = "") -> dict:
    """Produce recommendations for one session the caller owns.

    `path` must pass `srv._safe_session_path` and `_client_owns`. On rejection or
    failure the same dict shape is returned with no recommendations and an `error`
    message; failure detail is logged server-side only.
    """

    def _failed(reason: str) -> dict:
        return {"recommendations": [], "model": None, "error": reason}

    session = srv._safe_session_path(path or None)
    if session is None or not _client_owns(session, client):
        return _failed("path not allowed")
    try:
        analysis = srv._analyze_cached(session)
        return srv._advice(analysis)
    except Exception as exc:
        _log_err("advice", exc)
        return _failed("advice failed")
def chat(question: str = "", path: str = "", client: str = "") -> dict:
    """Answer a question about one session the caller owns.

    The question is stripped first; a blank one is rejected before the path is
    checked. `path` must pass `srv._safe_session_path` and `_client_owns`. Every
    rejection or failure returns the same dict shape with an empty answer and an
    `error` message.
    """

    def _failed(reason: str) -> dict:
        return {"answer": "", "citedTurns": [], "error": reason}

    question = (question or "").strip()
    if question == "":
        return _failed("empty question")
    session = srv._safe_session_path(path or None)
    if session is None or not _client_owns(session, client):
        return _failed("path not allowed")
    try:
        return srv._chat(question, session)
    except Exception as exc:
        _log_err("chat", exc)
        return _failed("chat failed")
def project_chat(question: str = "", cwd: str = "", client: str = "") -> dict:
    """Answer a question about a project, searching only the caller's namespace.

    A blank question and a missing `cwd` are rejected (in that order). Every
    rejection or failure returns the same dict shape with an empty answer and an
    `error` message.
    """

    def _failed(reason: str) -> dict:
        return {"answer": "", "sessionHits": [], "error": reason}

    question = (question or "").strip()
    if question == "":
        return _failed("empty question")
    if not cwd:
        return _failed("cwd required")
    try:
        namespace_root = str(_ns_dir(client))
        return srv._project_chat(question, cwd, projects_dir=namespace_root)
    except Exception as exc:
        _log_err("project_chat", exc)
        return _failed("project chat failed")
def project_narrative(cwd: str = "", client: str = "") -> dict:
    """Build the prose narrative for a project from the caller's own sessions.

    Session references come from `srv._project_sessions` for the caller's namespace;
    at most `srv._PROJECT_CAP` of them are briefed, and a session whose brief fails
    is skipped. A missing `cwd` returns an empty narrative with no `error` key; any
    other failure is logged and returns an empty narrative with an `error` message.
    """
    if not cwd:
        return {"narrative": "", "model": None}
    try:
        session_refs = srv._project_sessions(cwd, str(_ns_dir(client)))
        briefs = []
        for ref in session_refs[: srv._PROJECT_CAP]:
            try:
                brief = srv._brief(Path(ref.path))
            except Exception:
                continue  # one unreadable session must not sink the whole narrative
            briefs.append(brief)
        return srv._project_narrative(cwd, briefs)
    except Exception as exc:
        _log_err("project_narrative", exc)
        return {"narrative": "", "model": None, "error": "narrative failed"}
| # --------------------------------------------------------------------------- # | |
| # TTL sweeper — the hard privacy guarantee. Deletes any uploaded session older than | |
| # HER_RETENTION_HOURS and prunes empty namespace dirs. Runs at startup + on a timer. | |
| # --------------------------------------------------------------------------- # | |
def _sweep_once() -> int:
    """Delete expired uploaded sessions and prune the directories they leave empty.

    Every `.jsonl` under DATA_DIR whose mtime is older than RETENTION_HOURS is
    removed, except inside the shared top-level directories named in
    PROTECTED_DIRNAMES. A directory is protected only when it is a DIRECT child of
    DATA_DIR. A user project that happens to be called `_registry` or `_assets`
    (i.e. `<DATA_DIR>/<ns>/_registry`) is ordinary user content and is swept like
    any other; matching the name anywhere in the path would let such a project
    escape the retention guarantee.

    Returns the number of session files removed. Per-file and per-directory OS
    errors are ignored so one bad entry cannot stop the sweep.
    """
    if not DATA_DIR.exists():
        return 0
    cutoff = time.time() - RETENTION_HOURS * 3600
    data_root = str(DATA_DIR)
    removed = 0

    def _is_protected(dirpath: str) -> bool:
        """True when `dirpath` is, or lies beneath, a protected top-level dir."""
        first = Path(os.path.relpath(dirpath, data_root)).parts[:1]
        return bool(first) and first[0] in PROTECTED_DIRNAMES

    for root, dirs, files in os.walk(data_root):
        if root == data_root:
            # NEVER sweep shared state — prune it here so the walk does not even
            # descend into the binary registry or the demo asset directory.
            dirs[:] = [d for d in dirs if d not in PROTECTED_DIRNAMES]
        if _is_protected(root):
            continue
        for fn in files:
            if not fn.endswith(".jsonl"):
                continue  # only ever delete uploaded sessions, never registry/state json
            fp = os.path.join(root, fn)
            try:
                if os.path.getmtime(fp) < cutoff:
                    os.remove(fp)
                    removed += 1
            except OSError:
                pass

    # prune now-empty dirs bottom-up (keep DATA_DIR itself and the protected dirs).
    # In a bottom-up walk the dir list cannot be pruned in place, so check each path.
    for root, _dirs, _files in os.walk(data_root, topdown=False):
        if root == data_root or _is_protected(root):
            continue
        try:
            if not os.listdir(root):
                os.rmdir(root)
        except OSError:
            pass

    if removed:
        # Cached analyses may refer to files that no longer exist.
        try:
            srv._CACHE.clear()
        except Exception:
            pass
    return removed
def _sweeper_loop():
    """Run `_sweep_once` forever, pausing SWEEP_INTERVAL seconds between passes.

    A failed pass is logged and the loop carries on: the sweeper enforces the
    retention guarantee, so it must neither die nor fail invisibly. The pause is
    clamped to at least one second, because a zero interval would busy-loop and a
    negative one would make `time.sleep` raise and end the thread.
    """
    while True:
        try:
            _sweep_once()
        except Exception as e:
            _log_err("sweeper", e)
        time.sleep(max(1, SWEEP_INTERVAL))
def _start_sweeper():
    """Sweep once right away, then start the background sweeper thread.

    The boot-time pass clears anything that went stale while the process was down.
    If it fails, the error is logged and startup continues. The thread is a daemon,
    so it does not keep the process alive at shutdown.
    """
    try:
        _sweep_once()  # clear anything stale at boot
    except Exception as e:
        _log_err("sweeper (boot)", e)
    threading.Thread(target=_sweeper_loop, daemon=True, name="her-ttl-sweeper").start()
| # --------------------------------------------------------------------------- # | |
| # Static: serve the built React SPA (ui/dist). The app has NO client-side router | |
| # (navigation is state-based), so we serve index.html at "/", the hashed bundles | |
| # under /assets, the pulled logos under /binary-logos, and the few root images by | |
| # EXACT path. We deliberately avoid any wildcard/catch-all: Gradio registers its own | |
| # /gradio_api/* and /config routes at launch() — AFTER these — so a greedy route here | |
| # would shadow them and break @gradio/client + ZeroGPU (and Gradio's startup check). | |
| # --------------------------------------------------------------------------- # | |
# Mount each built static directory that exists, at a URL prefix of the same name:
#   assets        — hashed bundles
#   binary-logos  — pulled logos
#   brand         — "built on" logos
#   fonts         — self-hosted webfonts
for _static_dir in ("assets", "binary-logos", "brand", "fonts"):
    _static_path = DIST / _static_dir
    if _static_path.is_dir():
        app.mount(f"/{_static_dir}", StaticFiles(directory=str(_static_path)), name=_static_dir)
# Root-level files served by EXACT path (no wildcard route).
_ROOT_STATIC = [
    "favicon.png",
    "her-logo-light.png",
    "her-logo.png",
    "her-mark-light.png",
    "her-mark.png",
    "fonts.css",
]


def _root_route(fname: str):
    """Build a GET handler that serves `DIST/<fname>`, or a 404 JSON body if it is absent.

    A factory is used so each handler closes over its own `fname`.
    """

    async def _route():
        candidate = DIST / fname
        if not candidate.is_file():
            return JSONResponse({"error": "not found"}, status_code=404)
        return FileResponse(str(candidate))

    return _route


for _fn in _ROOT_STATIC:
    app.add_api_route("/" + _fn, _root_route(_fn), methods=["GET"])
def index():
    """Serve the SPA entry page `DIST/index.html`, or a 503 with a build hint if it is missing."""
    page = DIST / "index.html"
    if not page.is_file():
        return JSONResponse(
            {"error": "UI not built — run `cd ui && npm run build` before deploying."},
            status_code=503,
        )
    return FileResponse(str(page))
# Gradio Server mode: HF Spaces (Gradio SDK) runs this file and serves `app` on 7860.
_start_sweeper()

# Background binary enricher: drains unknown tool-names discovered during analysis and
# resolves them (local bundled DB → Nemotron → public registries), writing the shared
# learned registry on the bucket so later users get better detection. server/app.py owns
# the daemon + queue; it shares to R2 only on explicit consent (off by default here).
# Starting it stays best-effort (the app must still come up without it), but a failure
# is logged so a dead enricher is visible in the Space logs.
try:
    srv._start_enricher()
except Exception as e:
    _log_err("enricher", e)

app.launch(
    server_name="0.0.0.0",
    # PORT wins, then GRADIO_SERVER_PORT, then the default 7860.
    server_port=int(os.environ.get("PORT", os.environ.get("GRADIO_SERVER_PORT", 7860))),
    show_error=False,  # don't surface server tracebacks to clients (info-disclosure)
)