Spaces:

build-small-hackathon
/

her

Running on Zero

File size: 20,400 Bytes

#!/usr/bin/env python3
"""Her · हेर — Hugging Face ZeroGPU Space entrypoint (Gradio Server mode).

ZeroGPU is Gradio-SDK-only and its GPU quota requires the HF iframe auth headers to
be forwarded on GPU-invoking calls — a plain `fetch` to a custom route that triggers
`@spaces.GPU` bypasses that and fails. So this app uses **Gradio Server mode**
(`gradio.Server`, a FastAPI server with Gradio's API engine):

  * DETERMINISTIC engine endpoints (no GPU) are plain FastAPI routes the React app
    calls with `fetch`:
        GET  /api/health            GET  /api/sessions
        POST /api/upload            GET  /api/analyze?path=
        GET  /api/project?cwd=      POST /api/clear      GET/POST /api/consent
  * GPU narration endpoints are Gradio API endpoints (`@app.api`) the browser calls
    via `@gradio/client` (which forwards the auth headers ZeroGPU needs):
        overview · advice · chat · project_chat · project_narrative

STORAGE & PRIVACY (the hosted Space):
  * Uploaded sessions are stored on an HF **storage bucket** mounted read-write at
    `HER_DATA_DIR` (`/data`), namespaced per client: `/data/<ns>/<project>/<file>.jsonl`
    where `ns` is the first 16 hex chars of `sha256(client-token)`. The client token is
    generated in the browser (localStorage) and sent as the `X-Her-Client` header (REST) /
    `client` arg (Gradio), so every user only ever SEES and ANALYZES their own sessions —
    public-safe.
  * Trace content is auto-deleted: a background sweeper removes anything older than
    `HER_RETENTION_HOURS` (24h) — the hard guarantee — and `POST /api/clear` wipes the
    caller's namespace immediately (the UI calls it on a "Clear" click and on tab-close).

The deterministic ENGINE is reused unchanged from the local product; only the transport
and the model backend differ. server/app.py stays the single source of truth.
"""
from __future__ import annotations

import hashlib
import os
import re
import shutil
import sys
import threading
import time
import uuid
from pathlib import Path

# Select the HF/ZeroGPU narrator backend BEFORE importing server helpers, so every
# get_narrator() call in server/app.py resolves to the transformers model.
os.environ.setdefault("HER_BACKEND", "hf")
# No usage telemetry to gradio.app from a privacy-focused app (set before importing gradio).
os.environ.setdefault("GRADIO_ANALYTICS_ENABLED", "False")

import spaces  # noqa: F401  (ZeroGPU runtime hook; effect-free off-Space)

# Force the model to load at MODULE level (ZeroGPU requirement: cuda placement under
# CUDA-emulation at import; real GPU only inside @spaces.GPU). Safe if it fails — the
# narrator reports not-ready and callers fall back to the deterministic prose.
import narrator.hf_narrator  # noqa: F401,E402

import gradio as gr  # noqa: E402
from fastapi import File, Form, Header, UploadFile  # noqa: E402
from fastapi.responses import FileResponse, JSONResponse  # noqa: E402
from fastapi.staticfiles import StaticFiles  # noqa: E402

import server.app as srv  # noqa: E402  (the engine request logic — reused as-is)

# Directory containing this file; every bundled asset path below is derived from it.
REPO = Path(__file__).resolve().parent
# Output directory of the built React UI (served by the static routes further down).
DIST = REPO / "ui" / "dist"

# Storage root: the HF bucket mount on the Space (HER_DATA_DIR=/data), else a local dir.
# server/app.py is told HER_EXTRA_ROOT=/data so _safe_session_path permits paths here.
DATA_DIR = Path(os.environ.get("HER_DATA_DIR", str(REPO / ".uploads"))).resolve()
# Created eagerly at import so the upload route and the sweeper always have a root to use.
DATA_DIR.mkdir(parents=True, exist_ok=True)
# Retention window in hours (fractions allowed); the sweeper turns it into an mtime cutoff.
RETENTION_HOURS = float(os.environ.get("HER_RETENTION_HOURS", "24"))
# Seconds the sweeper thread sleeps between passes.
SWEEP_INTERVAL = int(os.environ.get("HER_SWEEP_INTERVAL", "1800"))  # 30 min

# Public-safe budgets — one client must not be able to exhaust memory or the bucket.
# All three are enforced by the upload route before anything is written.
MAX_UPLOAD_BYTES = 70 * 1024 * 1024        # 70 MB per uploaded session file
MAX_PROJECTS_PER_NS = 50                    # projects (subdirs) per client namespace
MAX_SESSIONS_PER_PROJECT = 50              # .jsonl sessions per project subdir


def _log_err(where: str, e: Exception) -> None:
    """Write one diagnostic line for a failed operation to stderr.

    Responses sent to the browser stay generic (no internal paths or tracebacks),
    so this line is where the exception type and message end up.

    Args:
        where: Short label of the failing endpoint/operation.
        e: The exception that was caught.
    """
    line = f"[her] {where}: {type(e).__name__}: {e}"
    print(line, file=sys.stderr, flush=True)

# The shared, persistent binary registry the enricher writes lives OUTSIDE every user
# namespace (`/data/_registry/...` via HER_LEARNED_PATH). Uploads only ever land under
# `/data/<ns>/` (ns = first 16 hex chars of sha256(token), see `_ns`), and the sweeper
# skips this directory.
REGISTRY_DIRNAME = "_registry"
# The recorded product demo (mp4) is a shared, non-user asset on the bucket at
# `/data/_assets/her-demo.mp4` (uploaded out-of-band, served read-only by /api/demo-video).
# Like the registry it is never a user upload and must never be swept.
ASSETS_DIRNAME = "_assets"
DEMO_VIDEO_NAME = "her-demo.mp4"
# Bucket dirs that hold shared state, not per-user trace content — the sweeper skips them.
PROTECTED_DIRNAMES = (REGISTRY_DIRNAME, ASSETS_DIRNAME)
# Optional path of the learned-registry file; when set, make sure its parent dir exists.
_LEARNED = os.environ.get("HER_LEARNED_PATH")
if _LEARNED:
    try:
        Path(_LEARNED).parent.mkdir(parents=True, exist_ok=True)
    except OSError:
        # Best-effort: an unwritable registry location must not stop the app from booting.
        pass

# The Gradio Server instance every route / API endpoint below is registered on.
app = gr.Server()


# --------------------------------------------------------------------------- #
# per-client namespace — isolates each browser's uploads (public-safe). The token
# is opaque to us; we only hash it to a directory name.
# --------------------------------------------------------------------------- #
def _ns(client: str) -> str:
    """Map an opaque client token to its namespace directory name.

    The name is the first 16 hex characters of the token's SHA-256 digest. A missing
    or empty token falls back to the literal token "anon".
    """
    token = client if client else "anon"
    digest = hashlib.sha256(token.encode("utf-8")).hexdigest()
    return digest[:16]


def _ns_dir(client: str) -> Path:
    """Return the storage directory of `client`: DATA_DIR joined with its namespace name."""
    return DATA_DIR.joinpath(_ns(client))


def _safe_subdir(name: str) -> str:
    """Turn a caller-supplied project name into a single safe directory name.

    Surrounding whitespace is stripped, then every character outside `A-Z a-z 0-9 _ -`
    (including '.' and path separators) is replaced by '_', so the result can never
    contain a dot-segment or a separator. The result is capped at 80 characters; an
    empty result becomes 'uploads'.
    """
    raw = name.strip() if name else ""
    cleaned = re.sub(r"[^A-Za-z0-9_-]", "_", raw)
    clipped = cleaned[:80]
    return clipped if clipped else "uploads"


def _client_owns(p: Path, client: str) -> bool:
    """Decide whether `client` may access the path `p`.

    Paths that are not under DATA_DIR (e.g. the bundled fixture / local sessions) are
    always allowed. Paths under DATA_DIR are allowed only when they sit inside the
    client's own namespace directory.

    Returns:
        True when access is allowed; False otherwise, including when the check itself
        raises — a security predicate must fail closed.
    """
    try:
        inside_bucket = p.is_relative_to(DATA_DIR)
        if inside_bucket:
            return p.is_relative_to(_ns_dir(client))
        return True
    except Exception:
        return False


# --------------------------------------------------------------------------- #
# DETERMINISTIC engine endpoints — plain FastAPI routes, no GPU (React `fetch`).
# --------------------------------------------------------------------------- #
@app.get("/api/health")
def api_health():
    """Report liveness plus the flags the UI needs to pick its transport.

    Returns a dict with:
      * `ok`    — always True.
      * `llama` — whether the narrator reported ready within a 0.1 s probe
                  (False when the probe raises).
      * `gpu`   — always True; tells the UI to route narration through @gradio/client.
      * `space` — the SPACE_ID environment value ("owner/name" on HF), or "" when unset,
                  so the UI can point its download command at THIS Space.
    """
    try:
        probe = srv.get_narrator().wait_until_ready(max_wait=0.1, interval=0.1)
    except Exception:
        probe = False
    return {
        "ok": True,
        "llama": bool(probe),
        "gpu": True,
        "space": os.environ.get("SPACE_ID", ""),
    }


@app.get("/api/sessions")
def api_sessions(x_her_client: str = Header(default="")):
    """List the sessions stored in the caller's namespace only.

    The `X-Her-Client` header selects the namespace directory handed to the engine.
    Any failure is logged server-side and answered with an empty listing plus a
    generic error string, so the browser never sees a 500 from this route.
    """
    try:
        listing = srv._sessions_payload(projects_dir=str(_ns_dir(x_her_client)))
    except Exception as exc:  # never 500 the browser
        _log_err("sessions", exc)
        return {"error": "could not list sessions", "projects": [], "total": 0}
    return listing


@app.post("/api/upload")
async def api_upload(
    file: UploadFile = File(...),
    project: str = Form(default="uploads"),
    x_her_client: str = Header(default=""),
):
    """Store an uploaded .jsonl under the caller's namespace.

    The file lands at `<DATA_DIR>/<ns>/<project>/<uuid>.jsonl`. `project` (the bulk
    script passes the encoded project dir) becomes the subdir so discovery's
    `<ns>/*/*.jsonl` glob groups sessions by project.

    Guards, in order: `.jsonl` extension only (400), hard size cap (413), non-empty
    content (400), destination must stay inside the caller's namespace (400),
    per-namespace project budget and per-project session budget (409).

    Returns:
        `{"path": <absolute stored path>, "name": <original filename>}` on success,
        otherwise a JSONResponse with an `error` message. Storage failures are logged
        server-side and answered with a generic 500 — no paths or tracebacks leak.
    """
    name = (file.filename or "").lower()
    if not name.endswith(".jsonl"):
        return JSONResponse({"error": "only .jsonl files are accepted"}, status_code=400)
    # Bounded read: pull at most the cap (+1 sentinel) into memory — a multi-GB upload
    # can't OOM the box. read(N) returns ≤N bytes; cap+1 back means it's over budget.
    data = await file.read(MAX_UPLOAD_BYTES + 1)
    if len(data) > MAX_UPLOAD_BYTES:
        # Derive the figure from the constant so the message cannot drift from the cap.
        limit_mb = MAX_UPLOAD_BYTES // (1024 * 1024)
        return JSONResponse({"error": f"file too large (max {limit_mb} MB per session)"}, status_code=413)
    if not data.strip():
        return JSONResponse({"error": "empty file"}, status_code=400)
    nsd = _ns_dir(x_her_client)
    dest_dir = nsd / _safe_subdir(project)
    # belt + braces: the destination must stay inside the caller's namespace dir.
    try:
        if not dest_dir.resolve().is_relative_to(nsd.resolve()):
            return JSONResponse({"error": "bad project"}, status_code=400)
    except Exception:
        return JSONResponse({"error": "bad project"}, status_code=400)
    dest = None
    try:
        # per-namespace budgets — keep one client from filling the bucket (public-safe).
        if not dest_dir.exists() and nsd.is_dir():
            if sum(1 for d in nsd.iterdir() if d.is_dir()) >= MAX_PROJECTS_PER_NS:
                return JSONResponse({"error": f"project limit reached (max {MAX_PROJECTS_PER_NS} per user)"}, status_code=409)
        if dest_dir.is_dir() and sum(1 for _ in dest_dir.glob("*.jsonl")) >= MAX_SESSIONS_PER_PROJECT:
            return JSONResponse({"error": f"session limit reached for this project (max {MAX_SESSIONS_PER_PROJECT})"}, status_code=409)
        dest_dir.mkdir(parents=True, exist_ok=True)
        dest = dest_dir / f"{uuid.uuid4().hex}.jsonl"
        dest.write_bytes(data)
    except OSError as e:
        # Bucket full / unmounted / dir pruned underneath us: log the detail, answer
        # generically, and do not leave a truncated session behind.
        _log_err("upload", e)
        if dest is not None:
            try:
                dest.unlink(missing_ok=True)
            except OSError:
                pass
        return JSONResponse({"error": "could not store upload"}, status_code=500)
    return {"path": str(dest.resolve()), "name": file.filename}


@app.get("/api/analyze")
def api_analyze(path: str = "", x_her_client: str = Header(default="")):
    """Run the deterministic analysis for one session file.

    `path` is first validated by the engine's path check, then by the ownership check
    against the caller's namespace; either failing yields a 400. Analysis errors are
    logged server-side and reported as a generic 500.
    """
    target = srv._safe_session_path(path or None)
    permitted = target is not None and _client_owns(target, x_her_client)
    if not permitted:
        return JSONResponse({"error": "path not allowed"}, status_code=400)
    try:
        result = srv._analyze_cached(target)
    except Exception as exc:
        _log_err("analyze", exc)
        return JSONResponse({"error": "analyze failed"}, status_code=500)
    return result


@app.get("/api/project")
def api_project(cwd: str = "", x_her_client: str = Header(default="")):
    """Return the deterministic project view for `cwd`, scoped to the caller's namespace.

    No narrative is generated here (`with_narrative=False`); the prose comes from the
    GPU `project_narrative` Gradio endpoint instead. A missing `cwd` is a 400; engine
    failures are logged and reported as a generic 500.
    """
    if cwd == "":
        return JSONResponse({"error": "cwd required"}, status_code=400)
    try:
        own_root = str(_ns_dir(x_her_client))
        payload = srv._project(cwd, with_narrative=False, projects_dir=own_root)
    except Exception as exc:
        _log_err("project", exc)
        return JSONResponse({"error": "could not load project"}, status_code=500)
    return payload


@app.post("/api/clear")
async def api_clear(client: str = "", x_her_client: str = Header(default="")):
    """Wipe the caller's namespace (their uploaded sessions).

    `client` is also read from the query string so navigator.sendBeacon (which can't
    set headers) works on tab-close; it takes precedence over the header. Without any
    token nothing is touched. Per-client: never touches anyone else's data.

    Returns:
        `{"ok": True, "cleared": <number of .jsonl files counted before the wipe>}`.
        The route always answers ok; failures are logged server-side.
    """
    cid = client or x_her_client
    nsd = _ns_dir(cid)
    removed = 0
    try:
        if cid and nsd.is_dir():
            # The count is informational only — a failure while counting must not
            # prevent the wipe itself from running.
            try:
                removed = sum(1 for _ in nsd.rglob("*.jsonl"))
            except OSError as e:
                _log_err("clear (count)", e)
            shutil.rmtree(nsd, ignore_errors=True)
            srv._CACHE.clear()  # drop any cached analysis for the wiped files
    except Exception as e:
        # Still best-effort for the caller, but leave a trace instead of failing silently.
        _log_err("clear", e)
    return {"ok": True, "cleared": removed}


@app.get("/api/consent")
def api_consent_get():
    """Return the engine's current consent record (`srv._CONSENT`) unchanged."""
    current = srv._CONSENT
    return current


@app.post("/api/consent")
async def api_consent_post(request_body: dict | None = None):
    """Persist the consent flags from the request body and return the stored record.

    Both `accepted` and `share` default to False when absent, so a malformed or empty
    body can never opt anyone in.
    """
    payload = request_body if request_body else {}
    accepted = bool(payload.get("accepted", False))
    share = bool(payload.get("share", False))
    srv._save_consent(accepted, share)
    return srv._CONSENT


@app.get("/api/demo-video")
def api_demo_video():
    """Serve the recorded product demo as `video/mp4`.

    Two locations are tried in order: the shared asset on the bucket
    (`<DATA_DIR>/_assets/her-demo.mp4` — never a user upload, never swept), then the
    repo's `demo/Her Demo.mp4` so the button works in local dev. A 404 JSON body (the UI
    handles it) is returned when neither exists.

    NOTE(review): seeking in the player relies on FileResponse honouring Range
    requests — confirm against the installed Starlette version.
    """
    candidates = (
        DATA_DIR / ASSETS_DIRNAME / DEMO_VIDEO_NAME,
        REPO / "demo" / "Her Demo.mp4",
    )
    found = next((c for c in candidates if c.is_file()), None)
    if found is None:
        return JSONResponse({"error": "demo video not available"}, status_code=404)
    return FileResponse(str(found), media_type="video/mp4")


# --------------------------------------------------------------------------- #
# GPU narration endpoints — Gradio API (@app.api), called via @gradio/client so the
# HF iframe auth headers forward for ZeroGPU quota. `client` scopes to the caller's
# namespace. The only @spaces.GPU code is inside narrator.hf_narrator._generate.
# --------------------------------------------------------------------------- #
@app.api(name="overview")
def overview(path: str = "", client: str = "") -> dict:
    """Gradio API: narrated overview for one session.

    `path` must pass the engine's path check and belong to `client`'s namespace;
    otherwise an empty overview with `error: "path not allowed"` is returned. Engine
    failures are logged and reported as `error: "overview failed"`.
    """
    target = srv._safe_session_path(path or None)
    permitted = target is not None and _client_owns(target, client)
    if not permitted:
        return {"overview": "", "model": None, "error": "path not allowed"}
    try:
        analysis = srv._analyze_cached(target)
        return srv._overview(analysis)
    except Exception as exc:
        _log_err("overview", exc)
        return {"overview": "", "model": None, "error": "overview failed"}


@app.api(name="advice")
def advice(path: str = "", client: str = "") -> dict:
    """Gradio API: recommendations for one session.

    Applies the same path + ownership gate as the other per-session endpoints. On a
    rejected path or an engine failure the result carries an empty `recommendations`
    list, `model: None` and a generic `error` string.
    """
    target = srv._safe_session_path(path or None)
    permitted = target is not None and _client_owns(target, client)
    if not permitted:
        return {"recommendations": [], "model": None, "error": "path not allowed"}
    try:
        analysis = srv._analyze_cached(target)
        return srv._advice(analysis)
    except Exception as exc:
        _log_err("advice", exc)
        return {"recommendations": [], "model": None, "error": "advice failed"}


@app.api(name="chat")
def chat(question: str = "", path: str = "", client: str = "") -> dict:
    """Gradio API: answer a question about one session.

    The question is stripped first; an empty one is rejected before any path work.
    The path must pass the engine's check and belong to `client`'s namespace. Every
    failure mode returns an empty `answer`, an empty `citedTurns` list and a generic
    `error` string; engine exceptions are logged server-side.
    """
    asked = (question or "").strip()
    if asked == "":
        return {"answer": "", "citedTurns": [], "error": "empty question"}
    target = srv._safe_session_path(path or None)
    permitted = target is not None and _client_owns(target, client)
    if not permitted:
        return {"answer": "", "citedTurns": [], "error": "path not allowed"}
    try:
        reply = srv._chat(asked, target)
    except Exception as exc:
        _log_err("chat", exc)
        return {"answer": "", "citedTurns": [], "error": "chat failed"}
    return reply


@app.api(name="project_chat")
def project_chat(question: str = "", cwd: str = "", client: str = "") -> dict:
    """Gradio API: answer a question across a project's sessions.

    Requires a non-empty (stripped) question and a `cwd`. The engine is pointed at
    `client`'s namespace directory only. Failures return an empty `answer`, an empty
    `sessionHits` list and a generic `error` string; exceptions are logged server-side.
    """
    asked = (question or "").strip()
    if asked == "":
        return {"answer": "", "sessionHits": [], "error": "empty question"}
    if cwd == "":
        return {"answer": "", "sessionHits": [], "error": "cwd required"}
    try:
        own_root = str(_ns_dir(client))
        reply = srv._project_chat(asked, cwd, projects_dir=own_root)
    except Exception as exc:
        _log_err("project_chat", exc)
        return {"answer": "", "sessionHits": [], "error": "project chat failed"}
    return reply


@app.api(name="project_narrative")
def project_narrative(cwd: str = "", client: str = "") -> dict:
    """Gradio API: prose narrative for a project, built from per-session briefs.

    Sessions are looked up inside `client`'s namespace, capped at `srv._PROJECT_CAP`.
    A session whose brief cannot be built is skipped rather than failing the request.
    An empty `cwd` returns an empty narrative (no error key); any other failure is
    logged and returns an empty narrative with `error: "narrative failed"`.
    """
    if cwd == "":
        return {"narrative": "", "model": None}
    try:
        sessions = srv._project_sessions(cwd, str(_ns_dir(client)))
        briefs = []
        for ref in sessions[: srv._PROJECT_CAP]:
            try:
                brief = srv._brief(Path(ref.path))
            except Exception:
                continue  # one unreadable session must not sink the whole narrative
            briefs.append(brief)
        return srv._project_narrative(cwd, briefs)
    except Exception as exc:
        _log_err("project_narrative", exc)
        return {"narrative": "", "model": None, "error": "narrative failed"}


# --------------------------------------------------------------------------- #
# TTL sweeper — the hard privacy guarantee. Deletes any uploaded session older than
# HER_RETENTION_HOURS and prunes empty namespace dirs. Runs at startup + on a timer.
# --------------------------------------------------------------------------- #
def _in_protected_dir(root: str) -> bool:
    """True when `root` is, or lies beneath, a shared dir sitting DIRECTLY under DATA_DIR.

    Only the first path component relative to DATA_DIR is compared. A user project that
    merely happens to be named like a protected dir (`<ns>/_registry/`) is therefore NOT
    protected and stays subject to retention.
    """
    rel = os.path.relpath(root, DATA_DIR)
    return rel.split(os.sep, 1)[0] in PROTECTED_DIRNAMES


def _sweep_once() -> int:
    """Delete expired uploaded sessions and prune the directories left empty.

    A `.jsonl` file is expired when its mtime is older than RETENTION_HOURS. The shared
    top-level dirs named in PROTECTED_DIRNAMES (binary registry, demo asset) are never
    touched, and files with other extensions are never deleted. Per-file and per-dir
    OS errors are ignored so one bad entry cannot stop the pass.

    Returns:
        The number of session files removed in this pass.
    """
    if not DATA_DIR.exists():
        return 0
    cutoff = time.time() - RETENTION_HOURS * 3600
    removed = 0
    for root, dirs, files in os.walk(DATA_DIR):
        if _in_protected_dir(root):
            # NEVER sweep shared state — and do not bother descending into it either.
            dirs[:] = []
            continue
        for fn in files:
            if not fn.endswith(".jsonl"):
                continue  # only ever delete uploaded sessions, never registry/state json
            fp = os.path.join(root, fn)
            try:
                if os.path.getmtime(fp) < cutoff:
                    os.remove(fp)
                    removed += 1
            except OSError:
                pass
    # prune now-empty dirs bottom-up (keep DATA_DIR itself and the shared dirs)
    for root, _dirs, _files in os.walk(DATA_DIR, topdown=False):
        if os.path.abspath(root) == str(DATA_DIR) or _in_protected_dir(root):
            continue
        try:
            if not os.listdir(root):
                os.rmdir(root)
        except OSError:
            pass
    if removed:
        # Cached analyses may refer to files that no longer exist.
        try:
            srv._CACHE.clear()
        except Exception:
            pass
    return removed


def _sweeper_loop():
    """Run the retention sweep forever, sleeping SWEEP_INTERVAL seconds between passes.

    Intended as a daemon-thread target; it never returns. A failing pass is logged to
    stderr and the loop carries on, so one bad pass cannot end retention enforcement —
    and a sweeper that keeps failing is visible in the logs instead of silent.
    """
    while True:
        try:
            _sweep_once()
        except Exception as e:
            _log_err("sweeper", e)
        time.sleep(SWEEP_INTERVAL)


def _start_sweeper():
    """Sweep once synchronously, then start the background sweeper thread.

    The boot-time pass clears anything that went stale while the process was down. Its
    failure is logged but never blocks startup. The thread is a daemon so it cannot
    keep the process alive on shutdown.
    """
    try:
        _sweep_once()  # clear anything stale at boot
    except Exception as e:
        _log_err("sweeper (boot)", e)
    threading.Thread(target=_sweeper_loop, daemon=True, name="her-ttl-sweeper").start()


# --------------------------------------------------------------------------- #
# Static: serve the built React SPA (ui/dist). The app has NO client-side router
# (navigation is state-based), so we serve index.html at "/", the hashed bundles
# under /assets, the pulled logos under /binary-logos, and the few root images by
# EXACT path. We deliberately avoid any wildcard/catch-all: Gradio registers its own
# /gradio_api/* and /config routes at launch() — AFTER these — so a greedy route here
# would shadow them and break @gradio/client + ZeroGPU (and Gradio's startup check).
# --------------------------------------------------------------------------- #
# Each built sub-directory that exists is mounted at the URL prefix of the same name;
# the order below is the registration order.
for _mount_url, _mount_name in (
    ("/assets", "assets"),              # hashed bundles
    ("/binary-logos", "binary-logos"),  # pulled logos
    ("/brand", "brand"),                # "built on" logos
    ("/fonts", "fonts"),                # self-hosted webfonts
):
    _mount_dir = DIST / _mount_name
    if _mount_dir.is_dir():
        app.mount(_mount_url, StaticFiles(directory=str(_mount_dir)), name=_mount_name)

# Files living at the root of the build that are served by exact path (no wildcard).
_ROOT_STATIC = [
    "favicon.png",
    "her-logo-light.png",
    "her-logo.png",
    "her-mark-light.png",
    "her-mark.png",
    "fonts.css",
]


def _root_route(fname: str):
    """Build the GET handler that serves the single file `DIST/<fname>`.

    A factory is used so each handler closes over its own `fname`. The file is looked
    up on every request; a missing file yields a 404 JSON body.
    """
    async def _route():
        candidate = DIST / fname
        if not candidate.is_file():
            return JSONResponse({"error": "not found"}, status_code=404)
        return FileResponse(str(candidate))

    return _route


# Register one exact-path GET route per root-level static file (no catch-all route, so
# nothing here can shadow the routes Gradio adds at launch()).
for _fn in _ROOT_STATIC:
    app.add_api_route(f"/{_fn}", _root_route(_fn), methods=["GET"])


@app.get("/")
def index():
    """Serve the SPA entry page, or a 503 JSON hint when the UI has not been built."""
    page = DIST / "index.html"
    if not page.is_file():
        return JSONResponse(
            {"error": "UI not built — run `cd ui && npm run build` before deploying."},
            status_code=503,
        )
    return FileResponse(str(page))


# Gradio Server mode: HF Spaces (Gradio SDK) runs this file and serves `app` on 7860.
# Retention enforcement starts before the server accepts any request.
_start_sweeper()
# Background binary enricher: drains unknown tool-names discovered during analysis and
# resolves them (local bundled DB → Nemotron → public registries), writing the shared
# learned registry on the bucket so later users get better detection. server/app.py owns
# the daemon + queue; it shares to R2 only on explicit consent (off by default here).
try:
    srv._start_enricher()
except Exception as e:
    # Best-effort: the app is fully usable without the enricher, but say so in the logs
    # instead of failing silently.
    _log_err("enricher", e)
# Port: PORT wins, then GRADIO_SERVER_PORT, then 7860.
app.launch(
    server_name="0.0.0.0",
    server_port=int(os.environ.get("PORT", os.environ.get("GRADIO_SERVER_PORT", 7860))),
    show_error=False,  # don't surface server tracebacks to clients (info-disclosure)
)