DataAgent-evals / app.py
AdithyaSK's picture
AdithyaSK HF Staff
viz: bigger fonts, desktop-only gate, benchmark toggle + info popover, dense details panel with M×H/M×D/H×D matrices
865e161
Raw
History Blame Contribute Delete
6.18 kB
"""FastAPI viz server — serves eval visualizations for one or more sweeps.
**Hybrid storage layout** (small files in repo, big trace blobs in a bucket):
site/ ← stays small (≤ a few MB total)
├── viewer.html # shared UI, dataset-toggle aware
└── <ds>/summary.json # one per dataset; heatmap + per-attempt meta
hf://buckets/<TRACES_BUCKET>/<ds>/traces.json
# the big blob; loaded lazily on first
# /api/<ds>/trace/{tid} request and kept
# in memory after that.
Configurable via env vars (defaults shown):
TRACES_BUCKET = AdithyaSK/data-agent-eval-traces
TRACES_LOCAL = (unset) — if set to a local dir, used instead of the bucket. Useful
for offline dev. Layout: $TRACES_LOCAL/<ds>/traces.json
Endpoints (`<ds>` is a site/<dir> name, e.g. `v1` or `dabstep`):
GET / → viewer.html
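GET /api/datasets → ["v1", "dabstep"] (whatever exists under site/)
GET /api/benchmarks → benchmark registry from site/benchmarks.json (synthesized from the datasets if the file is missing)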
GET /api/<ds>/summary → summary.json for that dataset
GET /api/<ds>/trace/{tid} → one trajectory from that dataset's traces.json
GET /healthz → {ok, datasets, traces_source, loaded_traces}
Run locally:
cd .. && uv run uvicorn viz_server.app:app --port 8000
# open http://localhost:8000
# (reads traces from the bucket via HfFileSystem; set HF_TOKEN if private)
On a Hugging Face Docker Space, this file sits at the repo root and runs via:
uvicorn app:app --host 0.0.0.0 --port 7860
"""
from __future__ import annotations
import json
import os
from pathlib import Path
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, JSONResponse
# Static assets (viewer.html and one <ds>/summary.json per dataset) live in a
# "site" directory next to this module.
SITE = Path(__file__).resolve().parent.joinpath("site")

# Where the large trace blobs come from. TRACES_LOCAL, when set, takes
# precedence over the bucket (offline development).
TRACES_BUCKET = os.environ.get("TRACES_BUCKET", "AdithyaSK/data-agent-eval-traces")
TRACES_LOCAL = os.environ.get("TRACES_LOCAL")

app = FastAPI(docs_url="/docs", title="Harbor Eval Visualizations")

# In-memory cache: dataset name -> parsed traces.json. Filled on the first
# /api/<ds>/trace/{tid} request for that dataset.
_TRACES: dict[str, dict] = {}
def _list_datasets() -> list[str]:
    """Return the dataset names found under SITE, sorted alphabetically.

    A dataset is any direct sub-directory of SITE that contains a
    ``summary.json``. An absent SITE directory yields an empty list.
    """
    names: list[str] = []
    if SITE.exists():
        for entry in SITE.iterdir():
            if not entry.is_dir():
                continue
            if entry.joinpath("summary.json").exists():
                names.append(entry.name)
        names.sort()
    return names
def _traces_source() -> str:
    """Describe, for /healthz, where this process reads trace blobs from.

    Returns ``local:<dir>`` when TRACES_LOCAL is set, otherwise the
    ``hf://buckets/<bucket>`` URL of the configured bucket.
    """
    return f"local:{TRACES_LOCAL}" if TRACES_LOCAL else f"hf://buckets/{TRACES_BUCKET}"
def _open_traces_file(ds: str):
    """Return a UTF-8 text-mode file handle for this dataset's traces.json.

    Exactly one source is consulted (there is no fallback between them):
      1. $TRACES_LOCAL/<ds>/traces.json, if the TRACES_LOCAL env var is set;
      2. otherwise hf://buckets/$TRACES_BUCKET/<ds>/traces.json.

    Args:
        ds: Dataset name. Must be a single plain path component.

    Returns:
        An open text-mode file object. The caller is responsible for closing
        it (use it in a ``with`` block).

    Raises:
        HTTPException: 404 if ``ds`` is not a plain directory name, or if the
            traces file does not exist in the selected source.
    """
    # `ds` arrives straight from the request URL. Refuse anything that could
    # resolve outside the per-dataset directory (".", "..", or a name that
    # smuggles in a path separator).
    if ds in {"", ".", ".."} or "/" in ds or "\\" in ds:
        raise HTTPException(404, f"invalid dataset name: {ds!r}")

    if TRACES_LOCAL:
        p = Path(TRACES_LOCAL) / ds / "traces.json"
        if not p.exists():
            raise HTTPException(404, f"local traces missing for {ds!r}: {p}")
        # JSON is UTF-8; do not depend on the platform's locale encoding.
        return p.open("r", encoding="utf-8")

    # Lazy import — only needed when reading from the bucket, keeps offline
    # dev (TRACES_LOCAL set) free of the huggingface_hub dependency.
    from huggingface_hub import HfFileSystem

    fs = HfFileSystem()
    remote = f"buckets/{TRACES_BUCKET}/{ds}/traces.json"
    if not fs.exists(remote):
        raise HTTPException(404, f"bucket traces missing for {ds!r}: hf://{remote}")
    # fsspec forwards `encoding` to the TextIOWrapper it builds for text mode.
    return fs.open(remote, "r", encoding="utf-8")
def _load_traces(ds: str) -> dict:
    """Return the parsed traces.json for ``ds``, loading it on first use.

    The parsed result is stored in the module-level ``_TRACES`` cache, so
    later calls for the same dataset do not touch the file or bucket again.
    Errors raised while opening or parsing the file propagate and nothing is
    cached.
    """
    try:
        return _TRACES[ds]
    except KeyError:
        pass  # not cached yet — fall through and load it

    with _open_traces_file(ds) as handle:
        parsed = json.load(handle)
    _TRACES[ds] = parsed
    return parsed
@app.get("/")
def index() -> FileResponse:
    """Serve the shared viewer page, or 503 if it has not been built yet."""
    viewer = SITE.joinpath("viewer.html")
    if viewer.exists():
        return FileResponse(viewer, media_type="text/html")
    raise HTTPException(503, "viewer.html missing — run build_data.py first")
@app.get("/api/datasets")
def datasets() -> JSONResponse:
    """Return the flat, sorted list of dataset names as JSON.

    Legacy endpoint, retained so older viewer.html builds keep working.
    """
    names = _list_datasets()
    return JSONResponse(content=names)
@app.get("/api/benchmarks")
def benchmarks() -> JSONResponse:
    """Benchmark registry — drives the top-level toggle + info icon in the viewer.

    Returned shape (from site/benchmarks.json):
        {
          "<benchmark-key>": {
            "label": "...", "description": "...",
            "source": {"harbor": "...", "hf_dataset": "...", ...},
            "sweeps": ["v1", ...]
          },
          ...
        }

    Falls back to a synthetic registry if benchmarks.json is missing (one entry
    per discovered sweep) so old deploys keep working.

    Returns:
        A JSON response holding either the parsed benchmarks.json or the
        synthesized fallback mapping.
    """
    reg_path = SITE / "benchmarks.json"
    if reg_path.exists():
        # Decode explicitly as UTF-8: labels/descriptions may contain
        # non-ASCII text and the platform default encoding is not guaranteed
        # to be UTF-8.
        return JSONResponse(json.loads(reg_path.read_text(encoding="utf-8")))

    # Fallback: synthesize from sweeps.
    fallback = {
        ds: {"label": ds, "description": "", "source": {}, "sweeps": [ds]}
        for ds in _list_datasets()
    }
    return JSONResponse(fallback)
@app.get("/api/{ds}/summary")
def summary(ds: str) -> FileResponse:
    """Serve site/<ds>/summary.json for a known dataset.

    Args:
        ds: Dataset name taken from the URL path.

    Returns:
        The dataset's summary.json as an ``application/json`` file response.

    Raises:
        HTTPException: 404 if ``ds`` is not one of the datasets discovered
            under site/ (i.e. a direct sub-directory with a summary.json).
    """
    # `ds` is untrusted URL input. Checking it against the discovered dataset
    # names (instead of joining it into a path and probing the filesystem)
    # stops values such as "." or ".." from resolving to a summary.json that
    # sits outside a dataset directory.
    if ds not in _list_datasets():
        raise HTTPException(404, f"dataset {ds!r} not found (no summary.json)")
    return FileResponse(SITE / ds / "summary.json", media_type="application/json")
@app.get("/api/{ds}/trace/{tid}")
def trace(ds: str, tid: int) -> JSONResponse:
    """Return one trajectory from a dataset's traces as JSON.

    The trace id is looked up by its string form in the mapping returned by
    ``_load_traces``. Responds 404 when that key is absent (or maps to null).
    """
    all_traces = _load_traces(ds)
    record = all_traces.get(str(tid))
    if record is not None:
        return JSONResponse(record)
    raise HTTPException(404, f"trace {tid} not found in dataset {ds!r}")
@app.get("/healthz")
def healthz() -> dict:
    """Report basic service health.

    ``ok`` reflects whether viewer.html exists; ``loaded_traces`` maps each
    dataset already held in the in-memory cache to its number of entries.
    """
    viewer_present = SITE.joinpath("viewer.html").exists()
    known = _list_datasets()
    source = _traces_source()
    cached_counts = {}
    for name, blob in _TRACES.items():
        cached_counts[name] = len(blob)
    return {
        "ok": viewer_present,
        "datasets": known,
        "traces_source": source,
        "loaded_traces": cached_counts,
    }