Spaces:
Sleeping
Sleeping
File size: 17,672 Bytes
"""SimReady Validator — Gradio UI for the HuggingFace Space.

Two surfaces, same engine:

- **/run** (the on-screen button) — streams log lines to the UI for
  interactive use by an operator in the browser.
- **/run_api** (hidden, programmatic) — returns the full RunResult as
  a JSON-serializable dict. This is what `tools/hf_watch/call_hf_space.py`
  hits from the GitHub Actions runner so the workflow can patch
  status.json and asset-status.json without scraping the UI's text.

Both go through `runner.run()`. The split is purely about output
shape (streaming text vs. one-shot dict).

The Space is internal-pilot scope: HF_TOKEN comes from the Space's
secrets, NOT from the requester. When a customer's dataset PR triggers
this (next milestone), the webhook payload identifies the dataset and
the Space's own token opens the verdict PR.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
import gradio as gr
from runner import (run as run_validator, progress_path_for, cancel_path_for,
run_token_path_for, CANCEL_DIR)
# Fallbacks applied when a caller leaves the profile / version blank.
DEFAULT_PROFILE = "Prop-Robotics-Neutral"
DEFAULT_VERSION = "1.0.0"

# Profile ids offered in the UI dropdown and accepted by the API
# endpoint. Order is the order shown to the operator.
PROFILE_CHOICES = [
    DEFAULT_PROFILE,
    "Prop-Robotics-Physx",
    "Prop-Robotics-Isaac",
    "Robot-Body-Neutral",
    "Robot-Body-Runnable",
    "Robot-Body-Isaac",
    "Package",
    "Package-Candidate",
]
def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
             submission_id: str = "", force: bool = False,
             preliminary: bool = False, use_kit: bool = False) -> dict:
    """Programmatic endpoint. Returns the RunResult as a JSON dict.

    Caller is typically `tools/hf_watch/call_hf_space.py` running from
    a GitHub Actions ubuntu-latest runner. Output shape must stay
    stable — bump `schema_version` if you change it. The receiver
    pattern-matches on the same field names `tools/hf_watch/validate.py`
    produces, so status.json patching is identical regardless of which
    backend ran the validation.

    `submission_id` is optional — when set, the validator writes
    per-asset progress to /tmp/sr-progress/<id>.json, which the
    get_progress endpoint serves to the dashboard.

    `preliminary` switches the runner to a structure-only sweep:
    zip-bundled datasets are scanned (instead of failing
    PKG.NO-ARCHIVES at the listing stage) and per-asset validation is
    sliced to the first asset only. Used by the dashboard's
    Preliminary scan tab.

    Raises:
        ValueError: if `profile` is neither a known profile id nor
            "auto" (case-insensitive), or if `version` contains
            characters outside ``[\\w.-]`` or starts with ``-``.
    """
    import re

    print(f"[run_api] preliminary={preliminary!r} force={force!r} "
          f"use_kit={use_kit!r} submission_id={submission_id!r}", flush=True)

    # Untrusted callers can hit /run_api directly — profile/version flow
    # into the validator's argv, so validate them before use. Empty (or
    # whitespace-only) values fall back to the defaults.
    profile = (profile or "").strip() or DEFAULT_PROFILE
    if profile not in PROFILE_CHOICES and profile.lower() != "auto":
        raise ValueError(f"invalid profile: {profile!r}")

    version = (version or "").strip() or DEFAULT_VERSION
    # The first character must be a word character: a value such as
    # "--flag" would otherwise pass the whitelist and could be parsed
    # as an option once it lands in the validator's argv.
    if not re.fullmatch(r"\w[\w.\-]*", version):
        raise ValueError(f"invalid version: {version!r}")

    result = run_validator(
        dataset=(dataset or "").strip(),
        profile=profile,
        version=version,
        open_pr=bool(open_pr),
        submission_id=(submission_id or "").strip(),
        force=bool(force),
        preliminary=bool(preliminary),
        use_kit=bool(use_kit),
    )
    return {
        "schema_version": 1,
        "dataset": result.dataset,
        "profile": result.profile,
        "version": result.version,
        "status": result.status,
        "summary": result.summary,
        # Absolute Space-local paths are stripped before the report
        # leaves the process.
        "results_json": _sanitize_results_json(result.results_json),
        "pr_url": result.pr_url,
    }
def _list_profiles() -> dict:
    """List the profile ids that actually register on this Space.

    The dashboard polls this to populate its dropdown so operators
    can't pick a profile that would fatally fail at registration time.

    Uses --use-plugin since the default CLI loader has known
    registration mismatches against the current foundation pin; the
    plugin path is what runner.py's streaming-zip flow falls back
    to and is the source of truth for "actually usable" here.

    The validator prints `PROFILE: <id> v<version>`, one profile per
    line; every other line of output is ignored.

    Returns a dict with `profiles` (de-duplicated, in first-seen
    order), `schema_version` and `rc` (the subprocess exit code). When
    no profile was parsed, `stderr_tail` / `stdout_tail` carry the last
    20 lines (capped at 2000 chars) of each stream. On timeout or any
    other failure the dict is `{"profiles": [], "error": ...}`.
    """
    import subprocess
    import sys
    from runner import VALIDATOR

    marker = "PROFILE:"

    def _tail(text) -> str:
        # Last 20 lines, hard-capped so the JSON response stays small.
        return "\n".join((text or "").splitlines()[-20:])[:2000]

    try:
        # --list-profiles only ENUMERATES registered profiles from the
        # spec/plugin registry (--use-plugin) — it runs no validation
        # rules, so it never needs Kit. Force --no-use-kit: on a
        # Kit-enabled image the validator auto-enables --use-kit for the
        # PhysX-bearing default profile and boots the full Isaac Sim
        # runtime (~5 min) just to print the list, blowing the 300s
        # timeout below. Actual validation (runner.py) still uses Kit.
        completed = subprocess.run(
            [sys.executable, str(VALIDATOR), "--list-profiles", "--use-plugin", "--no-use-kit"],
            capture_output=True, text=True, timeout=300,
        )

        # A dict doubles as an ordered set: first occurrence wins.
        ordered: dict = {}
        for raw_line in (completed.stdout or "").splitlines():
            stripped = raw_line.strip()
            if not stripped.startswith(marker):
                continue  # anything but the PROFILE: shape is noise
            fields = stripped[len(marker):].split()
            if fields:
                ordered.setdefault(fields[0], None)
        profiles = list(ordered)

        payload: dict = {
            "profiles": profiles,
            "schema_version": 1,
            "rc": completed.returncode,
        }
        if not profiles:
            # Nothing parsed — surface the tail of both streams so the
            # dashboard can show why.
            payload["stderr_tail"] = _tail(completed.stderr)
            payload["stdout_tail"] = _tail(completed.stdout)
        return payload
    except subprocess.TimeoutExpired:
        return {"profiles": [], "error": "timeout after 300s (spec load >5 min)"}
    except Exception as e:
        return {"profiles": [], "error": f"{type(e).__name__}: {e}"}
def _cancel_run(submission_id: str, run_token: str = "") -> dict:
    """Drop the cancel-signal file for a submission.

    The streaming-zip loop in runner.py checks for this file between
    zips and aborts when present. Calling this repeatedly just rewrites
    the same flag; runner.py deletes it once consumed.

    `run_token` is the per-run token the dashboard read from
    get_progress. It is written as the flag's content so
    runner._is_cancelled only honors it for the exact run it was issued
    against — a flag left over from a prior run of this submission can
    never abort a fresh one.

    Returns `{"state": "no_id"}` when no usable submission id was given,
    `{"state": "signaled", "path": ...}` once the flag is written, or
    `{"state": "error", "error": ...}` if the filesystem write failed.
    """
    sid = (submission_id or "").strip()
    # Blank ids never reach the path helper; a helper that declines the
    # id (returns None) is reported the same way.
    flag_file = cancel_path_for(sid) if sid else None
    if flag_file is None:
        return {"state": "no_id"}

    try:
        CANCEL_DIR.mkdir(parents=True, exist_ok=True)
        flag_file.write_text((run_token or "").strip(), encoding="utf-8")
    except OSError as exc:
        return {"state": "error", "error": f"{type(exc).__name__}: {exc}"}
    return {"state": "signaled", "path": str(flag_file)}
def _get_progress(submission_id: str) -> dict:
    """Read the validator's per-asset progress file for this submission.

    Polled by the dashboard ~every 3 s while a Validate-now click is
    in-flight, so the "Validate now" button can fill up as the
    validator works through the asset list.

    Returns one of these shapes:

    - {"state": "no_id"} — `submission_id` was empty.
    - {"state": "not_found"} — no progress file (Space restarted, or
      the dashboard is polling a Space-run that never happened).
    - {"state": "transient"} — the file exists but could not be read
      or parsed right now (typically caught mid-write).
    - {"state": "starting"} — file seeded by runner.py before the
      validator started its loop. processed/total are 0.
    - {processed, total, current, started_at, updated_at} — live
      per-asset progress written by validate.py._emit_progress.

    Every shape except "no_id" also carries `run_token` (the current
    run's cancel token, from the sidecar file) when one exists, so the
    dashboard can echo it back to cancel_run and target the exact run.

    Caller treats anything with total > 0 as "show the fill bar".
    """
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}

    # Per-run cancel token (sidecar; see runner.run_token_path_for).
    # Surfaced on every shape so the dashboard can echo it back to
    # cancel_run — a cancel then only aborts the run it was issued
    # against, never a later one that reused the submission_id.
    run_token = ""
    tok_path = run_token_path_for(sid)
    if tok_path and tok_path.is_file():
        try:
            run_token = tok_path.read_text(encoding="utf-8").strip()
        except (OSError, ValueError):
            # Best effort: an unreadable or undecodable token file just
            # means no token is reported on this poll.
            pass

    path = progress_path_for(sid)
    if path is None or not path.is_file():
        return {"state": "not_found", "run_token": run_token}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Mid-write — caller will poll again in a few seconds.
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised when the file currently ends in the
        # middle of a multi-byte UTF-8 sequence.
        return {"state": "transient", "run_token": run_token}
    if isinstance(data, dict) and run_token:
        data["run_token"] = run_token
    return data
def _sanitize_results_json(raw: dict) -> dict:
    """Return a copy of the report with Space-local filesystem paths removed.

    Gradio's JSON serializer treats string fields that resolve to files
    on the Space's filesystem as downloadable references and tries to
    serve them through `/gradio_api/file=...`. The validator's
    results.json contains absolute paths (target dir + per-asset
    `path`) which point into the Space's ephemeral tempdir and are
    NOT exposed through gradio's allowed_paths — gradio_client then
    fails with 403 trying to auto-fetch them after a successful run.

    Callers don't need filesystem paths anyway — only `rel_path`
    (dataset-relative), `passed`, and `issues` are used downstream.
    Everything else in the report (profile_coverage, summary,
    layout_findings, etc.) is passed through untouched.

    Non-dict input is returned as-is. Entries of `results` that are not
    dicts are dropped. The input is never mutated.
    """
    if not isinstance(raw, dict):
        return raw

    # Top-level keys that only make sense on the Space's own disk.
    local_only = ("target", "specs_docs_dir", "dashboard_docs_dir")

    cleaned = {}
    for key, value in raw.items():
        if key in local_only:
            continue
        if key == "results" and isinstance(value, list):
            # Rebuild each per-asset record without its absolute `path`.
            value = [
                {field: item for field, item in entry.items() if field != "path"}
                for entry in value
                if isinstance(entry, dict)
            ]
        cleaned[key] = value
    return cleaned
def _run_streaming(dataset: str, profile: str, version: str, open_pr: bool):
    """Generator behind the on-screen Validate button.

    Each yielded 4-tuple feeds the outputs the click handler is bound
    to: (log text, verdict markdown, summary text, report file path or
    None).

    It yields a "(running…)" placeholder immediately, then one final
    tuple once the validator returns (or an error tuple if it raised).

    NOTE: the `log` callback only appends to `lines`; nothing is
    yielded while `run_validator` is executing, so log lines collected
    during the run reach the UI together with the final result.
    """
    lines: list[str] = []

    def log(line: str) -> None:
        lines.append(line)

    yield "\n".join(lines), "", "(running…)", None

    try:
        result = run_validator(
            # None-safe, same normalisation the programmatic endpoint uses.
            dataset=(dataset or "").strip(),
            profile=profile,
            version=(version or "").strip() or DEFAULT_VERSION,
            open_pr=open_pr,
            log=log,
        )
    except Exception as e:
        # UI boundary: show the failure in the log and summary boxes
        # instead of letting the handler crash.
        lines.append(f"\nERROR: {type(e).__name__}: {e}")
        yield "\n".join(lines), "", f"error: {e}", None
        return

    status_badge = f"**{result.status.upper()}** — {result.summary}"
    if result.pr_url:
        status_badge += f"\n\nPR: {result.pr_url}"

    # Only hand a path to the File output when the HTML report exists.
    report_index = result.report_path / "index.html"
    report_url = str(report_index) if report_index.is_file() else None

    yield (
        "\n".join(lines),
        status_badge,
        result.summary,
        report_url,
    )
def _read_md(name: str) -> str:
    """Return the text of *name*, minus a leading YAML frontmatter block.

    *name* is resolved relative to this file's directory. If the file
    is missing, a short italic Markdown stub is returned instead of
    raising — this keeps the Space bootable even before the
    space-deploy workflow has synced the assembled docs into the
    container.

    Frontmatter is recognised only when the very first line is exactly
    `---` (trailing whitespace allowed) and a later line is again
    exactly `---`. Everything through that closing line is dropped and
    leading whitespace of the remainder is stripped. A first line such
    as `----` or a `---` rule further down an ordinary document is not
    treated as frontmatter. Text without a closing delimiter is
    returned unchanged.
    """
    from pathlib import Path

    p = Path(__file__).resolve().parent / name
    try:
        src = p.read_text(encoding="utf-8")
    except FileNotFoundError:
        return f"_{name} not yet synced into this Space — check back after the next deploy._"

    first_line, newline, rest = src.partition("\n")
    if newline and first_line.rstrip() == "---":
        body_lines = rest.split("\n")
        for idx, line in enumerate(body_lines):
            # Scanning starts on the line right after the opening
            # fence, so an empty frontmatter block ("---\n---\n") is
            # recognised as well.
            if line.rstrip() == "---":
                return "\n".join(body_lines[idx + 1:]).lstrip()
    return src
# Build the Gradio app: three visible tabs for the operator, plus a set
# of invisible components whose only job is to expose named API
# endpoints (run_api, get_progress, list_profiles, cancel_run).
with gr.Blocks(title="SimReady Validator") as demo:
    with gr.Tabs():
        with gr.Tab("Overview"):
            gr.Markdown(_read_md("README.md"))
        with gr.Tab("Validator"):
            gr.Markdown(
                "Submit a HuggingFace dataset to validate against a SimReady "
                "profile. With **Open PR** enabled, the verdict is uploaded "
                "back to the dataset as a `validation/` pull request."
            )
            with gr.Row():
                dataset = gr.Textbox(
                    label="Dataset",
                    placeholder="org/dataset (e.g. imagineio/PhysicalAI-SimReady-Kitchens-v1)",
                )
            # NOTE(review): which inputs share this row is an assumption
            # about the layout — confirm against the deployed UI.
            with gr.Row():
                profile = gr.Dropdown(
                    choices=PROFILE_CHOICES, value=DEFAULT_PROFILE, label="Profile",
                )
                version = gr.Textbox(label="Version", value=DEFAULT_VERSION)
                open_pr = gr.Checkbox(label="Open PR on dataset with verdict", value=False)
            run_btn = gr.Button("Validate", variant="primary")
            # Outputs filled by _run_streaming, in the order it yields them
            # (see the click binding below).
            status_md = gr.Markdown(label="Verdict")
            summary_box = gr.Textbox(label="Summary", interactive=False)
            log_box = gr.Textbox(label="Log", lines=20, interactive=False)
            report_link = gr.File(label="HTML report (download)", interactive=False)
        with gr.Tab("Partner walkthrough"):
            gr.Markdown(_read_md("VALIDATE.md"))
    # Interactive surface: the Validate button drives the generator and
    # is also registered under the API name "run".
    run_btn.click(
        fn=_run_streaming,
        inputs=[dataset, profile, version, open_pr],
        outputs=[log_box, status_md, summary_box, report_link],
        api_name="run",
    )
    # Programmatic endpoint — bound to invisible components so the UI
    # doesn't render anything extra, but the Gradio queue still exposes
    # an `/api/predict/run_api` route the gradio_client can hit. The
    # outputs[0] is the JSON return; api_name turns it into a stable
    # path the GitHub Actions caller depends on.
    # The input components below are listed in the same order as
    # _run_api's parameters.
    api_dataset = gr.Textbox(visible=False)
    api_profile = gr.Textbox(visible=False)
    api_version = gr.Textbox(visible=False)
    api_open_pr = gr.Checkbox(visible=False)
    api_submission_id = gr.Textbox(visible=False)
    api_force = gr.Checkbox(visible=False)
    api_preliminary = gr.Checkbox(visible=False)
    api_use_kit = gr.Checkbox(visible=False)
    api_output = gr.JSON(visible=False)
    api_button = gr.Button(visible=False)
    api_button.click(
        fn=_run_api,
        inputs=[api_dataset, api_profile, api_version, api_open_pr,
                api_submission_id, api_force, api_preliminary, api_use_kit],
        outputs=api_output,
        api_name="run_api",
    )
    # Progress endpoint — polled by the dashboard while a row is
    # validating. CORS is open on /gradio_api/* by default, so the
    # browser can fetch this from github.io directly without any
    # GitHub-Actions side polling/commit churn.
    prog_in = gr.Textbox(visible=False)
    prog_out = gr.JSON(visible=False)
    prog_button = gr.Button(visible=False)
    prog_button.click(
        fn=_get_progress,
        inputs=[prog_in],
        outputs=prog_out,
        api_name="get_progress",
    )
    # Profile-listing endpoint — polled by the dashboard at startup
    # so its dropdown reflects what's actually loadable on this Space
    # right now (foundation+validator pin determines which profiles
    # register). Stops the operator from picking something that
    # would fatal at runtime.
    profiles_out = gr.JSON(visible=False)
    profiles_button = gr.Button(visible=False)
    profiles_button.click(
        fn=_list_profiles,
        inputs=None,
        outputs=profiles_out,
        api_name="list_profiles",
    )
    # Cancel endpoint — the dashboard's Cancel button calls this AFTER
    # cancelling the GH Action so the in-flight server-side gradio call
    # actually stops (cancelling the Action alone only kills the
    # gradio_client wrapper, the Space's loop keeps going).
    # Inputs map to _cancel_run(submission_id, run_token).
    cancel_in = gr.Textbox(visible=False)
    cancel_token = gr.Textbox(visible=False)
    cancel_out = gr.JSON(visible=False)
    cancel_button = gr.Button(visible=False)
    cancel_button.click(
        fn=_cancel_run,
        inputs=[cancel_in, cancel_token],
        outputs=cancel_out,
        api_name="cancel_run",
    )
if __name__ == "__main__":
    # Script entry point: enable the request queue, then serve. Bind
    # address and port come from GRADIO_SERVER_NAME / GRADIO_SERVER_PORT
    # when set, otherwise all interfaces on port 7860.
    queued_app = demo.queue()
    bind_host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    bind_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    queued_app.launch(server_name=bind_host, server_port=bind_port)
|