Spaces:
Sleeping
Sleeping
File size: 14,958 Bytes
"""SimReady Validator — Gradio UI for the HuggingFace Space.

Two surfaces, same engine:

- **/run** (the on-screen button) — streams log lines to the UI for
  interactive use by an operator in the browser.
- **/run_api** (hidden, programmatic) — returns the full RunResult as
  a JSON-serializable dict. This is what `tools/hf_watch/call_hf_space.py`
  hits from the GitHub Actions runner so the workflow can patch
  status.json and asset-status.json without scraping the UI's text.

Both go through `runner.run()`. The split is purely about output
shape (streaming text vs. one-shot dict).

The Space is internal-pilot scope: HF_TOKEN comes from the Space's
secrets, NOT from the requester. When a customer's dataset PR triggers
this (next milestone), the webhook payload identifies the dataset and
the Space's own token opens the verdict PR.
"""
from __future__ import annotations
import dataclasses
import json
import os
from pathlib import Path
import gradio as gr
from runner import run as run_validator, progress_path_for, cancel_path_for, CANCEL_DIR
# Profile identifiers offered in the on-screen "Profile" dropdown.
PROFILE_CHOICES = [
    "Prop-Robotics-Neutral",
    "Prop-Robotics-Physx",
    "Prop-Robotics-Isaac",
    "Robot-Body-Neutral",
    "Robot-Body-Runnable",
    "Robot-Body-Isaac",
    "Package",
    "Package-Candidate",
]
# Fallbacks used when a request leaves the profile or version blank; they
# are also the initial values of the corresponding UI inputs.
DEFAULT_PROFILE = "Prop-Robotics-Neutral"
DEFAULT_VERSION = "1.0.0"
def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
             submission_id: str = "", force: bool = False,
             preliminary: bool = False) -> dict:
    """Programmatic endpoint. Returns the RunResult as a JSON dict.

    Caller is typically `tools/hf_watch/call_hf_space.py` running from
    a GitHub Actions ubuntu-latest runner. Output shape must stay
    stable — bump `schema_version` if you change it. The receiver
    pattern-matches on the same field names `tools/hf_watch/validate.py`
    produces, so status.json patching is identical regardless of which
    backend ran the validation.

    `submission_id` is optional — when set, the validator writes
    per-asset progress to /tmp/sr-progress/<id>.json, which the
    get_progress endpoint serves to the dashboard.

    `preliminary` switches the runner to a structure-only sweep:
    zip-bundled datasets are scanned (instead of failing
    PKG.NO-ARCHIVES at the listing stage) and per-asset validation is
    sliced to the first asset only. Used by the dashboard's
    Preliminary scan tab.

    Every text input is normalised the same way: ``None`` is treated as
    empty, surrounding whitespace is stripped, and a blank profile or
    version falls back to its module default.

    Returns:
        A dict with the keys ``schema_version``, ``dataset``, ``profile``,
        ``version``, ``status``, ``summary``, ``results_json`` (with
        filesystem paths stripped) and ``pr_url``.
    """
    print(f"[run_api] preliminary={preliminary!r} force={force!r} "
          f"submission_id={submission_id!r}", flush=True)
    result = run_validator(
        dataset=(dataset or "").strip(),
        # Strip *before* applying the default so a whitespace-only value
        # falls back instead of reaching the runner as "" or "   ".
        profile=(profile or "").strip() or DEFAULT_PROFILE,
        version=(version or "").strip() or DEFAULT_VERSION,
        open_pr=bool(open_pr),
        submission_id=(submission_id or "").strip(),
        force=bool(force),
        preliminary=bool(preliminary),
    )
    return {
        "schema_version": 1,
        "dataset": result.dataset,
        "profile": result.profile,
        "version": result.version,
        "status": result.status,
        "summary": result.summary,
        # Absolute paths are removed before the payload leaves the Space.
        "results_json": _sanitize_results_json(result.results_json),
        "pr_url": result.pr_url,
    }
def _list_profiles() -> dict:
    """Report which profiles the validator can actually load on this Space.

    The dashboard polls this to populate its dropdown so operators can't
    pick a profile that would fatally fail at registration time.

    The validator is invoked with --use-plugin since the default CLI
    loader has known registration mismatches against the current
    foundation pin; the plugin path is what runner.py's streaming-zip
    flow falls back to and is the source of truth for "actually usable"
    here.

    validate.py prints one `PROFILE: <id> v<version>` line per profile;
    only the `<id>` token of such lines is collected.

    Returns:
        On completion: ``{"profiles": [...], "schema_version": 1, "rc": <exit code>}``,
        plus ``stderr_tail`` / ``stdout_tail`` when no profile was parsed.
        On timeout or any other exception: ``{"profiles": [], "error": "..."}``.
    """
    import subprocess
    import sys
    from runner import VALIDATOR

    marker = "PROFILE:"

    def tail(text) -> str:
        # Last 20 lines, capped at 2000 characters, so the JSON stays small.
        return "\n".join((text or "").splitlines()[-20:])[:2000]

    try:
        # --no-use-kit: validate.py auto-enables --use-kit when the
        # default profile is PhysX-bearing, and then errors out
        # because the Space has no Kit Python. That bailout happens
        # before --list-profiles iterates, masking everything as
        # "0 profiles". Explicitly disabling Kit lets the loader run.
        command = [sys.executable, str(VALIDATOR), "--list-profiles", "--use-plugin", "--no-use-kit"]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=300,
        )

        found: list[str] = []
        for raw_line in (completed.stdout or "").splitlines():
            stripped = raw_line.strip()
            # "PROFILE: <id> v<version>" is the only authoritative shape;
            # every other line is noise.
            if not stripped.startswith(marker):
                continue
            tokens = stripped[len(marker):].split()
            if tokens:
                found.append(tokens[0])

        # dict.fromkeys drops duplicates while keeping first-seen order.
        ordered = list(dict.fromkeys(found))

        payload: dict = {
            "profiles": ordered,
            "schema_version": 1,
            "rc": completed.returncode,
        }
        if not ordered:
            # Nothing parsed: surface the process output so the dashboard
            # can show why.
            payload["stderr_tail"] = tail(completed.stderr)
            payload["stdout_tail"] = tail(completed.stdout)
        return payload
    except subprocess.TimeoutExpired:
        return {"profiles": [], "error": "timeout after 300s (spec load >5 min)"}
    except Exception as exc:
        return {"profiles": [], "error": f"{type(exc).__name__}: {exc}"}
def _cancel_run(submission_id: str) -> dict:
    """Create the cancel-signal file for a submission.

    The streaming-zip loop in runner.py checks for this file between
    zips and aborts when present. Idempotent — calling multiple times
    has no extra effect; runner.py deletes the file when it consumes it.

    Returns:
        ``{"state": "no_id"}`` when the id is blank or `cancel_path_for`
        yields no path, ``{"state": "signaled", "path": ...}`` once the
        file exists, or ``{"state": "error", "error": ...}`` if the
        filesystem call failed.
    """
    cleaned_id = (submission_id or "").strip()
    # Only ask for a path when there is an id to resolve.
    signal_file = cancel_path_for(cleaned_id) if cleaned_id else None
    if signal_file is None:
        return {"state": "no_id"}

    try:
        CANCEL_DIR.mkdir(parents=True, exist_ok=True)
        signal_file.touch(exist_ok=True)
    except OSError as exc:
        return {"state": "error", "error": f"{type(exc).__name__}: {exc}"}
    # NOTE(review): this returns a Space-local filesystem path to the
    # caller — confirm that is intended for a public endpoint.
    return {"state": "signaled", "path": str(signal_file)}
def _get_progress(submission_id: str) -> dict:
    """Read the validator's per-asset progress file for this submission.

    Polled by the dashboard ~every 3 s while a Validate-now click is
    in-flight, so the "Validate now" button can fill up as the
    validator works through the asset list.

    Returns one of these shapes:

    - {"state": "no_id"} — the submission id was blank.
    - {"state": "not_found"} — no progress file (Space restarted, or
      the dashboard is polling a Space-run that never happened).
    - {"state": "transient"} — the file exists but could not be read or
      parsed right now (typically caught mid-write); poll again.
    - {"state": "starting"} — file seeded by runner.py before the
      validator started its loop. processed/total are 0.
    - {processed, total, current, started_at, updated_at} — live
      per-asset progress written by validate.py._emit_progress.

    Caller treats anything with total > 0 as "show the fill bar".
    """
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}
    path = progress_path_for(sid)
    if path is None or not path.is_file():
        return {"state": "not_found"}
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Mid-write — caller will poll again in a few seconds.
        # ValueError covers json.JSONDecodeError *and* UnicodeDecodeError:
        # a file truncated inside a multi-byte UTF-8 sequence fails in
        # read_text() before the JSON parser ever runs.
        return {"state": "transient"}
def _sanitize_results_json(raw: dict) -> dict:
"""Strip absolute filesystem paths from results_json before returning.
Gradio's JSON serializer treats string fields that resolve to files
on the Space's filesystem as downloadable references and tries to
serve them through `/gradio_api/file=...`. The validator's
results.json contains absolute paths (target dir + per-asset
`path`) which point into the Space's ephemeral tempdir and are
NOT exposed through gradio's allowed_paths β gradio_client then
fails with 403 trying to auto-fetch them after a successful run.
Callers don't need filesystem paths anyway β only `rel_path`
(dataset-relative), `passed`, and `issues` are used downstream.
Keep the rest of the report intact (profile_coverage, summary,
layout_findings, etc.).
"""
if not isinstance(raw, dict):
return raw
sanitized = {k: v for k, v in raw.items() if k != "target"}
if "results" in sanitized and isinstance(sanitized["results"], list):
sanitized["results"] = [
{k: v for k, v in asset.items() if k != "path"}
for asset in sanitized["results"]
if isinstance(asset, dict)
]
# Specs/dashboard dir paths are local to the Space, useless to caller.
for k in ("specs_docs_dir", "dashboard_docs_dir"):
sanitized.pop(k, None)
return sanitized
def _run_streaming(dataset: str, profile: str, version: str, open_pr: bool):
    """Generator that yields incremental log output to the UI as the
    validator runs. Gradio streams each yielded tuple to the connected
    outputs.

    The validator call is blocking, so it runs on a worker thread whose
    `log` callback feeds a queue. This generator drains the queue and
    re-yields the accumulated log each time new lines arrive; without
    that hand-off nothing could be yielded until the whole run finished.

    Yields:
        4-tuples ``(log_text, verdict_markdown, summary, report_file)``:
        - while running: ``(log, "", "(running…)", None)``
        - on failure:    ``(log + error line, "", "error: ...", None)``
        - on success:    ``(log, verdict, result.summary, path-or-None)``
    """
    import queue
    import threading

    lines: list[str] = []
    pending: queue.Queue = queue.Queue()
    finished_marker = object()  # sentinel: the worker has nothing more to say
    outcome: dict = {}

    def log(line: str) -> None:
        # Called from the worker thread; the queue is the only shared state.
        pending.put(line)

    def worker() -> None:
        try:
            outcome["result"] = run_validator(
                dataset=dataset.strip(),
                profile=profile,
                version=version.strip() or DEFAULT_VERSION,
                open_pr=open_pr,
                log=log,
            )
        except Exception as e:
            outcome["error"] = e
        finally:
            pending.put(finished_marker)

    yield "\n".join(lines), "", "(running…)", None

    # Daemon thread: an abandoned run must not keep the process alive at
    # shutdown. If the client disconnects, the generator is closed while
    # the validator simply runs to completion in the background.
    threading.Thread(target=worker, daemon=True).start()

    done = False
    while not done:
        # Block for the next item, then grab whatever else is already
        # queued so a burst of log lines becomes a single UI update.
        batch = [pending.get()]
        while True:
            try:
                batch.append(pending.get_nowait())
            except queue.Empty:
                break
        done = any(item is finished_marker for item in batch)
        fresh = [item for item in batch if item is not finished_marker]
        if fresh:
            lines.extend(fresh)
            yield "\n".join(lines), "", "(running…)", None

    if "error" in outcome:
        e = outcome["error"]
        lines.append(f"\nERROR: {type(e).__name__}: {e}")
        yield "\n".join(lines), "", f"error: {e}", None
        return

    result = outcome["result"]
    status_badge = f"**{result.status.upper()}** — {result.summary}"
    if result.pr_url:
        status_badge += f"\n\nPR: {result.pr_url}"

    # Only offer the report for download when it was actually written.
    report_url = None
    if result.report_path is not None:
        report_index = result.report_path / "index.html"
        if report_index.is_file():
            report_url = str(report_index)

    yield (
        "\n".join(lines),
        status_badge,
        result.summary,
        report_url,
    )
# UI definition plus the hidden programmatic endpoints. Components are
# created in display order; every `.click(...)` with an `api_name`
# registers a named API route in addition to wiring the button.
with gr.Blocks(title="SimReady Validator") as demo:
    gr.Markdown(
        "# SimReady Validator\n"
        "Validate a HuggingFace dataset against a SimReady profile. "
        "Reads the dataset directly from HF storage — no copy onto NVIDIA "
        "infrastructure. With **Open PR** enabled, the verdict is uploaded "
        "back to the dataset as a `validation/` pull request."
    )
    with gr.Row():
        dataset = gr.Textbox(
            label="Dataset",
            placeholder="org/dataset (e.g. imagineio/PhysicalAI-SimReady-Kitchens-v1)",
        )
    with gr.Row():
        profile = gr.Dropdown(
            choices=PROFILE_CHOICES, value=DEFAULT_PROFILE, label="Profile",
        )
        version = gr.Textbox(label="Version", value=DEFAULT_VERSION)
        open_pr = gr.Checkbox(label="Open PR on dataset with verdict", value=False)
    run_btn = gr.Button("Validate", variant="primary")
    status_md = gr.Markdown(label="Verdict")
    summary_box = gr.Textbox(label="Summary", interactive=False)
    log_box = gr.Textbox(label="Log", lines=20, interactive=False)
    report_link = gr.File(label="HTML report (download)", interactive=False)

    # Output order must match the tuples yielded by _run_streaming:
    # (log text, verdict markdown, summary, report file).
    run_btn.click(
        fn=_run_streaming,
        inputs=[dataset, profile, version, open_pr],
        outputs=[log_box, status_md, summary_box, report_link],
        api_name="run",
    )

    # Programmatic endpoint — bound to invisible components so the UI
    # doesn't render anything extra, but the Gradio queue still exposes
    # an `/api/predict/run_api` route the gradio_client can hit. The
    # outputs[0] is the JSON return; api_name turns it into a stable
    # path the GitHub Actions caller depends on.
    api_dataset = gr.Textbox(visible=False)
    api_profile = gr.Textbox(visible=False)
    api_version = gr.Textbox(visible=False)
    api_open_pr = gr.Checkbox(visible=False)
    api_submission_id = gr.Textbox(visible=False)
    api_force = gr.Checkbox(visible=False)
    api_preliminary = gr.Checkbox(visible=False)
    api_output = gr.JSON(visible=False)
    api_button = gr.Button(visible=False)
    # Input order must match _run_api's positional parameters.
    api_button.click(
        fn=_run_api,
        inputs=[api_dataset, api_profile, api_version, api_open_pr,
                api_submission_id, api_force, api_preliminary],
        outputs=api_output,
        api_name="run_api",
    )

    # Progress endpoint — polled by the dashboard while a row is
    # validating. CORS is open on /gradio_api/* by default, so the
    # browser can fetch this from github.io directly without any
    # GitHub-Actions side polling/commit churn.
    prog_in = gr.Textbox(visible=False)
    prog_out = gr.JSON(visible=False)
    prog_button = gr.Button(visible=False)
    prog_button.click(
        fn=_get_progress,
        inputs=[prog_in],
        outputs=prog_out,
        api_name="get_progress",
    )

    # Profile-listing endpoint — polled by the dashboard at startup
    # so its dropdown reflects what's actually loadable on this Space
    # right now (foundation+validator pin determines which profiles
    # register). Stops the operator from picking something that
    # would fatal at runtime.
    profiles_out = gr.JSON(visible=False)
    profiles_button = gr.Button(visible=False)
    profiles_button.click(
        fn=_list_profiles,
        inputs=None,
        outputs=profiles_out,
        api_name="list_profiles",
    )

    # Cancel endpoint — the dashboard's Cancel button calls this AFTER
    # cancelling the GH Action so the in-flight server-side gradio call
    # actually stops (cancelling the Action alone only kills the
    # gradio_client wrapper, the Space's loop keeps going).
    cancel_in = gr.Textbox(visible=False)
    cancel_out = gr.JSON(visible=False)
    cancel_button = gr.Button(visible=False)
    cancel_button.click(
        fn=_cancel_run,
        inputs=[cancel_in],
        outputs=cancel_out,
        api_name="cancel_run",
    )
if __name__ == "__main__":
    # Script entry point: enable Gradio's queue, then serve. Bind address
    # and port can be overridden through the environment; the fallbacks
    # are all interfaces and port 7860.
    queued_app = demo.queue()
    launch_options = {
        "server_name": os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        "server_port": int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    }
    queued_app.launch(**launch_options)
|