# Page-header residue captured from the hosting UI (not part of the module):
#   loginowskid's picture
#   Debug: log preliminary flag in _run_api + run() startup
#   555a14f verified
"""SimReady Validator — Gradio UI for the HuggingFace Space.
Two surfaces, same engine:
- **/run** (the on-screen button) — streams log lines to the UI for
interactive use by an operator in the browser.
- **/run_api** (hidden, programmatic) — returns the full RunResult as
a JSON-serializable dict. This is what `tools/hf_watch/call_hf_space.py`
hits from the GitHub Actions runner so the workflow can patch
status.json and asset-status.json without scraping the UI's text.
Both go through `runner.run()`. The split is purely about output
shape (streaming text vs. one-shot dict).
The Space is internal-pilot scope: HF_TOKEN comes from the Space's
secrets, NOT from the requester. When a customer's dataset PR triggers
this (next milestone), the webhook payload identifies the dataset and
the Space's own token opens the verdict PR.
"""
from __future__ import annotations
import dataclasses
import json
import os
from pathlib import Path
import gradio as gr
from runner import run as run_validator, progress_path_for, cancel_path_for, CANCEL_DIR
# Fallbacks applied when a caller leaves the profile / version blank.
DEFAULT_VERSION = "1.0.0"
DEFAULT_PROFILE = "Prop-Robotics-Neutral"

# Profiles offered in the on-screen dropdown, in display order.
PROFILE_CHOICES = [
    # Prop profiles
    "Prop-Robotics-Neutral",
    "Prop-Robotics-Physx",
    "Prop-Robotics-Isaac",
    # Robot-body profiles
    "Robot-Body-Neutral",
    "Robot-Body-Runnable",
    "Robot-Body-Isaac",
    # Package-level profiles
    "Package",
    "Package-Candidate",
]
def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
             submission_id: str = "", force: bool = False,
             preliminary: bool = False) -> dict:
    """Run one validation and return the outcome as a plain JSON dict.

    This is the programmatic twin of the on-screen button; it is
    typically called by `tools/hf_watch/call_hf_space.py` from a GitHub
    Actions runner. The key names below are a contract with that
    receiver (they mirror what `tools/hf_watch/validate.py` produces),
    so bump `schema_version` whenever the shape changes.

    Args:
        dataset: HF dataset id; surrounding whitespace is stripped.
        profile: Profile name; blank falls back to DEFAULT_PROFILE.
        version: Profile version; blank falls back to DEFAULT_VERSION.
        open_pr: Whether the runner should open a verdict PR.
        submission_id: Optional id. When set, the validator writes
            per-asset progress to /tmp/sr-progress/<id>.json, which the
            get_progress endpoint serves to the dashboard.
        force: Forwarded to the runner as a bool.
        preliminary: Structure-only sweep. Zip-bundled datasets are
            scanned (instead of failing PKG.NO-ARCHIVES at the listing
            stage) and per-asset validation is sliced to the first
            asset only. Used by the dashboard's Preliminary scan tab.

    Returns:
        Dict with schema_version, dataset, profile, version, status,
        summary, results_json (path-sanitized) and pr_url.
    """
    # Debug breadcrumb in the Space logs for diagnosing flag plumbing.
    print(f"[run_api] preliminary={preliminary!r} force={force!r} "
          f"submission_id={submission_id!r}", flush=True)

    # Normalise raw request values before handing them to the runner.
    request = {
        "dataset": (dataset or "").strip(),
        "profile": profile or DEFAULT_PROFILE,
        "version": (version or DEFAULT_VERSION).strip(),
        "open_pr": bool(open_pr),
        "submission_id": (submission_id or "").strip(),
        "force": bool(force),
        "preliminary": bool(preliminary),
    }
    outcome = run_validator(**request)

    payload: dict = {"schema_version": 1}
    # Fields copied verbatim from the RunResult, in response order.
    for field in ("dataset", "profile", "version", "status", "summary"):
        payload[field] = getattr(outcome, field)
    # Filesystem paths are stripped so gradio does not try to serve them.
    payload["results_json"] = _sanitize_results_json(outcome.results_json)
    payload["pr_url"] = outcome.pr_url
    return payload
def _list_profiles() -> dict:
    """List the profile ids that actually load on this Space.

    The dashboard polls this to fill its dropdown so operators cannot
    pick a profile that would fail at registration time. The validator
    is invoked with --use-plugin because the default CLI loader has
    known registration mismatches against the current foundation pin;
    the plugin path is what runner.py's streaming-zip flow falls back
    to, so it is the source of truth for "actually usable".

    The validator prints one `PROFILE: <id> v<version>` line per
    profile; every other output line is ignored.

    Returns:
        On completion: {"profiles": [...], "schema_version": 1, "rc": <exit code>},
        plus "stderr_tail"/"stdout_tail" when no profile was parsed.
        On failure: {"profiles": [], "error": "<reason>"}.
    """
    import subprocess
    import sys
    from runner import VALIDATOR

    marker = "PROFILE:"

    def tail(text) -> str:
        # Last 20 lines, capped at 2000 chars, so the JSON stays small.
        return "\n".join((text or "").splitlines()[-20:])[:2000]

    try:
        # --no-use-kit: validate.py auto-enables --use-kit when the
        # default profile is PhysX-bearing and then errors out because
        # the Space has no Kit Python. That bailout happens before
        # --list-profiles iterates, masking everything as "0 profiles".
        # Explicitly disabling Kit lets the loader run.
        command = [sys.executable, str(VALIDATOR), "--list-profiles", "--use-plugin", "--no-use-kit"]
        completed = subprocess.run(
            command, capture_output=True, text=True, timeout=300,
        )

        found: list[str] = []
        for raw_line in (completed.stdout or "").splitlines():
            text = raw_line.strip()
            # Only "PROFILE: <id> v<version>" lines are authoritative.
            if not text.startswith(marker):
                continue
            tokens = text[len(marker):].split()
            if tokens:
                found.append(tokens[0])

        # Order-preserving dedupe.
        distinct = list(dict.fromkeys(found))

        response: dict = {
            "profiles": distinct,
            "schema_version": 1,
            "rc": completed.returncode,
        }
        if not distinct:
            # Nothing parsed: include output tails so the dashboard can
            # show why.
            response["stderr_tail"] = tail(completed.stderr)
            response["stdout_tail"] = tail(completed.stdout)
        return response
    except subprocess.TimeoutExpired:
        return {"profiles": [], "error": "timeout after 300s (spec load >5 min)"}
    except Exception as e:
        return {"profiles": [], "error": f"{type(e).__name__}: {e}"}
def _cancel_run(submission_id: str) -> dict:
    """Drop the cancel-signal file for a submission.

    The streaming-zip loop in runner.py checks for this file between
    zips and aborts when it is present (and deletes it on consumption).
    Touching an existing file is a no-op, so repeated calls are safe.

    Returns:
        {"state": "no_id"} when the id is blank or maps to no path,
        {"state": "signaled", "path": ...} on success,
        {"state": "error", "error": ...} when the filesystem call fails.
    """
    sid = (submission_id or "").strip()
    # A blank id and an unmappable id are reported the same way.
    signal_file = cancel_path_for(sid) if sid else None
    if signal_file is None:
        return {"state": "no_id"}

    try:
        CANCEL_DIR.mkdir(parents=True, exist_ok=True)
        signal_file.touch(exist_ok=True)
    except OSError as e:
        return {"state": "error", "error": f"{type(e).__name__}: {e}"}
    else:
        return {"state": "signaled", "path": str(signal_file)}
def _get_progress(submission_id: str) -> dict:
    """Read the validator's per-asset progress file for this submission.

    Polled by the dashboard ~every 3 s while a Validate-now click is
    in-flight, so the "Validate now" button can fill up as the
    validator works through the asset list.

    Returns one of these shapes:
      - {"state": "no_id"} — the submission id was blank.
      - {"state": "not_found"} — no progress file (Space restarted, or
        the dashboard is polling a Space-run that never happened).
      - {"state": "transient"} — the file exists but could not be read
        or parsed right now (typically caught mid-write); poll again.
      - {"state": "starting"} — file seeded by runner.py before the
        validator started its loop. processed/total are 0.
      - {processed, total, current, started_at, updated_at} — live
        per-asset progress written by validate.py._emit_progress.

    Caller treats anything with total > 0 as "show the fill bar".
    """
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}

    path = progress_path_for(sid)
    if path is None or not path.is_file():
        return {"state": "not_found"}

    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Mid-write — caller will poll again in a few seconds.
        # ValueError covers both json.JSONDecodeError (truncated JSON)
        # and UnicodeDecodeError (file cut inside a multi-byte UTF-8
        # sequence); the latter previously escaped this handler.
        return {"state": "transient"}
def _sanitize_results_json(raw: dict) -> dict:
    """Return a copy of the validator report without Space-local paths.

    Gradio's JSON serializer treats string fields that resolve to files
    on the Space's filesystem as downloadable references and tries to
    serve them through `/gradio_api/file=...`. The report's absolute
    paths (the top-level target dir and each asset's `path`) point into
    the Space's ephemeral tempdir, which is not in gradio's
    allowed_paths, so gradio_client fails with 403 when it tries to
    auto-fetch them after an otherwise successful run.

    Downstream callers only use `rel_path`, `passed` and `issues`, so
    dropping these keys loses nothing they need. Everything else
    (profile_coverage, summary, layout_findings, ...) passes through.

    Non-dict input is returned unchanged. Entries of `results` that are
    not dicts are omitted from the output list. The input is not
    mutated.
    """
    if not isinstance(raw, dict):
        return raw

    # Top-level keys holding Space-local directories, useless to callers.
    local_only = ("target", "specs_docs_dir", "dashboard_docs_dir")
    cleaned = {key: value for key, value in raw.items() if key not in local_only}

    assets = cleaned.get("results")
    if isinstance(assets, list):
        trimmed = []
        for entry in assets:
            if not isinstance(entry, dict):
                continue
            # Drop only the absolute per-asset path; keep every other field.
            trimmed.append(
                {field: value for field, value in entry.items() if field != "path"}
            )
        cleaned["results"] = trimmed

    return cleaned
def _run_streaming(dataset: str, profile: str, version: str, open_pr: bool):
    """Generator behind the on-screen Validate button.

    Yields 4-tuples `(log_text, verdict_markdown, summary, report_file)`
    that Gradio streams to the connected outputs. The validator runs in
    a worker thread and its `log` callback feeds a queue, so log lines
    reach the UI while the run is still in progress. (Calling the
    validator inline blocks this generator until it returns, which
    means no intermediate frame can be yielded.)

    Args:
        dataset: HF dataset id; surrounding whitespace is stripped.
        profile: Profile name; blank falls back to DEFAULT_PROFILE.
        version: Profile version; blank falls back to DEFAULT_VERSION.
        open_pr: Whether the runner should open a verdict PR.

    Yields:
        An initial "(running…)" frame, one frame per batch of new log
        lines, then either a final verdict frame or an error frame.
    """
    # Function-scope imports: only this handler needs them.
    import queue
    import threading

    lines: list[str] = []
    pending: queue.Queue = queue.Queue()
    finished = object()  # sentinel: the worker has nothing more to say
    outcome: dict = {}

    def log(line: str) -> None:
        # Called from the worker thread; the generator drains the queue.
        pending.put(line)

    def work() -> None:
        try:
            outcome["result"] = run_validator(
                dataset=(dataset or "").strip(),
                profile=profile or DEFAULT_PROFILE,
                version=(version or "").strip() or DEFAULT_VERSION,
                open_pr=bool(open_pr),
                log=log,
            )
        except BaseException as e:  # handed to the generator, never swallowed
            outcome["error"] = e
        finally:
            pending.put(finished)

    yield "\n".join(lines), "", "(running…)", None

    # Daemon thread: if the client disconnects and Gradio closes this
    # generator, the run is left to finish on its own without blocking
    # interpreter shutdown.
    threading.Thread(target=work, name="sr-validate-ui", daemon=True).start()

    running = True
    while running:
        item = pending.get()  # block until the worker produces something
        grew = False
        while True:
            if item is finished:
                running = False
                break
            lines.append(item)
            grew = True
            # Coalesce whatever else is already queued into one UI update.
            try:
                item = pending.get_nowait()
            except queue.Empty:
                break
        if grew:
            yield "\n".join(lines), "", "(running…)", None

    error = outcome.get("error")
    if error is not None:
        if not isinstance(error, Exception):
            # KeyboardInterrupt / SystemExit etc.: propagate as before.
            raise error
        lines.append(f"\nERROR: {type(error).__name__}: {error}")
        yield "\n".join(lines), "", f"error: {error}", None
        return

    result = outcome["result"]
    status_badge = f"**{result.status.upper()}** — {result.summary}"
    if result.pr_url:
        status_badge += f"\n\nPR: {result.pr_url}"

    # Only offer the report download when the HTML file actually exists.
    report_index = result.report_path / "index.html"
    report_url = str(report_index) if report_index.is_file() else None

    yield (
        "\n".join(lines),
        status_badge,
        result.summary,
        report_url,
    )
# Build the Gradio app. `demo` is what the __main__ guard launches.
# Besides the visible form, four hidden button/handler pairs are
# registered purely to expose named API endpoints (run_api,
# get_progress, list_profiles, cancel_run).
with gr.Blocks(title="SimReady Validator") as demo:
    gr.Markdown(
        "# SimReady Validator\n"
        "Validate a HuggingFace dataset against a SimReady profile. "
        "Reads the dataset directly from HF storage — no copy onto NVIDIA "
        "infrastructure. With **Open PR** enabled, the verdict is uploaded "
        "back to the dataset as a `validation/` pull request."
    )
    # --- Visible form: inputs ---
    with gr.Row():
        dataset = gr.Textbox(
            label="Dataset",
            placeholder="org/dataset (e.g. imagineio/PhysicalAI-SimReady-Kitchens-v1)",
        )
    with gr.Row():
        profile = gr.Dropdown(
            choices=PROFILE_CHOICES, value=DEFAULT_PROFILE, label="Profile",
        )
        version = gr.Textbox(label="Version", value=DEFAULT_VERSION)
    open_pr = gr.Checkbox(label="Open PR on dataset with verdict", value=False)
    run_btn = gr.Button("Validate", variant="primary")
    # --- Visible form: outputs ---
    status_md = gr.Markdown(label="Verdict")
    summary_box = gr.Textbox(label="Summary", interactive=False)
    log_box = gr.Textbox(label="Log", lines=20, interactive=False)
    report_link = gr.File(label="HTML report (download)", interactive=False)
    # The outputs order must match the 4-tuple yielded by _run_streaming:
    # (log text, verdict markdown, summary, report file).
    run_btn.click(
        fn=_run_streaming,
        inputs=[dataset, profile, version, open_pr],
        outputs=[log_box, status_md, summary_box, report_link],
        api_name="run",
    )
    # Programmatic endpoint — bound to invisible components so the UI
    # doesn't render anything extra, but the Gradio queue still exposes
    # an `/api/predict/run_api` route the gradio_client can hit. The
    # outputs[0] is the JSON return; api_name turns it into a stable
    # path the GitHub Actions caller depends on.
    # The inputs order below must match _run_api's positional parameters.
    api_dataset = gr.Textbox(visible=False)
    api_profile = gr.Textbox(visible=False)
    api_version = gr.Textbox(visible=False)
    api_open_pr = gr.Checkbox(visible=False)
    api_submission_id = gr.Textbox(visible=False)
    api_force = gr.Checkbox(visible=False)
    api_preliminary = gr.Checkbox(visible=False)
    api_output = gr.JSON(visible=False)
    api_button = gr.Button(visible=False)
    api_button.click(
        fn=_run_api,
        inputs=[api_dataset, api_profile, api_version, api_open_pr,
                api_submission_id, api_force, api_preliminary],
        outputs=api_output,
        api_name="run_api",
    )
    # Progress endpoint — polled by the dashboard while a row is
    # validating. CORS is open on /gradio_api/* by default, so the
    # browser can fetch this from github.io directly without any
    # GitHub-Actions side polling/commit churn.
    # Single input: the submission id passed to _get_progress.
    prog_in = gr.Textbox(visible=False)
    prog_out = gr.JSON(visible=False)
    prog_button = gr.Button(visible=False)
    prog_button.click(
        fn=_get_progress,
        inputs=[prog_in],
        outputs=prog_out,
        api_name="get_progress",
    )
    # Profile-listing endpoint — polled by the dashboard at startup
    # so its dropdown reflects what's actually loadable on this Space
    # right now (foundation+validator pin determines which profiles
    # register). Stops the operator from picking something that
    # would fatal at runtime.
    # Takes no inputs; _list_profiles has no parameters.
    profiles_out = gr.JSON(visible=False)
    profiles_button = gr.Button(visible=False)
    profiles_button.click(
        fn=_list_profiles,
        inputs=None,
        outputs=profiles_out,
        api_name="list_profiles",
    )
    # Cancel endpoint — the dashboard's Cancel button calls this AFTER
    # cancelling the GH Action so the in-flight server-side gradio call
    # actually stops (cancelling the Action alone only kills the
    # gradio_client wrapper, the Space's loop keeps going).
    # Single input: the submission id passed to _cancel_run.
    cancel_in = gr.Textbox(visible=False)
    cancel_out = gr.JSON(visible=False)
    cancel_button = gr.Button(visible=False)
    cancel_button.click(
        fn=_cancel_run,
        inputs=[cancel_in],
        outputs=cancel_out,
        api_name="cancel_run",
    )
if __name__ == "__main__":
    # Enable the request queue, then serve. Host and port come from the
    # GRADIO_SERVER_NAME / GRADIO_SERVER_PORT environment variables,
    # defaulting to all interfaces on port 7860.
    queued_app = demo.queue()
    bind_host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    bind_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    queued_app.launch(server_name=bind_host, server_port=bind_port)