Spaces:

nvidia
/

simready-validator

Sleeping

App Files Files Community

loginowskid committed 4 days ago

Commit

74d52fc

verified ·

1 Parent(s): cd53438

list_profiles: --no-use-kit (fast enumeration; fixes 300s timeout)

Browse files

Files changed (1) hide show

tools/hf_space/app.py +429 -429

tools/hf_space/app.py CHANGED Viewed

@@ -1,429 +1,429 @@
-"""SimReady Validator — Gradio UI for the HuggingFace Space.
-Two surfaces, same engine:
-- **/run** (the on-screen button) — streams log lines to the UI for
-  interactive use by an operator in the browser.
-- **/run_api** (hidden, programmatic) — returns the full RunResult as
-  a JSON-serializable dict. This is what `tools/hf_watch/call_hf_space.py`
-  hits from the GitHub Actions runner so the workflow can patch
-  status.json and asset-status.json without scraping the UI's text.
-Both go through `runner.run()`. The split is purely about output
-shape (streaming text vs. one-shot dict).
-The Space is internal-pilot scope: HF_TOKEN comes from the Space's
-secrets, NOT from the requester. When a customer's dataset PR triggers
-this (next milestone), the webhook payload identifies the dataset and
-the Space's own token opens the verdict PR.
-"""
-from __future__ import annotations
-import json
-import os
-from pathlib import Path
-import gradio as gr
-from runner import (run as run_validator, progress_path_for, cancel_path_for,
-                    run_token_path_for, CANCEL_DIR)
-PROFILE_CHOICES = [
-    "Prop-Robotics-Neutral",
-    "Prop-Robotics-Physx",
-    "Prop-Robotics-Isaac",
-    "Robot-Body-Neutral",
-    "Robot-Body-Runnable",
-    "Robot-Body-Isaac",
-    "Package",
-    "Package-Candidate",
-]
-DEFAULT_PROFILE = "Prop-Robotics-Neutral"
-DEFAULT_VERSION = "1.0.0"
-def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
-             submission_id: str = "", force: bool = False,
-             preliminary: bool = False) -> dict:
-    """Programmatic endpoint. Returns the RunResult as a JSON dict.
-    Caller is typically `tools/hf_watch/call_hf_space.py` running from
-    a GitHub Actions ubuntu-latest runner. Output shape must stay
-    stable — bump `schema_version` if you change it. The receiver
-    pattern-matches on the same field names `tools/hf_watch/validate.py`
-    produces, so status.json patching is identical regardless of which
-    backend ran the validation.
-    `submission_id` is optional — when set, the validator writes
-    per-asset progress to /tmp/sr-progress/<id>.json, which the
-    get_progress endpoint serves to the dashboard.
-    `preliminary` switches the runner to a structure-only sweep:
-    zip-bundled datasets are scanned (instead of failing
-    PKG.NO-ARCHIVES at the listing stage) and per-asset validation is
-    sliced to the first asset only. Used by the dashboard's
-    Preliminary scan tab.
-    """
-    print(f"[run_api] preliminary={preliminary!r} force={force!r} "
-          f"submission_id={submission_id!r}", flush=True)
-    # Untrusted callers can hit /run_api directly — profile/version flow
-    # into the validator's argv, so validate them before use. Empty
-    # falls back to the defaults (existing behavior).
-    import re
-    profile = profile or DEFAULT_PROFILE
-    if profile not in PROFILE_CHOICES and profile.lower() != "auto":
-        raise ValueError(f"invalid profile: {profile!r}")
-    version = (version or DEFAULT_VERSION).strip()
-    if not re.fullmatch(r"[\w.\-]+", version):
-        raise ValueError(f"invalid version: {version!r}")
-    result = run_validator(
-        dataset=(dataset or "").strip(),
-        profile=profile,
-        version=version,
-        open_pr=bool(open_pr),
-        submission_id=(submission_id or "").strip(),
-        force=bool(force),
-        preliminary=bool(preliminary),
-    )
-    return {
-        "schema_version": 1,
-        "dataset": result.dataset,
-        "profile": result.profile,
-        "version": result.version,
-        "status": result.status,
-        "summary": result.summary,
-        "results_json": _sanitize_results_json(result.results_json),
-        "pr_url": result.pr_url,
-    }
-def _list_profiles() -> dict:
-    """Return the set of profiles that actually load on this Space's
-    foundation+validator combination. The dashboard polls this to
-    populate its dropdown so operators can't pick a profile that
-    would fatally fail at registration time.
-    Uses --use-plugin since the default CLI loader has known
-    registration mismatches against the current foundation pin; the
-    plugin path is what runner.py's streaming-zip flow falls back
-    to and is the source of truth for "actually usable" here.
-    Output format from validate.py is `PROFILE: <id> v<version>`
-    per profile, one per line.
-    """
-    import subprocess, sys
-    from runner import VALIDATOR
-    try:
-        proc = subprocess.run(
-            # --list-profiles only ENUMERATES registered profiles from the
-            # spec/plugin registry (--use-plugin) — it runs no validation
-            # rules, so it never needs Kit. Force --no-use-kit: on a
-            # Kit-enabled image the validator auto-enables --use-kit for the
-            # PhysX-bearing default profile and boots the full Isaac Sim
-            # runtime (~5 min) just to print the list, blowing the 300s
-            # timeout below. Actual validation (runner.py) still uses Kit.
-            [sys.executable, str(VALIDATOR), "--list-profiles", "--use-plugin", "--no-use-kit"],
-            capture_output=True, text=True, timeout=300,
-        )
-        names: list[str] = []
-        for line in (proc.stdout or "").splitlines():
-            s = line.strip()
-            # Validator emits "PROFILE: <id> v<version>" — that's our
-            # only authoritative shape. Anything else is noise.
-            if s.startswith("PROFILE:"):
-                rest = s[len("PROFILE:"):].strip()
-                pid = rest.split()[0] if rest else ""
-                if pid:
-                    names.append(pid)
-        # Dedupe while preserving order.
-        seen = set()
-        unique = []
-        for n in names:
-            if n not in seen:
-                seen.add(n)
-                unique.append(n)
-        result: dict = {"profiles": unique, "schema_version": 1, "rc": proc.returncode}
-        if not unique:
-            # No profiles registered AND no parse hits — surface why so
-            # the dashboard can show something useful. Truncate so the
-            # JSON response stays small.
-            stderr_tail = "\n".join((proc.stderr or "").splitlines()[-20:])[:2000]
-            stdout_tail = "\n".join((proc.stdout or "").splitlines()[-20:])[:2000]
-            result["stderr_tail"] = stderr_tail
-            result["stdout_tail"] = stdout_tail
-        return result
-    except subprocess.TimeoutExpired:
-        return {"profiles": [], "error": "timeout after 300s (spec load >5 min)"}
-    except Exception as e:
-        return {"profiles": [], "error": f"{type(e).__name__}: {e}"}
-def _cancel_run(submission_id: str, run_token: str = "") -> dict:
-    """Write the cancel-signal file for a given submission. The
-    streaming-zip loop in runner.py checks for this file between zips
-    and aborts when present. Idempotent — calling multiple times has no
-    extra effect; consuming runner.py deletes it.
-    `run_token` is the per-run token the dashboard read from get_progress.
-    It becomes the flag's content so runner._is_cancelled only honors it
-    for the exact run it was issued against — a flag left over from a
-    prior run of this submission can never abort a fresh one."""
-    sid = (submission_id or "").strip()
-    if not sid:
-        return {"state": "no_id"}
-    path = cancel_path_for(sid)
-    if path is None:
-        return {"state": "no_id"}
-    try:
-        CANCEL_DIR.mkdir(parents=True, exist_ok=True)
-        path.write_text((run_token or "").strip(), encoding="utf-8")
-        return {"state": "signaled", "path": str(path)}
-    except OSError as e:
-        return {"state": "error", "error": f"{type(e).__name__}: {e}"}
-def _get_progress(submission_id: str) -> dict:
-    """Read the validator's per-asset progress file for this submission.
-    Polled by the dashboard ~every 3 s while a Validate-now click is
-    in-flight, so the "Validate now" button can fill up as the
-    validator works through the asset list.
-    Returns one of three shapes:
-      - {"state": "not_found"} — no progress file (Space restarted, or
-        the dashboard is polling a Space-run that never happened).
-      - {"state": "starting"}  — file seeded by runner.py before the
-        validator started its loop. processed/total are 0.
-      - {processed, total, current, started_at, updated_at} — live
-        per-asset progress written by validate.py._emit_progress.
-    Every shape also carries `run_token` (the current run's cancel
-    token, from the sidecar file) when one exists, so the dashboard can
-    echo it back to cancel_run and target the exact run.
-    Caller treats anything with total > 0 as "show the fill bar".
-    """
-    sid = (submission_id or "").strip()
-    if not sid:
-        return {"state": "no_id"}
-    # Per-run cancel token (sidecar; see runner.run_token_path_for).
-    # Surfaced on every shape so the dashboard can echo it back to
-    # cancel_run — a cancel then only aborts the run it was issued
-    # against, never a later one that reused the submission_id.
-    run_token = ""
-    tok_path = run_token_path_for(sid)
-    if tok_path and tok_path.is_file():
-        try:
-            run_token = tok_path.read_text(encoding="utf-8").strip()
-        except OSError:
-            pass
-    path = progress_path_for(sid)
-    if path is None or not path.is_file():
-        return {"state": "not_found", "run_token": run_token}
-    try:
-        data = json.loads(path.read_text(encoding="utf-8"))
-        if isinstance(data, dict) and run_token:
-            data["run_token"] = run_token
-        return data
-    except (OSError, json.JSONDecodeError):
-        # Mid-write — caller will poll again in a few seconds.
-        return {"state": "transient", "run_token": run_token}
-def _sanitize_results_json(raw: dict) -> dict:
-    """Strip absolute filesystem paths from results_json before returning.
-    Gradio's JSON serializer treats string fields that resolve to files
-    on the Space's filesystem as downloadable references and tries to
-    serve them through `/gradio_api/file=...`. The validator's
-    results.json contains absolute paths (target dir + per-asset
-    `path`) which point into the Space's ephemeral tempdir and are
-    NOT exposed through gradio's allowed_paths — gradio_client then
-    fails with 403 trying to auto-fetch them after a successful run.
-    Callers don't need filesystem paths anyway — only `rel_path`
-    (dataset-relative), `passed`, and `issues` are used downstream.
-    Keep the rest of the report intact (profile_coverage, summary,
-    layout_findings, etc.).
-    """
-    if not isinstance(raw, dict):
-        return raw
-    sanitized = {k: v for k, v in raw.items() if k != "target"}
-    if "results" in sanitized and isinstance(sanitized["results"], list):
-        sanitized["results"] = [
-            {k: v for k, v in asset.items() if k != "path"}
-            for asset in sanitized["results"]
-            if isinstance(asset, dict)
-        ]
-    # Specs/dashboard dir paths are local to the Space, useless to caller.
-    for k in ("specs_docs_dir", "dashboard_docs_dir"):
-        sanitized.pop(k, None)
-    return sanitized
-def _run_streaming(dataset: str, profile: str, version: str, open_pr: bool):
-    """Generator that yields incremental log output to the UI as the
-    validator runs. Gradio streams each yielded tuple to the connected
-    outputs."""
-    lines: list[str] = []
-    def log(line: str) -> None:
-        lines.append(line)
-    yield "\n".join(lines), "", "(running…)", None
-    try:
-        result = run_validator(
-            dataset=dataset.strip(),
-            profile=profile,
-            version=version.strip() or DEFAULT_VERSION,
-            open_pr=open_pr,
-            log=log,
-        )
-    except Exception as e:
-        lines.append(f"\nERROR: {type(e).__name__}: {e}")
-        yield "\n".join(lines), "", f"error: {e}", None
-        return
-    status_badge = f"**{result.status.upper()}** — {result.summary}"
-    if result.pr_url:
-        status_badge += f"\n\nPR: {result.pr_url}"
-    report_index = result.report_path / "index.html"
-    report_url = str(report_index) if report_index.is_file() else None
-    yield (
-        "\n".join(lines),
-        status_badge,
-        result.summary,
-        report_url,
-    )
-def _read_md(name: str) -> str:
-    """Return the contents of name (relative to this file's dir),
-    stripping a leading YAML frontmatter block if present. Falls back
-    to a friendly stub when the file is missing — keeps the Space
-    bootable even before the space-deploy workflow has synced the
-    assembled docs into the container."""
-    from pathlib import Path
-    p = Path(__file__).resolve().parent / name
-    try:
-        src = p.read_text(encoding="utf-8")
-    except FileNotFoundError:
-        return f"_{name} not yet synced into this Space — check back after the next deploy._"
-    if src.startswith("---"):
-        end = src.find("\n---\n", 4)
-        if end > 0:
-            src = src[end + len("\n---\n"):].lstrip()
-    return src
-with gr.Blocks(title="SimReady Validator") as demo:
-    with gr.Tabs():
-        with gr.Tab("Overview"):
-            gr.Markdown(_read_md("README.md"))
-        with gr.Tab("Validator"):
-            gr.Markdown(
-                "Submit a HuggingFace dataset to validate against a SimReady "
-                "profile. With **Open PR** enabled, the verdict is uploaded "
-                "back to the dataset as a `validation/` pull request."
-            )
-            with gr.Row():
-                dataset = gr.Textbox(
-                    label="Dataset",
-                    placeholder="org/dataset (e.g. imagineio/PhysicalAI-SimReady-Kitchens-v1)",
-                )
-            with gr.Row():
-                profile = gr.Dropdown(
-                    choices=PROFILE_CHOICES, value=DEFAULT_PROFILE, label="Profile",
-                )
-                version = gr.Textbox(label="Version", value=DEFAULT_VERSION)
-                open_pr = gr.Checkbox(label="Open PR on dataset with verdict", value=False)
-            run_btn = gr.Button("Validate", variant="primary")
-            status_md = gr.Markdown(label="Verdict")
-            summary_box = gr.Textbox(label="Summary", interactive=False)
-            log_box = gr.Textbox(label="Log", lines=20, interactive=False)
-            report_link = gr.File(label="HTML report (download)", interactive=False)
-        with gr.Tab("Partner walkthrough"):
-            gr.Markdown(_read_md("VALIDATE.md"))
-    run_btn.click(
-        fn=_run_streaming,
-        inputs=[dataset, profile, version, open_pr],
-        outputs=[log_box, status_md, summary_box, report_link],
-        api_name="run",
-    )
-    # Programmatic endpoint — bound to invisible components so the UI
-    # doesn't render anything extra, but the Gradio queue still exposes
-    # an `/api/predict/run_api` route the gradio_client can hit. The
-    # outputs[0] is the JSON return; api_name turns it into a stable
-    # path the GitHub Actions caller depends on.
-    api_dataset = gr.Textbox(visible=False)
-    api_profile = gr.Textbox(visible=False)
-    api_version = gr.Textbox(visible=False)
-    api_open_pr = gr.Checkbox(visible=False)
-    api_submission_id = gr.Textbox(visible=False)
-    api_force = gr.Checkbox(visible=False)
-    api_preliminary = gr.Checkbox(visible=False)
-    api_output  = gr.JSON(visible=False)
-    api_button  = gr.Button(visible=False)
-    api_button.click(
-        fn=_run_api,
-        inputs=[api_dataset, api_profile, api_version, api_open_pr,
-                api_submission_id, api_force, api_preliminary],
-        outputs=api_output,
-        api_name="run_api",
-    )
-    # Progress endpoint — polled by the dashboard while a row is
-    # validating. CORS is open on /gradio_api/* by default, so the
-    # browser can fetch this from github.io directly without any
-    # GitHub-Actions side polling/commit churn.
-    prog_in     = gr.Textbox(visible=False)
-    prog_out    = gr.JSON(visible=False)
-    prog_button = gr.Button(visible=False)
-    prog_button.click(
-        fn=_get_progress,
-        inputs=[prog_in],
-        outputs=prog_out,
-        api_name="get_progress",
-    )
-    # Profile-listing endpoint — polled by the dashboard at startup
-    # so its dropdown reflects what's actually loadable on this Space
-    # right now (foundation+validator pin determines which profiles
-    # register). Stops the operator from picking something that
-    # would fatal at runtime.
-    profiles_out    = gr.JSON(visible=False)
-    profiles_button = gr.Button(visible=False)
-    profiles_button.click(
-        fn=_list_profiles,
-        inputs=None,
-        outputs=profiles_out,
-        api_name="list_profiles",
-    )
-    # Cancel endpoint — the dashboard's Cancel button calls this AFTER
-    # cancelling the GH Action so the in-flight server-side gradio call
-    # actually stops (cancelling the Action alone only kills the
-    # gradio_client wrapper, the Space's loop keeps going).
-    cancel_in     = gr.Textbox(visible=False)
-    cancel_token  = gr.Textbox(visible=False)
-    cancel_out    = gr.JSON(visible=False)
-    cancel_button = gr.Button(visible=False)
-    cancel_button.click(
-        fn=_cancel_run,
-        inputs=[cancel_in, cancel_token],
-        outputs=cancel_out,
-        api_name="cancel_run",
-    )
-if __name__ == "__main__":
-    demo.queue().launch(
-        server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
-        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
-    )

+"""SimReady Validator — Gradio UI for the HuggingFace Space.
+Two surfaces, same engine:
+- **/run** (the on-screen button) — streams log lines to the UI for
+  interactive use by an operator in the browser.
+- **/run_api** (hidden, programmatic) — returns the full RunResult as
+  a JSON-serializable dict. This is what `tools/hf_watch/call_hf_space.py`
+  hits from the GitHub Actions runner so the workflow can patch
+  status.json and asset-status.json without scraping the UI's text.
+Both go through `runner.run()`. The split is purely about output
+shape (streaming text vs. one-shot dict).
+The Space is internal-pilot scope: HF_TOKEN comes from the Space's
+secrets, NOT from the requester. When a customer's dataset PR triggers
+this (next milestone), the webhook payload identifies the dataset and
+the Space's own token opens the verdict PR.
+"""
+from __future__ import annotations
+import json
+import os
+from pathlib import Path
+import gradio as gr
+from runner import (run as run_validator, progress_path_for, cancel_path_for,
+                    run_token_path_for, CANCEL_DIR)
+# Profile names offered in the Validator tab's dropdown. _run_api also
+# checks incoming profile values against this list (it additionally
+# accepts the literal "auto", case-insensitively).
+PROFILE_CHOICES = [
+    "Prop-Robotics-Neutral",
+    "Prop-Robotics-Physx",
+    "Prop-Robotics-Isaac",
+    "Robot-Body-Neutral",
+    "Robot-Body-Runnable",
+    "Robot-Body-Isaac",
+    "Package",
+    "Package-Candidate",
+]
+# Fallbacks used when a caller supplies an empty profile / version, and
+# the initial values of the corresponding UI inputs.
+DEFAULT_PROFILE = "Prop-Robotics-Neutral"
+DEFAULT_VERSION = "1.0.0"
+def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
+             submission_id: str = "", force: bool = False,
+             preliminary: bool = False) -> dict:
+    """Programmatic endpoint: run one validation and return it as a JSON dict.
+    Caller is typically `tools/hf_watch/call_hf_space.py` running from
+    a GitHub Actions runner. The output shape must stay stable — bump
+    `schema_version` if you change it, because the receiver
+    pattern-matches on these field names when patching status.json.
+    Args:
+        dataset: dataset id; None/empty is handed to the runner as "".
+        profile: one of PROFILE_CHOICES, or "auto" in any letter case.
+            Empty falls back to DEFAULT_PROFILE.
+        version: spec version. Empty falls back to DEFAULT_VERSION. May
+            contain word characters, "." and "-", and must not start
+            with "-".
+        open_pr: coerced to bool and forwarded to the runner.
+        submission_id: optional id, stripped and forwarded. Per the
+            runner's contract, when set the validator writes per-asset
+            progress that the get_progress endpoint serves.
+        force: coerced to bool and forwarded to the runner.
+        preliminary: coerced to bool and forwarded; selects the
+            structure-only sweep used by the dashboard's Preliminary
+            scan tab.
+    Returns:
+        Dict with schema_version, dataset, profile, version, status,
+        summary, results_json (filesystem paths stripped) and pr_url.
+    Raises:
+        ValueError: if profile or version fails validation.
+    """
+    print(f"[run_api] preliminary={preliminary!r} force={force!r} "
+          f"submission_id={submission_id!r}", flush=True)
+    # Untrusted callers can hit /run_api directly — profile/version flow
+    # into the validator's argv, so validate them before use. Empty
+    # falls back to the defaults (existing behavior).
+    import re
+    profile = profile or DEFAULT_PROFILE
+    if profile not in PROFILE_CHOICES and profile.lower() != "auto":
+        raise ValueError(f"invalid profile: {profile!r}")
+    version = (version or DEFAULT_VERSION).strip()
+    # The first character may not be "-": a value such as "--use-kit"
+    # would otherwise pass the character check and could be read as an
+    # option once it lands in the validator's argv.
+    if not re.fullmatch(r"[\w.][\w.\-]*", version):
+        raise ValueError(f"invalid version: {version!r}")
+    result = run_validator(
+        dataset=(dataset or "").strip(),
+        profile=profile,
+        version=version,
+        open_pr=bool(open_pr),
+        submission_id=(submission_id or "").strip(),
+        force=bool(force),
+        preliminary=bool(preliminary),
+    )
+    return {
+        "schema_version": 1,
+        "dataset": result.dataset,
+        "profile": result.profile,
+        "version": result.version,
+        "status": result.status,
+        "summary": result.summary,
+        # Absolute paths are removed so gradio does not try to serve them.
+        "results_json": _sanitize_results_json(result.results_json),
+        "pr_url": result.pr_url,
+    }
+def _list_profiles() -> dict:
+    """List the profiles the validator reports as registered on this Space.
+    The dashboard calls this to populate its dropdown, so operators
+    cannot pick a profile that would fail at registration time.
+    The validator is invoked with --use-plugin because the default CLI
+    loader has known registration mismatches against the current
+    foundation pin; the plugin path is treated as the source of truth
+    for "actually usable".
+    Returns:
+        On completion: {"profiles": [...], "schema_version": 1, "rc": <exit
+        code>}, plus "stderr_tail"/"stdout_tail" when no profile was
+        parsed. On timeout or any other exception: {"profiles": [],
+        "error": <message>}.
+    """
+    import subprocess, sys
+    from runner import VALIDATOR
+    try:
+        # --list-profiles only ENUMERATES registered profiles — it runs no
+        # validation rules, so it never needs Kit. --no-use-kit is forced
+        # because on a Kit-enabled image the validator otherwise boots the
+        # full Isaac Sim runtime (~5 min) just to print the list, blowing
+        # the 300s timeout. Actual validation (runner.py) still uses Kit.
+        command = [sys.executable, str(VALIDATOR), "--list-profiles", "--use-plugin", "--no-use-kit"]
+        completed = subprocess.run(command, capture_output=True, text=True, timeout=300)
+        # The validator prints "PROFILE: <id> v<version>", one per line;
+        # every other line is treated as noise.
+        marker = "PROFILE:"
+        found: list[str] = []
+        for raw_line in (completed.stdout or "").splitlines():
+            text = raw_line.strip()
+            if not text.startswith(marker):
+                continue
+            fields = text[len(marker):].split()
+            if fields:
+                found.append(fields[0])
+        # dict.fromkeys drops repeats while keeping first-seen order.
+        unique = list(dict.fromkeys(found))
+        payload: dict = {"profiles": unique, "schema_version": 1, "rc": completed.returncode}
+        if not unique:
+            # Nothing parsed — attach the last 20 lines (capped at 2000
+            # characters) of each stream so the dashboard can show why.
+            for stream in ("stderr", "stdout"):
+                tail_lines = (getattr(completed, stream) or "").splitlines()[-20:]
+                payload[f"{stream}_tail"] = "\n".join(tail_lines)[:2000]
+        return payload
+    except subprocess.TimeoutExpired:
+        return {"profiles": [], "error": "timeout after 300s (spec load >5 min)"}
+    except Exception as e:
+        return {"profiles": [], "error": f"{type(e).__name__}: {e}"}
+def _cancel_run(submission_id: str, run_token: str = "") -> dict:
+    """Drop the cancel-signal file for a submission.
+    The file's content is the stripped `run_token` — the per-run token
+    the dashboard read from get_progress — so that (per the runner's
+    contract) a flag is only honored by the exact run it was issued
+    against. Writing the same flag again simply overwrites it.
+    Returns:
+        {"state": "no_id"} when the id is empty or has no cancel path,
+        {"state": "signaled", "path": ...} once the flag is written, or
+        {"state": "error", "error": ...} if the write fails with OSError.
+    """
+    sid = (submission_id or "").strip()
+    flag_path = cancel_path_for(sid) if sid else None
+    if flag_path is None:
+        return {"state": "no_id"}
+    try:
+        CANCEL_DIR.mkdir(parents=True, exist_ok=True)
+        flag_path.write_text((run_token or "").strip(), encoding="utf-8")
+    except OSError as exc:
+        return {"state": "error", "error": f"{type(exc).__name__}: {exc}"}
+    else:
+        return {"state": "signaled", "path": str(flag_path)}
+def _get_progress(submission_id: str) -> dict:
+    """Read the validator's per-asset progress file for this submission.
+    Polled by the dashboard while a Validate-now click is in flight.
+    Returns one of these shapes:
+      - {"state": "no_id"} — submission_id was empty.
+      - {"state": "not_found", "run_token": ...} — no progress file.
+      - {"state": "transient", "run_token": ...} — the file could not be
+        read or decoded as a JSON object (typically caught mid-write);
+        the caller should simply poll again.
+      - otherwise, the progress file's own JSON object. Per the
+        writers' contract this is {"state": "starting"} before the
+        validator loop begins, then {processed, total, current,
+        started_at, updated_at}.
+    `run_token` (the current run's cancel token, read from the sidecar
+    file) is added to the progress object when one exists, so the
+    dashboard can echo it back to cancel_run and target the exact run.
+    """
+    sid = (submission_id or "").strip()
+    if not sid:
+        return {"state": "no_id"}
+    # Per-run cancel token (sidecar file). Reading it is best effort: an
+    # unreadable or undecodable token just means no token is reported.
+    run_token = ""
+    tok_path = run_token_path_for(sid)
+    if tok_path and tok_path.is_file():
+        try:
+            run_token = tok_path.read_text(encoding="utf-8").strip()
+        except (OSError, ValueError):
+            pass
+    path = progress_path_for(sid)
+    if path is None or not path.is_file():
+        return {"state": "not_found", "run_token": run_token}
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        # ValueError covers json.JSONDecodeError and also the
+        # UnicodeDecodeError raised when a write is caught in the middle
+        # of a multibyte character. Caller will poll again shortly.
+        return {"state": "transient", "run_token": run_token}
+    if not isinstance(data, dict):
+        # Anything other than a JSON object is not a usable progress
+        # record; report it like a partial write instead of leaking it.
+        return {"state": "transient", "run_token": run_token}
+    if run_token:
+        data["run_token"] = run_token
+    return data
+def _sanitize_results_json(raw: dict) -> dict:
+    """Return a copy of results_json without Space-local filesystem paths.
+    Gradio's JSON serializer treats string fields that resolve to files
+    on the Space's filesystem as downloadable references, and
+    gradio_client then fails with 403 trying to fetch them. Callers only
+    use `rel_path`, `passed` and `issues`, so the path fields are dropped:
+      - top-level `target`, `specs_docs_dir`, `dashboard_docs_dir`;
+      - `path` inside each entry of the `results` list.
+    Entries of `results` that are not dicts are omitted. Everything else
+    is kept as-is, and the input is not modified. A non-dict `raw` is
+    returned unchanged.
+    """
+    if not isinstance(raw, dict):
+        return raw
+    dropped_keys = {"target", "specs_docs_dir", "dashboard_docs_dir"}
+    cleaned = {key: value for key, value in raw.items() if key not in dropped_keys}
+    assets = cleaned.get("results")
+    if isinstance(assets, list):
+        cleaned["results"] = [
+            {field: value for field, value in entry.items() if field != "path"}
+            for entry in assets
+            if isinstance(entry, dict)
+        ]
+    return cleaned
+def _run_streaming(dataset: str, profile: str, version: str, open_pr: bool):
+    """Generator behind the on-screen Validate button (endpoint /run).
+    Yields (log text, verdict markdown, summary, report file) tuples for
+    the four connected outputs. It yields exactly twice: a "(running…)"
+    placeholder first, then either the final result or an error. Log
+    lines emitted by the runner are collected through the `log` callback
+    and shown with the final yield — they are not pushed one by one.
+    /run is reachable as an API like /run_api, and profile/version reach
+    the validator's argv, so they get the same checks here: profile must
+    be in PROFILE_CHOICES (or "auto"), and version may contain only word
+    characters, "." and "-" and must not start with "-". Empty values
+    fall back to the defaults. A failed check is reported through the
+    normal error yield rather than raised.
+    """
+    import re
+    lines: list[str] = []
+    def log(line: str) -> None:
+        lines.append(line)
+    yield "\n".join(lines), "", "(running…)", None
+    try:
+        profile = profile or DEFAULT_PROFILE
+        if profile not in PROFILE_CHOICES and profile.lower() != "auto":
+            raise ValueError(f"invalid profile: {profile!r}")
+        # `or ""` keeps a null from an API caller from crashing .strip().
+        version = (version or "").strip() or DEFAULT_VERSION
+        if not re.fullmatch(r"[\w.][\w.\-]*", version):
+            raise ValueError(f"invalid version: {version!r}")
+        result = run_validator(
+            dataset=(dataset or "").strip(),
+            profile=profile,
+            version=version,
+            open_pr=open_pr,
+            log=log,
+        )
+    except Exception as e:
+        # UI boundary: show any failure in the log box and summary.
+        lines.append(f"\nERROR: {type(e).__name__}: {e}")
+        yield "\n".join(lines), "", f"error: {e}", None
+        return
+    status_badge = f"**{result.status.upper()}** — {result.summary}"
+    if result.pr_url:
+        status_badge += f"\n\nPR: {result.pr_url}"
+    # Offer the HTML report for download only if the runner produced one.
+    report_index = result.report_path / "index.html"
+    report_url = str(report_index) if report_index.is_file() else None
+    yield (
+        "\n".join(lines),
+        status_badge,
+        result.summary,
+        report_url,
+    )
+def _read_md(name: str) -> str:
+    """Return the contents of `name` (relative to this file's directory)
+    with a leading YAML front-matter block removed.
+    Front matter is recognised when the text starts with "---" and a
+    later line is exactly "---"; everything up to and including that
+    closing line is dropped and leading whitespace of the remainder is
+    stripped. The closing line is found even when it directly follows
+    the opening one (empty front matter) or is the last line of the file
+    without a trailing newline. Text with no closing line is returned
+    unchanged.
+    When the file is missing a friendly stub is returned instead, which
+    keeps the Space bootable before the docs have been synced into the
+    container.
+    """
+    from pathlib import Path
+    p = Path(__file__).resolve().parent / name
+    try:
+        src = p.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        return f"_{name} not yet synced into this Space — check back after the next deploy._"
+    if src.startswith("---"):
+        rows = src.split("\n")
+        # Row 0 is the opening fence; look for the first closing fence.
+        for idx in range(1, len(rows)):
+            if rows[idx] == "---":
+                src = "\n".join(rows[idx + 1:]).lstrip()
+                break
+    return src
+# Build the Gradio app: three visible tabs (Overview, Validator, Partner
+# walkthrough) followed by hidden components whose click handlers exist
+# only to expose named API endpoints (run_api, get_progress,
+# list_profiles, cancel_run). The markdown files are read once, while
+# this module is being imported.
+with gr.Blocks(title="SimReady Validator") as demo:
+    with gr.Tabs():
+        with gr.Tab("Overview"):
+            gr.Markdown(_read_md("README.md"))
+        with gr.Tab("Validator"):
+            gr.Markdown(
+                "Submit a HuggingFace dataset to validate against a SimReady "
+                "profile. With **Open PR** enabled, the verdict is uploaded "
+                "back to the dataset as a `validation/` pull request."
+            )
+            with gr.Row():
+                dataset = gr.Textbox(
+                    label="Dataset",
+                    placeholder="org/dataset (e.g. imagineio/PhysicalAI-SimReady-Kitchens-v1)",
+                )
+            with gr.Row():
+                profile = gr.Dropdown(
+                    choices=PROFILE_CHOICES, value=DEFAULT_PROFILE, label="Profile",
+                )
+                version = gr.Textbox(label="Version", value=DEFAULT_VERSION)
+                open_pr = gr.Checkbox(label="Open PR on dataset with verdict", value=False)
+            run_btn = gr.Button("Validate", variant="primary")
+            status_md = gr.Markdown(label="Verdict")
+            summary_box = gr.Textbox(label="Summary", interactive=False)
+            log_box = gr.Textbox(label="Log", lines=20, interactive=False)
+            report_link = gr.File(label="HTML report (download)", interactive=False)
+        with gr.Tab("Partner walkthrough"):
+            gr.Markdown(_read_md("VALIDATE.md"))
+    # Interactive endpoint. The order of `outputs` must match the order of
+    # the tuples yielded by _run_streaming.
+    run_btn.click(
+        fn=_run_streaming,
+        inputs=[dataset, profile, version, open_pr],
+        outputs=[log_box, status_md, summary_box, report_link],
+        api_name="run",
+    )
+    # Programmatic endpoint — bound to invisible components so the UI
+    # doesn't render anything extra, but the Gradio queue still exposes
+    # an `/api/predict/run_api` route the gradio_client can hit. The
+    # outputs[0] is the JSON return; api_name turns it into a stable
+    # path the GitHub Actions caller depends on.
+    # The order of `inputs` below mirrors _run_api's positional parameters.
+    api_dataset = gr.Textbox(visible=False)
+    api_profile = gr.Textbox(visible=False)
+    api_version = gr.Textbox(visible=False)
+    api_open_pr = gr.Checkbox(visible=False)
+    api_submission_id = gr.Textbox(visible=False)
+    api_force = gr.Checkbox(visible=False)
+    api_preliminary = gr.Checkbox(visible=False)
+    api_output  = gr.JSON(visible=False)
+    api_button  = gr.Button(visible=False)
+    api_button.click(
+        fn=_run_api,
+        inputs=[api_dataset, api_profile, api_version, api_open_pr,
+                api_submission_id, api_force, api_preliminary],
+        outputs=api_output,
+        api_name="run_api",
+    )
+    # Progress endpoint — polled by the dashboard while a row is
+    # validating. CORS is open on /gradio_api/* by default, so the
+    # browser can fetch this from github.io directly without any
+    # GitHub-Actions side polling/commit churn.
+    prog_in     = gr.Textbox(visible=False)
+    prog_out    = gr.JSON(visible=False)
+    prog_button = gr.Button(visible=False)
+    prog_button.click(
+        fn=_get_progress,
+        inputs=[prog_in],
+        outputs=prog_out,
+        api_name="get_progress",
+    )
+    # Profile-listing endpoint — polled by the dashboard at startup
+    # so its dropdown reflects what's actually loadable on this Space
+    # right now (foundation+validator pin determines which profiles
+    # register). Stops the operator from picking something that
+    # would fail fatally at runtime.
+    profiles_out    = gr.JSON(visible=False)
+    profiles_button = gr.Button(visible=False)
+    profiles_button.click(
+        fn=_list_profiles,
+        inputs=None,
+        outputs=profiles_out,
+        api_name="list_profiles",
+    )
+    # Cancel endpoint — the dashboard's Cancel button calls this AFTER
+    # cancelling the GH Action so the in-flight server-side gradio call
+    # actually stops (cancelling the Action alone only kills the
+    # gradio_client wrapper; the Space's loop keeps going).
+    # Inputs are (submission id, run token), matching _cancel_run.
+    cancel_in     = gr.Textbox(visible=False)
+    cancel_token  = gr.Textbox(visible=False)
+    cancel_out    = gr.JSON(visible=False)
+    cancel_button = gr.Button(visible=False)
+    cancel_button.click(
+        fn=_cancel_run,
+        inputs=[cancel_in, cancel_token],
+        outputs=cancel_out,
+        api_name="cancel_run",
+    )
+if __name__ == "__main__":
+    # Bind address and port come from the environment when set; the
+    # fallbacks listen on all interfaces, port 7860.
+    host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
+    port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
+    demo.queue().launch(server_name=host, server_port=port)