Spaces:

nvidia
/

simready-validator

Sleeping

File size: 17,672 Bytes

12e13e9

"""SimReady Validator — Gradio UI for the HuggingFace Space.

Two surfaces, same engine:

- **/run** (the on-screen button) — streams log lines to the UI for
  interactive use by an operator in the browser.
- **/run_api** (hidden, programmatic) — returns the full RunResult as
  a JSON-serializable dict. This is what `tools/hf_watch/call_hf_space.py`
  hits from the GitHub Actions runner so the workflow can patch
  status.json and asset-status.json without scraping the UI's text.

Both go through `runner.run()`. The split is purely about output
shape (streaming text vs. one-shot dict).

The Space is internal-pilot scope: HF_TOKEN comes from the Space's
secrets, NOT from the requester. When a customer's dataset PR triggers
this (next milestone), the webhook payload identifies the dataset and
the Space's own token opens the verdict PR.
"""
from __future__ import annotations

import json
import os
from pathlib import Path

import gradio as gr

from runner import (run as run_validator, progress_path_for, cancel_path_for,
                    run_token_path_for, CANCEL_DIR)


# Profile identifiers offered in the Validator tab's dropdown. The
# programmatic /run_api endpoint accepts exactly these values, plus the
# case-insensitive keyword "auto".
PROFILE_CHOICES = [
    "Prop-Robotics-Neutral",
    "Prop-Robotics-Physx",
    "Prop-Robotics-Isaac",
    "Robot-Body-Neutral",
    "Robot-Body-Runnable",
    "Robot-Body-Isaac",
    "Package",
    "Package-Candidate",
]
# Fallbacks applied when a caller leaves profile / version empty; also
# the initial values shown in the UI's Profile and Version inputs.
DEFAULT_PROFILE = "Prop-Robotics-Neutral"
DEFAULT_VERSION = "1.0.0"


def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
             submission_id: str = "", force: bool = False,
             preliminary: bool = False, use_kit: bool = False) -> dict:
    """Programmatic endpoint. Returns the RunResult as a JSON dict.

    Caller is typically `tools/hf_watch/call_hf_space.py` running from
    a GitHub Actions ubuntu-latest runner. Output shape must stay
    stable — bump `schema_version` if you change it. The receiver
    pattern-matches on the same field names `tools/hf_watch/validate.py`
    produces, so status.json patching is identical regardless of which
    backend ran the validation.

    `submission_id` is optional — when set, the validator writes
    per-asset progress to /tmp/sr-progress/<id>.json, which the
    get_progress endpoint serves to the dashboard.

    `preliminary` switches the runner to a structure-only sweep:
    zip-bundled datasets are scanned (instead of failing
    PKG.NO-ARCHIVES at the listing stage) and per-asset validation is
    sliced to the first asset only. Used by the dashboard's
    Preliminary scan tab.
    """
    print(f"[run_api] preliminary={preliminary!r} force={force!r} "
          f"use_kit={use_kit!r} submission_id={submission_id!r}", flush=True)
    # Untrusted callers can hit /run_api directly — profile/version flow
    # into the validator's argv, so validate them before use. Empty
    # falls back to the defaults (existing behavior).
    import re
    profile = profile or DEFAULT_PROFILE
    if profile not in PROFILE_CHOICES and profile.lower() != "auto":
        raise ValueError(f"invalid profile: {profile!r}")
    version = (version or DEFAULT_VERSION).strip()
    if not re.fullmatch(r"[\w.\-]+", version):
        raise ValueError(f"invalid version: {version!r}")
    result = run_validator(
        dataset=(dataset or "").strip(),
        profile=profile,
        version=version,
        open_pr=bool(open_pr),
        submission_id=(submission_id or "").strip(),
        force=bool(force),
        preliminary=bool(preliminary),
        use_kit=bool(use_kit),
    )
    return {
        "schema_version": 1,
        "dataset": result.dataset,
        "profile": result.profile,
        "version": result.version,
        "status": result.status,
        "summary": result.summary,
        "results_json": _sanitize_results_json(result.results_json),
        "pr_url": result.pr_url,
    }


def _list_profiles() -> dict:
    """List the profiles that actually register on this Space.

    The dashboard polls this to fill its profile dropdown, so operators
    cannot pick a profile that would fail at registration time on the
    current foundation+validator combination.

    The validator is invoked as a subprocess with --list-profiles and
    --use-plugin: the default CLI loader has known registration
    mismatches against the current foundation pin, and the plugin path
    is what runner.py's streaming-zip flow falls back to, so it is the
    source of truth for "actually usable" here.

    Each profile is reported by validate.py on its own line as
    `PROFILE: <id> v<version>`; only the <id> token is kept.

    Returns:
        On completion: {"profiles": [...], "schema_version": 1,
        "rc": <exit code>}, plus "stderr_tail"/"stdout_tail" when no
        profile was parsed. On timeout or any other exception:
        {"profiles": [], "error": "<message>"}.
    """
    import subprocess
    import sys
    from runner import VALIDATOR

    marker = "PROFILE:"

    def _tail(text) -> str:
        # Last 20 lines, capped at 2000 chars, so the JSON stays small.
        return "\n".join((text or "").splitlines()[-20:])[:2000]

    try:
        # --list-profiles only ENUMERATES registered profiles from the
        # spec/plugin registry (--use-plugin) — it runs no validation
        # rules, so it never needs Kit. Force --no-use-kit: on a
        # Kit-enabled image the validator auto-enables --use-kit for the
        # PhysX-bearing default profile and boots the full Isaac Sim
        # runtime (~5 min) just to print the list, blowing the 300s
        # timeout below. Actual validation (runner.py) still uses Kit.
        command = [sys.executable, str(VALIDATOR), "--list-profiles", "--use-plugin", "--no-use-kit"]
        proc = subprocess.run(command, capture_output=True, text=True, timeout=300)

        # The "PROFILE: <id> v<version>" lines are the only
        # authoritative shape; every other line is treated as noise.
        found = []
        for raw_line in (proc.stdout or "").splitlines():
            stripped = raw_line.strip()
            if not stripped.startswith(marker):
                continue
            tokens = stripped[len(marker):].split()
            if tokens:
                found.append(tokens[0])

        # dict.fromkeys drops duplicates while keeping first-seen order.
        unique = list(dict.fromkeys(found))

        payload: dict = {"profiles": unique, "schema_version": 1, "rc": proc.returncode}
        if not unique:
            # Nothing parsed — attach the output tails so the dashboard
            # can show why.
            payload["stderr_tail"] = _tail(proc.stderr)
            payload["stdout_tail"] = _tail(proc.stdout)
        return payload
    except subprocess.TimeoutExpired:
        return {"profiles": [], "error": "timeout after 300s (spec load >5 min)"}
    except Exception as e:
        return {"profiles": [], "error": f"{type(e).__name__}: {e}"}


def _cancel_run(submission_id: str, run_token: str = "") -> dict:
    """Write the cancel-signal file for a given submission. The
    streaming-zip loop in runner.py checks for this file between zips
    and aborts when present. Idempotent — calling multiple times has no
    extra effect; consuming runner.py deletes it.

    `run_token` is the per-run token the dashboard read from get_progress.
    It becomes the flag's content so runner._is_cancelled only honors it
    for the exact run it was issued against — a flag left over from a
    prior run of this submission can never abort a fresh one."""
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}
    path = cancel_path_for(sid)
    if path is None:
        return {"state": "no_id"}
    try:
        CANCEL_DIR.mkdir(parents=True, exist_ok=True)
        path.write_text((run_token or "").strip(), encoding="utf-8")
        return {"state": "signaled", "path": str(path)}
    except OSError as e:
        return {"state": "error", "error": f"{type(e).__name__}: {e}"}


def _get_progress(submission_id: str) -> dict:
    """Read the validator's per-asset progress file for this submission.

    Polled by the dashboard ~every 3 s while a Validate-now click is
    in-flight, so the "Validate now" button can fill up as the
    validator works through the asset list.

    Returns one of three shapes:
      - {"state": "not_found"} — no progress file (Space restarted, or
        the dashboard is polling a Space-run that never happened).
      - {"state": "starting"}  — file seeded by runner.py before the
        validator started its loop. processed/total are 0.
      - {processed, total, current, started_at, updated_at} — live
        per-asset progress written by validate.py._emit_progress.

    Every shape also carries `run_token` (the current run's cancel
    token, from the sidecar file) when one exists, so the dashboard can
    echo it back to cancel_run and target the exact run.

    Caller treats anything with total > 0 as "show the fill bar".
    """
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}
    # Per-run cancel token (sidecar; see runner.run_token_path_for).
    # Surfaced on every shape so the dashboard can echo it back to
    # cancel_run — a cancel then only aborts the run it was issued
    # against, never a later one that reused the submission_id.
    run_token = ""
    tok_path = run_token_path_for(sid)
    if tok_path and tok_path.is_file():
        try:
            run_token = tok_path.read_text(encoding="utf-8").strip()
        except OSError:
            pass
    path = progress_path_for(sid)
    if path is None or not path.is_file():
        return {"state": "not_found", "run_token": run_token}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
        if isinstance(data, dict) and run_token:
            data["run_token"] = run_token
        return data
    except (OSError, json.JSONDecodeError):
        # Mid-write — caller will poll again in a few seconds.
        return {"state": "transient", "run_token": run_token}


def _sanitize_results_json(raw: dict) -> dict:
    """Strip absolute filesystem paths from results_json before returning.

    Gradio's JSON serializer treats string fields that resolve to files
    on the Space's filesystem as downloadable references and tries to
    serve them through `/gradio_api/file=...`. The validator's
    results.json contains absolute paths (target dir + per-asset
    `path`) which point into the Space's ephemeral tempdir and are
    NOT exposed through gradio's allowed_paths — gradio_client then
    fails with 403 trying to auto-fetch them after a successful run.

    Callers don't need filesystem paths anyway — only `rel_path`
    (dataset-relative), `passed`, and `issues` are used downstream.
    Keep the rest of the report intact (profile_coverage, summary,
    layout_findings, etc.).
    """
    if not isinstance(raw, dict):
        return raw
    sanitized = {k: v for k, v in raw.items() if k != "target"}
    if "results" in sanitized and isinstance(sanitized["results"], list):
        sanitized["results"] = [
            {k: v for k, v in asset.items() if k != "path"}
            for asset in sanitized["results"]
            if isinstance(asset, dict)
        ]
    # Specs/dashboard dir paths are local to the Space, useless to caller.
    for k in ("specs_docs_dir", "dashboard_docs_dir"):
        sanitized.pop(k, None)
    return sanitized


def _run_streaming(dataset: str, profile: str, version: str, open_pr: bool):
    """Generator that yields incremental log output to the UI as the
    validator runs. Gradio streams each yielded tuple to the connected
    outputs."""
    lines: list[str] = []

    def log(line: str) -> None:
        lines.append(line)

    yield "\n".join(lines), "", "(running…)", None

    try:
        result = run_validator(
            dataset=dataset.strip(),
            profile=profile,
            version=version.strip() or DEFAULT_VERSION,
            open_pr=open_pr,
            log=log,
        )
    except Exception as e:
        lines.append(f"\nERROR: {type(e).__name__}: {e}")
        yield "\n".join(lines), "", f"error: {e}", None
        return

    status_badge = f"**{result.status.upper()}** — {result.summary}"
    if result.pr_url:
        status_badge += f"\n\nPR: {result.pr_url}"

    report_index = result.report_path / "index.html"
    report_url = str(report_index) if report_index.is_file() else None

    yield (
        "\n".join(lines),
        status_badge,
        result.summary,
        report_url,
    )


def _read_md(name: str) -> str:
    """Return the contents of name (relative to this file's dir),
    stripping a leading YAML frontmatter block if present. Falls back
    to a friendly stub when the file is missing — keeps the Space
    bootable even before the space-deploy workflow has synced the
    assembled docs into the container."""
    from pathlib import Path
    p = Path(__file__).resolve().parent / name
    try:
        src = p.read_text(encoding="utf-8")
    except FileNotFoundError:
        return f"_{name} not yet synced into this Space — check back after the next deploy._"
    if src.startswith("---"):
        end = src.find("\n---\n", 4)
        if end > 0:
            src = src[end + len("\n---\n"):].lstrip()
    return src


# Build the Gradio app. Three visible tabs (Overview, Validator, Partner
# walkthrough) plus a set of invisible components whose only purpose is
# to back the named programmatic endpoints: run_api, get_progress,
# list_profiles and cancel_run.
with gr.Blocks(title="SimReady Validator") as demo:
    with gr.Tabs():
        with gr.Tab("Overview"):
            # Markdown is read once, while the UI is being constructed.
            gr.Markdown(_read_md("README.md"))
        with gr.Tab("Validator"):
            gr.Markdown(
                "Submit a HuggingFace dataset to validate against a SimReady "
                "profile. With **Open PR** enabled, the verdict is uploaded "
                "back to the dataset as a `validation/` pull request."
            )
            with gr.Row():
                dataset = gr.Textbox(
                    label="Dataset",
                    placeholder="org/dataset (e.g. imagineio/PhysicalAI-SimReady-Kitchens-v1)",
                )
            with gr.Row():
                profile = gr.Dropdown(
                    choices=PROFILE_CHOICES, value=DEFAULT_PROFILE, label="Profile",
                )
                version = gr.Textbox(label="Version", value=DEFAULT_VERSION)
                open_pr = gr.Checkbox(label="Open PR on dataset with verdict", value=False)
            run_btn = gr.Button("Validate", variant="primary")
            status_md = gr.Markdown(label="Verdict")
            summary_box = gr.Textbox(label="Summary", interactive=False)
            log_box = gr.Textbox(label="Log", lines=20, interactive=False)
            report_link = gr.File(label="HTML report (download)", interactive=False)
        with gr.Tab("Partner walkthrough"):
            gr.Markdown(_read_md("VALIDATE.md"))

    # Visible Validate button → _run_streaming. The output order matches
    # the 4-tuples that generator yields: log text, verdict markdown,
    # summary text, report file.
    run_btn.click(
        fn=_run_streaming,
        inputs=[dataset, profile, version, open_pr],
        outputs=[log_box, status_md, summary_box, report_link],
        api_name="run",
    )

    # Programmatic endpoint — bound to invisible components so the UI
    # doesn't render anything extra, but the Gradio queue still exposes
    # an `/api/predict/run_api` route the gradio_client can hit. The
    # outputs[0] is the JSON return; api_name turns it into a stable
    # path the GitHub Actions caller depends on.
    # The input order below is positional and mirrors _run_api's
    # parameter order.
    api_dataset = gr.Textbox(visible=False)
    api_profile = gr.Textbox(visible=False)
    api_version = gr.Textbox(visible=False)
    api_open_pr = gr.Checkbox(visible=False)
    api_submission_id = gr.Textbox(visible=False)
    api_force = gr.Checkbox(visible=False)
    api_preliminary = gr.Checkbox(visible=False)
    api_use_kit = gr.Checkbox(visible=False)
    api_output  = gr.JSON(visible=False)
    api_button  = gr.Button(visible=False)
    api_button.click(
        fn=_run_api,
        inputs=[api_dataset, api_profile, api_version, api_open_pr,
                api_submission_id, api_force, api_preliminary, api_use_kit],
        outputs=api_output,
        api_name="run_api",
    )

    # Progress endpoint — polled by the dashboard while a row is
    # validating. CORS is open on /gradio_api/* by default, so the
    # browser can fetch this from github.io directly without any
    # GitHub-Actions side polling/commit churn.
    # Single input: the submission id.
    prog_in     = gr.Textbox(visible=False)
    prog_out    = gr.JSON(visible=False)
    prog_button = gr.Button(visible=False)
    prog_button.click(
        fn=_get_progress,
        inputs=[prog_in],
        outputs=prog_out,
        api_name="get_progress",
    )

    # Profile-listing endpoint — polled by the dashboard at startup
    # so its dropdown reflects what's actually loadable on this Space
    # right now (foundation+validator pin determines which profiles
    # register). Stops the operator from picking something that
    # would fatal at runtime.
    # Takes no inputs.
    profiles_out    = gr.JSON(visible=False)
    profiles_button = gr.Button(visible=False)
    profiles_button.click(
        fn=_list_profiles,
        inputs=None,
        outputs=profiles_out,
        api_name="list_profiles",
    )

    # Cancel endpoint — the dashboard's Cancel button calls this AFTER
    # cancelling the GH Action so the in-flight server-side gradio call
    # actually stops (cancelling the Action alone only kills the
    # gradio_client wrapper, the Space's loop keeps going).
    # Inputs, in order: submission id, then the per-run token.
    cancel_in     = gr.Textbox(visible=False)
    cancel_token  = gr.Textbox(visible=False)
    cancel_out    = gr.JSON(visible=False)
    cancel_button = gr.Button(visible=False)
    cancel_button.click(
        fn=_cancel_run,
        inputs=[cancel_in, cancel_token],
        outputs=cancel_out,
        api_name="cancel_run",
    )


if __name__ == "__main__":
    # Script entry point: enable the request queue, then serve.
    queued_app = demo.queue()
    # Bind address and port come from the environment, defaulting to
    # all interfaces on port 7860.
    bind_host = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    bind_port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    queued_app.launch(server_name=bind_host, server_port=bind_port)