Spaces:

nvidia
/

simready-validator

Sleeping

loginowskid committed 16 days ago

Commit

4e38f5d

1 Parent(s): 618cfad

Add per-asset progress endpoint for the dashboard's progress bar

Source: simready-oem-library-pm@1c4d7c0

- runner.py: accept submission_id; seed /tmp/sr-progress/<id>.json
and pass --progress-file to the validator subprocess.
- app.py: /run_api takes a 5th positional (submission_id); new
/get_progress endpoint reads the file so the dashboard can poll
this Space directly while a validation is in flight.
- validate.py: --progress-file PATH; atomic JSON write after each
asset in both the sequential and parallel paths.

No build-context changes to requirements / Dockerfile beyond a stale
build-id comment removal — pip layer stays cached, only the app.py /
runner.py / validate.py COPYs invalidate, so build should be ~1 min.

Files changed (6) hide show

tools/hf_space/Dockerfile +12 -4
tools/hf_space/README.md +9 -2
tools/hf_space/app.py +55 -3
tools/hf_space/requirements.txt +0 -1
tools/hf_space/runner.py +26 -0
tools/validation/plugins/simready-report/skills/simready-report/validate.py +48 -2

tools/hf_space/Dockerfile CHANGED Viewed

@@ -32,12 +32,20 @@ WORKDIR /home/${USER}/app
 COPY tools/hf_space/requirements.txt ./requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
-# Foundation specs: the validator wants SIMREADY_FOUNDATIONS_PATH to
-# point at a checkout. Pinned tag avoids surprise spec churn between
-# Space builds. Bump the tag deliberately when foundation rules change.
 ENV SIMREADY_FOUNDATIONS_PATH=/opt/simready_foundations
-RUN git clone --depth 1 https://github.com/NVIDIA/simready-foundation \
         ${SIMREADY_FOUNDATIONS_PATH} \
     && chown -R ${USER}:${USER} ${SIMREADY_FOUNDATIONS_PATH}
 # Copy the bundled validator (the same code our DGXC runner uses) and

 COPY tools/hf_space/requirements.txt ./requirements.txt
 RUN pip install --no-cache-dir -r requirements.txt
+# Foundation specs: pinned to a specific commit so spec churn upstream
+# doesn't break the Space's compatibility with the pinned simready-
+# validate package. The validator's loader rejects features whose
+# requirement codes aren't registered, and the foundation repo's main
+# branch has moved ahead of what simready-validate 2026.4.x understands
+# — sticking to the 2026.04.0 release tag (commit 805d2c5) matches the
+# DGXC runner's working setup. Bump deliberately when both the spec
+# and the validator package are tested together.
 ENV SIMREADY_FOUNDATIONS_PATH=/opt/simready_foundations
+ENV SIMREADY_FOUNDATIONS_COMMIT=805d2c50179a9878c89b0f41baaa0ecafe47c3d7
+RUN git clone https://github.com/NVIDIA/simready-foundation \
         ${SIMREADY_FOUNDATIONS_PATH} \
+    && cd ${SIMREADY_FOUNDATIONS_PATH} \
+    && git checkout ${SIMREADY_FOUNDATIONS_COMMIT} \
     && chown -R ${USER}:${USER} ${SIMREADY_FOUNDATIONS_PATH}
 # Copy the bundled validator (the same code our DGXC runner uses) and

tools/hf_space/README.md CHANGED Viewed

@@ -66,8 +66,15 @@ Set the tier in the Space's **Settings → Hardware** page (or in
 ## Deploy
-The Space is **not deployed yet** — this is a scaffold living in the
-internal repo. To stand it up:
 ### 1. Create the Space `[BROWSER]`

 ## Deploy
+This dance is captured as a Claude Code skill at
+[`skills/deploy-hf-space/SKILL.md`](./skills/deploy-hf-space/SKILL.md).
+Future operators can run `/deploy-hf-space [<slug>]` instead of
+following this README by hand. The README below is the human-readable
+mirror.
+The Space is currently live at
+[`nvidia/simready-validator`](https://huggingface.co/spaces/nvidia/simready-validator).
+To re-stand it up from scratch:
 ### 1. Create the Space `[BROWSER]`

tools/hf_space/app.py CHANGED Viewed

@@ -20,12 +20,13 @@ the Space's own token opens the verdict PR.
 from __future__ import annotations
 import dataclasses
 import os
 from pathlib import Path
 import gradio as gr
-from runner import run as run_validator
 PROFILE_CHOICES = [
@@ -42,7 +43,8 @@ DEFAULT_PROFILE = "Prop-Robotics-Neutral"
 DEFAULT_VERSION = "1.0.0"
-def _run_api(dataset: str, profile: str, version: str, open_pr: bool) -> dict:
     """Programmatic endpoint. Returns the RunResult as a JSON dict.
     Caller is typically `tools/hf_watch/call_hf_space.py` running from
@@ -51,12 +53,17 @@ def _run_api(dataset: str, profile: str, version: str, open_pr: bool) -> dict:
     pattern-matches on the same field names `tools/hf_watch/validate.py`
     produces, so status.json patching is identical regardless of which
     backend ran the validation.
     """
     result = run_validator(
         dataset=(dataset or "").strip(),
         profile=profile or DEFAULT_PROFILE,
         version=(version or DEFAULT_VERSION).strip(),
         open_pr=bool(open_pr),
     )
     return {
         "schema_version": 1,
@@ -70,6 +77,36 @@ def _run_api(dataset: str, profile: str, version: str, open_pr: bool) -> dict:
     }
 def _sanitize_results_json(raw: dict) -> dict:
     """Strip absolute filesystem paths from results_json before returning.
@@ -181,15 +218,30 @@ with gr.Blocks(title="SimReady Validator") as demo:
     api_profile = gr.Textbox(visible=False)
     api_version = gr.Textbox(visible=False)
     api_open_pr = gr.Checkbox(visible=False)
     api_output  = gr.JSON(visible=False)
     api_button  = gr.Button(visible=False)
     api_button.click(
         fn=_run_api,
-        inputs=[api_dataset, api_profile, api_version, api_open_pr],
         outputs=api_output,
         api_name="run_api",
     )
 if __name__ == "__main__":
     demo.queue().launch(

 from __future__ import annotations
 import dataclasses
+import json
 import os
 from pathlib import Path
 import gradio as gr
+from runner import run as run_validator, progress_path_for
 PROFILE_CHOICES = [
 DEFAULT_VERSION = "1.0.0"
+def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
+             submission_id: str = "") -> dict:
     """Programmatic endpoint. Returns the RunResult as a JSON dict.
     Caller is typically `tools/hf_watch/call_hf_space.py` running from
     pattern-matches on the same field names `tools/hf_watch/validate.py`
     produces, so status.json patching is identical regardless of which
     backend ran the validation.
+    `submission_id` is optional — when set, the validator writes
+    per-asset progress to /tmp/sr-progress/<id>.json, which the
+    get_progress endpoint serves to the dashboard.
     """
     result = run_validator(
         dataset=(dataset or "").strip(),
         profile=profile or DEFAULT_PROFILE,
         version=(version or DEFAULT_VERSION).strip(),
         open_pr=bool(open_pr),
+        submission_id=(submission_id or "").strip(),
     )
     return {
         "schema_version": 1,
     }
def _get_progress(submission_id: str) -> dict:
    """Read the validator's per-asset progress file for this submission.

    Polled by the dashboard while a "Validate now" click is in flight so
    the button can fill up as the validator works through the asset list.

    Args:
        submission_id: Identifier that was passed to ``run_api``. Leading
            and trailing whitespace is ignored; ``None`` is treated as "".

    Returns:
        Always a dict, in one of these shapes:

        - ``{"state": "no_id"}``: no submission id was supplied.
        - ``{"state": "not_found"}``: no progress file exists for the id.
        - ``{"state": "transient"}``: the file exists but could not be
          read or parsed as a JSON object right now; poll again.
        - a dict with ``"state": "starting"`` and ``processed``/``total``
          of 0: the seed record written by runner.py before the
          validator subprocess is launched.
        - ``{processed, total, current, started_at, updated_at}``: live
          per-asset progress written by the validator.
    """
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}
    path = progress_path_for(sid)
    if path is None or not path.is_file():
        return {"state": "not_found"}
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and
        # UnicodeDecodeError, so a truncated or corrupt file degrades to
        # "try again" instead of failing the request.
        return {"state": "transient"}
    if not isinstance(payload, dict):
        # Valid JSON that is not an object is not a progress record;
        # keep the declared return type honest for the caller.
        return {"state": "transient"}
    return payload
 def _sanitize_results_json(raw: dict) -> dict:
     """Strip absolute filesystem paths from results_json before returning.
     api_profile = gr.Textbox(visible=False)
     api_version = gr.Textbox(visible=False)
     api_open_pr = gr.Checkbox(visible=False)
+    api_submission_id = gr.Textbox(visible=False)
     api_output  = gr.JSON(visible=False)
     api_button  = gr.Button(visible=False)
     api_button.click(
         fn=_run_api,
+        inputs=[api_dataset, api_profile, api_version, api_open_pr, api_submission_id],
         outputs=api_output,
         api_name="run_api",
     )
+    # Progress endpoint — polled by the dashboard while a row is
+    # validating. CORS is open on /gradio_api/* by default, so the
+    # browser can fetch this from github.io directly without any
+    # GitHub-Actions side polling/commit churn.
+    prog_in     = gr.Textbox(visible=False)
+    prog_out    = gr.JSON(visible=False)
+    prog_button = gr.Button(visible=False)
+    prog_button.click(
+        fn=_get_progress,
+        inputs=[prog_in],
+        outputs=prog_out,
+        api_name="get_progress",
+    )
 if __name__ == "__main__":
     demo.queue().launch(

tools/hf_space/requirements.txt CHANGED Viewed

@@ -18,4 +18,3 @@ omniverse-asset-validator==1.15.1
 omniverse-usd-profiles==1.10.22
 markdown-it-py>=4.2
 simready-validate==2026.4.9
-# build-id: 08b20703-0e97-4904-a7af-d2e0b3a42337

 omniverse-usd-profiles==1.10.22
 markdown-it-py>=4.2
 simready-validate==2026.4.9

tools/hf_space/runner.py CHANGED Viewed

@@ -152,6 +152,19 @@ def _open_verdict_pr(
     return getattr(commit, "pr_url", None) or getattr(commit, "discussion_url", None)
 def run(
     dataset: str,
     profile: str = "Robot-Body-Runnable",
@@ -159,6 +172,7 @@ def run(
     open_pr: bool = False,
     hf_token: str | None = None,
     log: Iterator[str] | None = None,
 ) -> RunResult:
     """Validate a single HF dataset. Yields log lines via the `log` callable.
@@ -207,6 +221,18 @@ def run(
             "--output", str(out_dir),
             "--no-use-kit",
         ]
         out(f"  $ {shlex.join(cmd)}")
         # Capture stdout+stderr to a file so we can ship the tail back
         # in the response when the validator crashes. Streaming the

     return getattr(commit, "pr_url", None) or getattr(commit, "discussion_url", None)
+PROGRESS_DIR = Path("/tmp/sr-progress")
+def progress_path_for(submission_id: str) -> Path:
+    """Where the validator writes per-asset progress for this submission.
+    Read by the Space's get_progress endpoint to feed the dashboard's
+    fill-up progress bar. Empty submission_id → None (caller skips)."""
+    if not submission_id:
+        return None  # type: ignore[return-value]
+    safe = "".join(c if c.isalnum() or c in "-_." else "_" for c in submission_id)
+    return PROGRESS_DIR / f"{safe}.json"
 def run(
     dataset: str,
     profile: str = "Robot-Body-Runnable",
     open_pr: bool = False,
     hf_token: str | None = None,
     log: Iterator[str] | None = None,
+    submission_id: str = "",
 ) -> RunResult:
     """Validate a single HF dataset. Yields log lines via the `log` callable.
             "--output", str(out_dir),
             "--no-use-kit",
         ]
+        prog_path = progress_path_for(submission_id) if submission_id else None
+        if prog_path:
+            PROGRESS_DIR.mkdir(parents=True, exist_ok=True)
+            # Seed an empty progress file so the dashboard sees "queued"
+            # immediately instead of "not found" while the validator
+            # boots (foundation-spec load is ~2 min).
+            prog_path.write_text(json.dumps({
+                "processed": 0, "total": 0, "current": None,
+                "started_at": _now(), "updated_at": _now(),
+                "state": "starting",
+            }))
+            cmd += ["--progress-file", str(prog_path)]
         out(f"  $ {shlex.join(cmd)}")
         # Capture stdout+stderr to a file so we can ship the tail back
         # in the response when the validator crashes. Streaming the

tools/validation/plugins/simready-report/skills/simready-report/validate.py CHANGED Viewed

@@ -33,6 +33,35 @@ _PHYSX_PROFILES = frozenset({
 })
 def _sniff_arg(argv: list[str], name: str, default: str | None = None) -> str | None:
     """Pull `--<name> <value>` out of argv without argparse. Returns default if absent."""
     flag = f"--{name}"
@@ -1190,6 +1219,12 @@ def main() -> int:
                          "robot-named files to Robot-Body-* and prop-named files to "
                          "Prop-Robotics-* in the same run, instead of one blanket "
                          "profile per dataset.")
     args = ap.parse_args()
     # Parse the per-asset profile map up front so a malformed JSON
@@ -1338,12 +1373,16 @@ def main() -> int:
         profile_req_count = sum(len(f.requirements) for f in default_profile.features)
         print(f"  profile features: {len(default_profile.features)}", flush=True)
         print(f"  profile requirements: {profile_req_count}", flush=True)
-        for asset in assets:
             pid = _profile_for(asset)
             if pid not in engines:
                 engines[pid] = build_engine(pid, args.version)
                 print(f"  [profile-map] built engine for {pid}", flush=True)
             engine, profile = engines[pid]
             try:
                 results.append(validate_one(engine, asset, target, profile, pid, args.version, ext_tracker))
             except Exception as e:
@@ -1351,12 +1390,13 @@ def main() -> int:
                 ext_tracker.record_issue(str(asset), f"Validator crashed: {type(e).__name__}: {e}")
                 results.append({
                     "name": asset.name, "path": str(asset),
-                    "rel_path": str(asset.relative_to(target)) if asset.is_relative_to(target) else str(asset),
                     "profile": pid, "profile_version": args.version,
                     "passed": False,
                     "issues": [{"code": "SDK.CRASH", "severity": "error", "msg": str(e), "prim": "/", "rule": None}],
                     "passed_features": [], "failed_features": [], "affected_prims": [],
                 })
     else:
         # Parallel path. Workers load specs once via _pool_init, then
         # lazy-build (and cache) engines per profile_id as tasks arrive.
@@ -1365,6 +1405,8 @@ def main() -> int:
         profile_req_count = sum(len(f.requirements) for f in profile.features)
         print(f"  profile features: {len(profile.features)}", flush=True)
         print(f"  profile requirements: {profile_req_count}", flush=True)
         with ProcessPoolExecutor(
             max_workers=workers,
             initializer=_pool_init,
@@ -1374,8 +1416,12 @@ def main() -> int:
                 ex.submit(_pool_validate, str(a), str(target), _profile_for(a), args.version): a
                 for a in assets
             }
             for fut in as_completed(futures):
                 asset = futures[fut]
                 try:
                     result, ext_records, issues = fut.result()
                 except Exception as e:

 })
+def _emit_progress(path: str | None, processed: int, total: int,
+                   current: str | None, started_at: str) -> None:
+    """Atomically write progress JSON for the HF Space's get_progress
+    endpoint. No-op when --progress-file isn't set (CLI / DGXC paths).
+    Atomicity matters because the Space serves polls concurrently with
+    the validator's writes — a half-written file would 500 the endpoint.
+    Write to a sibling .tmp and os.replace (POSIX atomic on same FS)."""
+    if not path:
+        return
+    import os, tempfile, datetime
+    payload = {
+        "processed": processed,
+        "total": total,
+        "current": current,
+        "started_at": started_at,
+        "updated_at": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
+    }
+    try:
+        os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
+        fd, tmp = tempfile.mkstemp(prefix=".progress-", dir=os.path.dirname(path) or ".")
+        with os.fdopen(fd, "w", encoding="utf-8") as f:
+            json.dump(payload, f)
+        os.replace(tmp, path)
+    except OSError:
+        # Progress is advisory — don't crash the validator if the FS hiccups.
+        pass
 def _sniff_arg(argv: list[str], name: str, default: str | None = None) -> str | None:
     """Pull `--<name> <value>` out of argv without argparse. Returns default if absent."""
     flag = f"--{name}"
                          "robot-named files to Robot-Body-* and prop-named files to "
                          "Prop-Robotics-* in the same run, instead of one blanket "
                          "profile per dataset.")
+    ap.add_argument("--progress-file", default=None,
+                    help="Path to a JSON file the validator updates after each "
+                         "asset with {processed, total, current, started_at, "
+                         "updated_at}. Polled by the HF Space's get_progress "
+                         "endpoint so the dashboard can show a real-time bar. "
+                         "Writes are atomic (tmp + rename).")
     args = ap.parse_args()
     # Parse the per-asset profile map up front so a malformed JSON
         profile_req_count = sum(len(f.requirements) for f in default_profile.features)
         print(f"  profile features: {len(default_profile.features)}", flush=True)
         print(f"  profile requirements: {profile_req_count}", flush=True)
+        progress_started = datetime.now(timezone.utc).isoformat(timespec="seconds")
+        _emit_progress(args.progress_file, 0, len(assets), None, progress_started)
+        for i, asset in enumerate(assets):
             pid = _profile_for(asset)
             if pid not in engines:
                 engines[pid] = build_engine(pid, args.version)
                 print(f"  [profile-map] built engine for {pid}", flush=True)
             engine, profile = engines[pid]
+            rel = str(asset.relative_to(target)) if asset.is_relative_to(target) else str(asset)
+            _emit_progress(args.progress_file, i, len(assets), rel, progress_started)
             try:
                 results.append(validate_one(engine, asset, target, profile, pid, args.version, ext_tracker))
             except Exception as e:
                 ext_tracker.record_issue(str(asset), f"Validator crashed: {type(e).__name__}: {e}")
                 results.append({
                     "name": asset.name, "path": str(asset),
+                    "rel_path": rel,
                     "profile": pid, "profile_version": args.version,
                     "passed": False,
                     "issues": [{"code": "SDK.CRASH", "severity": "error", "msg": str(e), "prim": "/", "rule": None}],
                     "passed_features": [], "failed_features": [], "affected_prims": [],
                 })
+        _emit_progress(args.progress_file, len(assets), len(assets), None, progress_started)
     else:
         # Parallel path. Workers load specs once via _pool_init, then
         # lazy-build (and cache) engines per profile_id as tasks arrive.
         profile_req_count = sum(len(f.requirements) for f in profile.features)
         print(f"  profile features: {len(profile.features)}", flush=True)
         print(f"  profile requirements: {profile_req_count}", flush=True)
+        progress_started = datetime.now(timezone.utc).isoformat(timespec="seconds")
+        _emit_progress(args.progress_file, 0, len(assets), None, progress_started)
         with ProcessPoolExecutor(
             max_workers=workers,
             initializer=_pool_init,
                 ex.submit(_pool_validate, str(a), str(target), _profile_for(a), args.version): a
                 for a in assets
             }
+            completed = 0
             for fut in as_completed(futures):
                 asset = futures[fut]
+                completed += 1
+                rel = str(asset.relative_to(target)) if asset.is_relative_to(target) else str(asset)
+                _emit_progress(args.progress_file, completed, len(assets), rel, progress_started)
                 try:
                     result, ext_records, issues = fut.result()
                 except Exception as e: