loginowskid committed on
Commit
4e38f5d
·
1 Parent(s): 618cfad

Add per-asset progress endpoint for the dashboard's progress bar

Browse files

Source: simready-oem-library-pm@1c4d7c0

- runner.py: accept submission_id; seed /tmp/sr-progress/<id>.json
and pass --progress-file to the validator subprocess.
- app.py: /run_api takes a 5th positional (submission_id); new
/get_progress endpoint reads the file so the dashboard can poll
this Space directly while a validation is in flight.
- validate.py: --progress-file PATH; atomic JSON write after each
asset in both the sequential and parallel paths.

No build-context changes to requirements / Dockerfile beyond a stale
build-id comment removal — pip layer stays cached, only the app.py /
runner.py / validate.py COPYs invalidate, so build should be ~1 min.

tools/hf_space/Dockerfile CHANGED
@@ -32,12 +32,20 @@ WORKDIR /home/${USER}/app
32
  COPY tools/hf_space/requirements.txt ./requirements.txt
33
  RUN pip install --no-cache-dir -r requirements.txt
34
 
35
- # Foundation specs: the validator wants SIMREADY_FOUNDATIONS_PATH to
36
- # point at a checkout. Pinned tag avoids surprise spec churn between
37
- # Space builds. Bump the tag deliberately when foundation rules change.
 
 
 
 
 
38
  ENV SIMREADY_FOUNDATIONS_PATH=/opt/simready_foundations
39
- RUN git clone --depth 1 https://github.com/NVIDIA/simready-foundation \
 
40
  ${SIMREADY_FOUNDATIONS_PATH} \
 
 
41
  && chown -R ${USER}:${USER} ${SIMREADY_FOUNDATIONS_PATH}
42
 
43
  # Copy the bundled validator (the same code our DGXC runner uses) and
 
32
  COPY tools/hf_space/requirements.txt ./requirements.txt
33
  RUN pip install --no-cache-dir -r requirements.txt
34
 
35
+ # Foundation specs: pinned to a specific commit so spec churn upstream
36
+ # doesn't break the Space's compatibility with the pinned simready-
37
+ # validate package. The validator's loader rejects features whose
38
+ # requirement codes aren't registered, and the foundation repo's main
39
+ # branch has moved ahead of what simready-validate 2026.4.x understands
40
+ # — sticking to the 2026.04.0 release tag (commit 805d2c5) matches the
41
+ # DGXC runner's working setup. Bump deliberately when both the spec
42
+ # and the validator package are tested together.
43
  ENV SIMREADY_FOUNDATIONS_PATH=/opt/simready_foundations
44
+ ENV SIMREADY_FOUNDATIONS_COMMIT=805d2c50179a9878c89b0f41baaa0ecafe47c3d7
45
+ RUN git clone https://github.com/NVIDIA/simready-foundation \
46
  ${SIMREADY_FOUNDATIONS_PATH} \
47
+ && cd ${SIMREADY_FOUNDATIONS_PATH} \
48
+ && git checkout ${SIMREADY_FOUNDATIONS_COMMIT} \
49
  && chown -R ${USER}:${USER} ${SIMREADY_FOUNDATIONS_PATH}
50
 
51
  # Copy the bundled validator (the same code our DGXC runner uses) and
tools/hf_space/README.md CHANGED
@@ -66,8 +66,15 @@ Set the tier in the Space's **Settings → Hardware** page (or in
66
 
67
  ## Deploy
68
 
69
- The Space is **not deployed yet** — this is a scaffold living in the
70
- internal repo. To stand it up:
 
 
 
 
 
 
 
71
 
72
  ### 1. Create the Space `[BROWSER]`
73
 
 
66
 
67
  ## Deploy
68
 
69
+ This dance is captured as a Claude Code skill at
70
+ [`skills/deploy-hf-space/SKILL.md`](./skills/deploy-hf-space/SKILL.md).
71
+ Future operators can run `/deploy-hf-space [<slug>]` instead of
72
+ following this README by hand. The README below is the human-readable
73
+ mirror.
74
+
75
+ The Space is currently live at
76
+ [`nvidia/simready-validator`](https://huggingface.co/spaces/nvidia/simready-validator).
77
+ To re-stand it up from scratch:
78
 
79
  ### 1. Create the Space `[BROWSER]`
80
 
tools/hf_space/app.py CHANGED
@@ -20,12 +20,13 @@ the Space's own token opens the verdict PR.
20
  from __future__ import annotations
21
 
22
  import dataclasses
 
23
  import os
24
  from pathlib import Path
25
 
26
  import gradio as gr
27
 
28
- from runner import run as run_validator
29
 
30
 
31
  PROFILE_CHOICES = [
@@ -42,7 +43,8 @@ DEFAULT_PROFILE = "Prop-Robotics-Neutral"
42
  DEFAULT_VERSION = "1.0.0"
43
 
44
 
45
- def _run_api(dataset: str, profile: str, version: str, open_pr: bool) -> dict:
 
46
  """Programmatic endpoint. Returns the RunResult as a JSON dict.
47
 
48
  Caller is typically `tools/hf_watch/call_hf_space.py` running from
@@ -51,12 +53,17 @@ def _run_api(dataset: str, profile: str, version: str, open_pr: bool) -> dict:
51
  pattern-matches on the same field names `tools/hf_watch/validate.py`
52
  produces, so status.json patching is identical regardless of which
53
  backend ran the validation.
 
 
 
 
54
  """
55
  result = run_validator(
56
  dataset=(dataset or "").strip(),
57
  profile=profile or DEFAULT_PROFILE,
58
  version=(version or DEFAULT_VERSION).strip(),
59
  open_pr=bool(open_pr),
 
60
  )
61
  return {
62
  "schema_version": 1,
@@ -70,6 +77,36 @@ def _run_api(dataset: str, profile: str, version: str, open_pr: bool) -> dict:
70
  }
71
 
72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  def _sanitize_results_json(raw: dict) -> dict:
74
  """Strip absolute filesystem paths from results_json before returning.
75
 
@@ -181,15 +218,30 @@ with gr.Blocks(title="SimReady Validator") as demo:
181
  api_profile = gr.Textbox(visible=False)
182
  api_version = gr.Textbox(visible=False)
183
  api_open_pr = gr.Checkbox(visible=False)
 
184
  api_output = gr.JSON(visible=False)
185
  api_button = gr.Button(visible=False)
186
  api_button.click(
187
  fn=_run_api,
188
- inputs=[api_dataset, api_profile, api_version, api_open_pr],
189
  outputs=api_output,
190
  api_name="run_api",
191
  )
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  if __name__ == "__main__":
195
  demo.queue().launch(
 
20
  from __future__ import annotations
21
 
22
  import dataclasses
23
+ import json
24
  import os
25
  from pathlib import Path
26
 
27
  import gradio as gr
28
 
29
+ from runner import run as run_validator, progress_path_for
30
 
31
 
32
  PROFILE_CHOICES = [
 
43
  DEFAULT_VERSION = "1.0.0"
44
 
45
 
46
+ def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
47
+ submission_id: str = "") -> dict:
48
  """Programmatic endpoint. Returns the RunResult as a JSON dict.
49
 
50
  Caller is typically `tools/hf_watch/call_hf_space.py` running from
 
53
  pattern-matches on the same field names `tools/hf_watch/validate.py`
54
  produces, so status.json patching is identical regardless of which
55
  backend ran the validation.
56
+
57
+ `submission_id` is optional — when set, the validator writes
58
+ per-asset progress to /tmp/sr-progress/<id>.json, which the
59
+ get_progress endpoint serves to the dashboard.
60
  """
61
  result = run_validator(
62
  dataset=(dataset or "").strip(),
63
  profile=profile or DEFAULT_PROFILE,
64
  version=(version or DEFAULT_VERSION).strip(),
65
  open_pr=bool(open_pr),
66
+ submission_id=(submission_id or "").strip(),
67
  )
68
  return {
69
  "schema_version": 1,
 
77
  }
78
 
79
 
80
def _get_progress(submission_id: str) -> dict:
    """Read the validator's per-asset progress file for this submission.

    Polled by the dashboard ~every 3 s while a Validate-now click is
    in-flight, so the "Validate now" button can fill up as the
    validator works through the asset list.

    Args:
        submission_id: Identifier the caller passed to run_api. Leading and
            trailing whitespace is ignored.

    Returns:
        One of the following shapes:
        - {"state": "no_id"} — submission_id was empty or whitespace.
        - {"state": "not_found"} — no progress file (Space restarted, or
          the dashboard is polling a Space-run that never happened).
        - {"state": "transient"} — the file exists but could not be read
          or parsed as a JSON object right now; the caller should simply
          poll again.
        - {"state": "starting", processed: 0, total: 0, ...} — file seeded
          by runner.py before the validator started its loop.
        - {processed, total, current, started_at, updated_at} — live
          per-asset progress written by validate.py._emit_progress.

    Caller treats anything with total > 0 as "show the fill bar".
    """
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}
    path = progress_path_for(sid)
    if path is None or not path.is_file():
        return {"state": "not_found"}
    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # Removed between the is_file() check and the read.
        return {"state": "not_found"}
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError:
        # most likely a mid-write read — caller will poll again shortly.
        return {"state": "transient"}
    if not isinstance(payload, dict):
        # Valid JSON but not an object; keep the documented dict contract.
        return {"state": "transient"}
    return payload
108
+
109
+
110
  def _sanitize_results_json(raw: dict) -> dict:
111
  """Strip absolute filesystem paths from results_json before returning.
112
 
 
218
  api_profile = gr.Textbox(visible=False)
219
  api_version = gr.Textbox(visible=False)
220
  api_open_pr = gr.Checkbox(visible=False)
221
+ api_submission_id = gr.Textbox(visible=False)
222
  api_output = gr.JSON(visible=False)
223
  api_button = gr.Button(visible=False)
224
  api_button.click(
225
  fn=_run_api,
226
+ inputs=[api_dataset, api_profile, api_version, api_open_pr, api_submission_id],
227
  outputs=api_output,
228
  api_name="run_api",
229
  )
230
 
231
+ # Progress endpoint — polled by the dashboard while a row is
232
+ # validating. CORS is open on /gradio_api/* by default, so the
233
+ # browser can fetch this from github.io directly without any
234
+ # GitHub-Actions side polling/commit churn.
235
+ prog_in = gr.Textbox(visible=False)
236
+ prog_out = gr.JSON(visible=False)
237
+ prog_button = gr.Button(visible=False)
238
+ prog_button.click(
239
+ fn=_get_progress,
240
+ inputs=[prog_in],
241
+ outputs=prog_out,
242
+ api_name="get_progress",
243
+ )
244
+
245
 
246
  if __name__ == "__main__":
247
  demo.queue().launch(
tools/hf_space/requirements.txt CHANGED
@@ -18,4 +18,3 @@ omniverse-asset-validator==1.15.1
18
  omniverse-usd-profiles==1.10.22
19
  markdown-it-py>=4.2
20
  simready-validate==2026.4.9
21
- # build-id: 08b20703-0e97-4904-a7af-d2e0b3a42337
 
18
  omniverse-usd-profiles==1.10.22
19
  markdown-it-py>=4.2
20
  simready-validate==2026.4.9
 
tools/hf_space/runner.py CHANGED
@@ -152,6 +152,19 @@ def _open_verdict_pr(
152
  return getattr(commit, "pr_url", None) or getattr(commit, "discussion_url", None)
153
 
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  def run(
156
  dataset: str,
157
  profile: str = "Robot-Body-Runnable",
@@ -159,6 +172,7 @@ def run(
159
  open_pr: bool = False,
160
  hf_token: str | None = None,
161
  log: Iterator[str] | None = None,
 
162
  ) -> RunResult:
163
  """Validate a single HF dataset. Yields log lines via the `log` callable.
164
 
@@ -207,6 +221,18 @@ def run(
207
  "--output", str(out_dir),
208
  "--no-use-kit",
209
  ]
 
 
 
 
 
 
 
 
 
 
 
 
210
  out(f" $ {shlex.join(cmd)}")
211
  # Capture stdout+stderr to a file so we can ship the tail back
212
  # in the response when the validator crashes. Streaming the
 
152
  return getattr(commit, "pr_url", None) or getattr(commit, "discussion_url", None)
153
 
154
 
155
+ PROGRESS_DIR = Path("/tmp/sr-progress")
156
+
157
+
158
+ def progress_path_for(submission_id: str) -> Path:
159
+ """Where the validator writes per-asset progress for this submission.
160
+ Read by the Space's get_progress endpoint to feed the dashboard's
161
+ fill-up progress bar. Empty submission_id → None (caller skips)."""
162
+ if not submission_id:
163
+ return None # type: ignore[return-value]
164
+ safe = "".join(c if c.isalnum() or c in "-_." else "_" for c in submission_id)
165
+ return PROGRESS_DIR / f"{safe}.json"
166
+
167
+
168
  def run(
169
  dataset: str,
170
  profile: str = "Robot-Body-Runnable",
 
172
  open_pr: bool = False,
173
  hf_token: str | None = None,
174
  log: Iterator[str] | None = None,
175
+ submission_id: str = "",
176
  ) -> RunResult:
177
  """Validate a single HF dataset. Yields log lines via the `log` callable.
178
 
 
221
  "--output", str(out_dir),
222
  "--no-use-kit",
223
  ]
224
+ prog_path = progress_path_for(submission_id) if submission_id else None
225
+ if prog_path:
226
+ PROGRESS_DIR.mkdir(parents=True, exist_ok=True)
227
+ # Seed a placeholder progress file so the dashboard sees "starting"
228
+ # immediately instead of "not found" while the validator
229
+ # boots (foundation-spec load is ~2 min).
230
+ prog_path.write_text(json.dumps({
231
+ "processed": 0, "total": 0, "current": None,
232
+ "started_at": _now(), "updated_at": _now(),
233
+ "state": "starting",
234
+ }))
235
+ cmd += ["--progress-file", str(prog_path)]
236
  out(f" $ {shlex.join(cmd)}")
237
  # Capture stdout+stderr to a file so we can ship the tail back
238
  # in the response when the validator crashes. Streaming the
tools/validation/plugins/simready-report/skills/simready-report/validate.py CHANGED
@@ -33,6 +33,35 @@ _PHYSX_PROFILES = frozenset({
33
  })
34
 
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  def _sniff_arg(argv: list[str], name: str, default: str | None = None) -> str | None:
37
  """Pull `--<name> <value>` out of argv without argparse. Returns default if absent."""
38
  flag = f"--{name}"
@@ -1190,6 +1219,12 @@ def main() -> int:
1190
  "robot-named files to Robot-Body-* and prop-named files to "
1191
  "Prop-Robotics-* in the same run, instead of one blanket "
1192
  "profile per dataset.")
 
 
 
 
 
 
1193
  args = ap.parse_args()
1194
 
1195
  # Parse the per-asset profile map up front so a malformed JSON
@@ -1338,12 +1373,16 @@ def main() -> int:
1338
  profile_req_count = sum(len(f.requirements) for f in default_profile.features)
1339
  print(f" profile features: {len(default_profile.features)}", flush=True)
1340
  print(f" profile requirements: {profile_req_count}", flush=True)
1341
- for asset in assets:
 
 
1342
  pid = _profile_for(asset)
1343
  if pid not in engines:
1344
  engines[pid] = build_engine(pid, args.version)
1345
  print(f" [profile-map] built engine for {pid}", flush=True)
1346
  engine, profile = engines[pid]
 
 
1347
  try:
1348
  results.append(validate_one(engine, asset, target, profile, pid, args.version, ext_tracker))
1349
  except Exception as e:
@@ -1351,12 +1390,13 @@ def main() -> int:
1351
  ext_tracker.record_issue(str(asset), f"Validator crashed: {type(e).__name__}: {e}")
1352
  results.append({
1353
  "name": asset.name, "path": str(asset),
1354
- "rel_path": str(asset.relative_to(target)) if asset.is_relative_to(target) else str(asset),
1355
  "profile": pid, "profile_version": args.version,
1356
  "passed": False,
1357
  "issues": [{"code": "SDK.CRASH", "severity": "error", "msg": str(e), "prim": "/", "rule": None}],
1358
  "passed_features": [], "failed_features": [], "affected_prims": [],
1359
  })
 
1360
  else:
1361
  # Parallel path. Workers load specs once via _pool_init, then
1362
  # lazy-build (and cache) engines per profile_id as tasks arrive.
@@ -1365,6 +1405,8 @@ def main() -> int:
1365
  profile_req_count = sum(len(f.requirements) for f in profile.features)
1366
  print(f" profile features: {len(profile.features)}", flush=True)
1367
  print(f" profile requirements: {profile_req_count}", flush=True)
 
 
1368
  with ProcessPoolExecutor(
1369
  max_workers=workers,
1370
  initializer=_pool_init,
@@ -1374,8 +1416,12 @@ def main() -> int:
1374
  ex.submit(_pool_validate, str(a), str(target), _profile_for(a), args.version): a
1375
  for a in assets
1376
  }
 
1377
  for fut in as_completed(futures):
1378
  asset = futures[fut]
 
 
 
1379
  try:
1380
  result, ext_records, issues = fut.result()
1381
  except Exception as e:
 
33
  })
34
 
35
 
36
+ def _emit_progress(path: str | None, processed: int, total: int,
37
+ current: str | None, started_at: str) -> None:
38
+ """Atomically write progress JSON for the HF Space's get_progress
39
+ endpoint. No-op when --progress-file isn't set (CLI / DGXC paths).
40
+
41
+ Atomicity matters because the Space serves polls concurrently with
42
+ the validator's writes — a half-written file would 500 the endpoint.
43
+ Write to a sibling .tmp and os.replace (POSIX atomic on same FS)."""
44
+ if not path:
45
+ return
46
+ import os, tempfile, datetime
47
+ payload = {
48
+ "processed": processed,
49
+ "total": total,
50
+ "current": current,
51
+ "started_at": started_at,
52
+ "updated_at": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
53
+ }
54
+ try:
55
+ os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
56
+ fd, tmp = tempfile.mkstemp(prefix=".progress-", dir=os.path.dirname(path) or ".")
57
+ with os.fdopen(fd, "w", encoding="utf-8") as f:
58
+ json.dump(payload, f)
59
+ os.replace(tmp, path)
60
+ except OSError:
61
+ # Progress is advisory — don't crash the validator if the FS hiccups.
62
+ pass
63
+
64
+
65
  def _sniff_arg(argv: list[str], name: str, default: str | None = None) -> str | None:
66
  """Pull `--<name> <value>` out of argv without argparse. Returns default if absent."""
67
  flag = f"--{name}"
 
1219
  "robot-named files to Robot-Body-* and prop-named files to "
1220
  "Prop-Robotics-* in the same run, instead of one blanket "
1221
  "profile per dataset.")
1222
+ ap.add_argument("--progress-file", default=None,
1223
+ help="Path to a JSON file the validator updates after each "
1224
+ "asset with {processed, total, current, started_at, "
1225
+ "updated_at}. Polled by the HF Space's get_progress "
1226
+ "endpoint so the dashboard can show a real-time bar. "
1227
+ "Writes are atomic (tmp + rename).")
1228
  args = ap.parse_args()
1229
 
1230
  # Parse the per-asset profile map up front so a malformed JSON
 
1373
  profile_req_count = sum(len(f.requirements) for f in default_profile.features)
1374
  print(f" profile features: {len(default_profile.features)}", flush=True)
1375
  print(f" profile requirements: {profile_req_count}", flush=True)
1376
+ progress_started = datetime.now(timezone.utc).isoformat(timespec="seconds")
1377
+ _emit_progress(args.progress_file, 0, len(assets), None, progress_started)
1378
+ for i, asset in enumerate(assets):
1379
  pid = _profile_for(asset)
1380
  if pid not in engines:
1381
  engines[pid] = build_engine(pid, args.version)
1382
  print(f" [profile-map] built engine for {pid}", flush=True)
1383
  engine, profile = engines[pid]
1384
+ rel = str(asset.relative_to(target)) if asset.is_relative_to(target) else str(asset)
1385
+ _emit_progress(args.progress_file, i, len(assets), rel, progress_started)
1386
  try:
1387
  results.append(validate_one(engine, asset, target, profile, pid, args.version, ext_tracker))
1388
  except Exception as e:
 
1390
  ext_tracker.record_issue(str(asset), f"Validator crashed: {type(e).__name__}: {e}")
1391
  results.append({
1392
  "name": asset.name, "path": str(asset),
1393
+ "rel_path": rel,
1394
  "profile": pid, "profile_version": args.version,
1395
  "passed": False,
1396
  "issues": [{"code": "SDK.CRASH", "severity": "error", "msg": str(e), "prim": "/", "rule": None}],
1397
  "passed_features": [], "failed_features": [], "affected_prims": [],
1398
  })
1399
+ _emit_progress(args.progress_file, len(assets), len(assets), None, progress_started)
1400
  else:
1401
  # Parallel path. Workers load specs once via _pool_init, then
1402
  # lazy-build (and cache) engines per profile_id as tasks arrive.
 
1405
  profile_req_count = sum(len(f.requirements) for f in profile.features)
1406
  print(f" profile features: {len(profile.features)}", flush=True)
1407
  print(f" profile requirements: {profile_req_count}", flush=True)
1408
+ progress_started = datetime.now(timezone.utc).isoformat(timespec="seconds")
1409
+ _emit_progress(args.progress_file, 0, len(assets), None, progress_started)
1410
  with ProcessPoolExecutor(
1411
  max_workers=workers,
1412
  initializer=_pool_init,
 
1416
  ex.submit(_pool_validate, str(a), str(target), _profile_for(a), args.version): a
1417
  for a in assets
1418
  }
1419
+ completed = 0
1420
  for fut in as_completed(futures):
1421
  asset = futures[fut]
1422
+ completed += 1
1423
+ rel = str(asset.relative_to(target)) if asset.is_relative_to(target) else str(asset)
1424
+ _emit_progress(args.progress_file, completed, len(assets), rel, progress_started)
1425
  try:
1426
  result, ext_records, issues = fut.result()
1427
  except Exception as e: