Spaces:
Sleeping
Sleeping
Commit ·
4e38f5d
1
Parent(s): 618cfad
Add per-asset progress endpoint for the dashboard's progress bar
Browse files
Source: simready-oem-library-pm@1c4d7c0
- runner.py: accept submission_id; seed /tmp/sr-progress/<id>.json
and pass --progress-file to the validator subprocess.
- app.py: /run_api takes a 5th positional (submission_id); new
/get_progress endpoint reads the file so the dashboard can poll
this Space directly while a validation is in flight.
- validate.py: --progress-file PATH; atomic JSON write after each
asset in both the sequential and parallel paths.
No build-context changes to requirements / Dockerfile beyond a stale
build-id comment removal — pip layer stays cached, only the app.py /
runner.py / validate.py COPYs invalidate, so build should be ~1 min.
tools/hf_space/Dockerfile
CHANGED
|
@@ -32,12 +32,20 @@ WORKDIR /home/${USER}/app
|
|
| 32 |
COPY tools/hf_space/requirements.txt ./requirements.txt
|
| 33 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 34 |
|
| 35 |
-
# Foundation specs:
|
| 36 |
-
#
|
| 37 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
ENV SIMREADY_FOUNDATIONS_PATH=/opt/simready_foundations
|
| 39 |
-
|
|
|
|
| 40 |
${SIMREADY_FOUNDATIONS_PATH} \
|
|
|
|
|
|
|
| 41 |
&& chown -R ${USER}:${USER} ${SIMREADY_FOUNDATIONS_PATH}
|
| 42 |
|
| 43 |
# Copy the bundled validator (the same code our DGXC runner uses) and
|
|
|
|
| 32 |
COPY tools/hf_space/requirements.txt ./requirements.txt
|
| 33 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 34 |
|
| 35 |
+
# Foundation specs: pinned to a specific commit so spec churn upstream
|
| 36 |
+
# doesn't break the Space's compatibility with the pinned simready-
|
| 37 |
+
# validate package. The validator's loader rejects features whose
|
| 38 |
+
# requirement codes aren't registered, and the foundation repo's main
|
| 39 |
+
# branch has moved ahead of what simready-validate 2026.4.x understands
|
| 40 |
+
# — sticking to the 2026.04.0 release tag (commit 805d2c5) matches the
|
| 41 |
+
# DGXC runner's working setup. Bump deliberately when both the spec
|
| 42 |
+
# and the validator package are tested together.
|
| 43 |
ENV SIMREADY_FOUNDATIONS_PATH=/opt/simready_foundations
|
| 44 |
+
ENV SIMREADY_FOUNDATIONS_COMMIT=805d2c50179a9878c89b0f41baaa0ecafe47c3d7
|
| 45 |
+
RUN git clone https://github.com/NVIDIA/simready-foundation \
|
| 46 |
${SIMREADY_FOUNDATIONS_PATH} \
|
| 47 |
+
&& cd ${SIMREADY_FOUNDATIONS_PATH} \
|
| 48 |
+
&& git checkout ${SIMREADY_FOUNDATIONS_COMMIT} \
|
| 49 |
&& chown -R ${USER}:${USER} ${SIMREADY_FOUNDATIONS_PATH}
|
| 50 |
|
| 51 |
# Copy the bundled validator (the same code our DGXC runner uses) and
|
tools/hf_space/README.md
CHANGED
|
@@ -66,8 +66,15 @@ Set the tier in the Space's **Settings → Hardware** page (or in
|
|
| 66 |
|
| 67 |
## Deploy
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
|
| 72 |
### 1. Create the Space `[BROWSER]`
|
| 73 |
|
|
|
|
| 66 |
|
| 67 |
## Deploy
|
| 68 |
|
| 69 |
+
This dance is captured as a Claude Code skill at
|
| 70 |
+
[`skills/deploy-hf-space/SKILL.md`](./skills/deploy-hf-space/SKILL.md).
|
| 71 |
+
Future operators can run `/deploy-hf-space [<slug>]` instead of
|
| 72 |
+
following this README by hand. The README below is the human-readable
|
| 73 |
+
mirror.
|
| 74 |
+
|
| 75 |
+
The Space is currently live at
|
| 76 |
+
[`nvidia/simready-validator`](https://huggingface.co/spaces/nvidia/simready-validator).
|
| 77 |
+
To re-stand it up from scratch:
|
| 78 |
|
| 79 |
### 1. Create the Space `[BROWSER]`
|
| 80 |
|
tools/hf_space/app.py
CHANGED
|
@@ -20,12 +20,13 @@ the Space's own token opens the verdict PR.
|
|
| 20 |
from __future__ import annotations
|
| 21 |
|
| 22 |
import dataclasses
|
|
|
|
| 23 |
import os
|
| 24 |
from pathlib import Path
|
| 25 |
|
| 26 |
import gradio as gr
|
| 27 |
|
| 28 |
-
from runner import run as run_validator
|
| 29 |
|
| 30 |
|
| 31 |
PROFILE_CHOICES = [
|
|
@@ -42,7 +43,8 @@ DEFAULT_PROFILE = "Prop-Robotics-Neutral"
|
|
| 42 |
DEFAULT_VERSION = "1.0.0"
|
| 43 |
|
| 44 |
|
| 45 |
-
def _run_api(dataset: str, profile: str, version: str, open_pr: bool
|
|
|
|
| 46 |
"""Programmatic endpoint. Returns the RunResult as a JSON dict.
|
| 47 |
|
| 48 |
Caller is typically `tools/hf_watch/call_hf_space.py` running from
|
|
@@ -51,12 +53,17 @@ def _run_api(dataset: str, profile: str, version: str, open_pr: bool) -> dict:
|
|
| 51 |
pattern-matches on the same field names `tools/hf_watch/validate.py`
|
| 52 |
produces, so status.json patching is identical regardless of which
|
| 53 |
backend ran the validation.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
"""
|
| 55 |
result = run_validator(
|
| 56 |
dataset=(dataset or "").strip(),
|
| 57 |
profile=profile or DEFAULT_PROFILE,
|
| 58 |
version=(version or DEFAULT_VERSION).strip(),
|
| 59 |
open_pr=bool(open_pr),
|
|
|
|
| 60 |
)
|
| 61 |
return {
|
| 62 |
"schema_version": 1,
|
|
@@ -70,6 +77,36 @@ def _run_api(dataset: str, profile: str, version: str, open_pr: bool) -> dict:
|
|
| 70 |
}
|
| 71 |
|
| 72 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
def _sanitize_results_json(raw: dict) -> dict:
|
| 74 |
"""Strip absolute filesystem paths from results_json before returning.
|
| 75 |
|
|
@@ -181,15 +218,30 @@ with gr.Blocks(title="SimReady Validator") as demo:
|
|
| 181 |
api_profile = gr.Textbox(visible=False)
|
| 182 |
api_version = gr.Textbox(visible=False)
|
| 183 |
api_open_pr = gr.Checkbox(visible=False)
|
|
|
|
| 184 |
api_output = gr.JSON(visible=False)
|
| 185 |
api_button = gr.Button(visible=False)
|
| 186 |
api_button.click(
|
| 187 |
fn=_run_api,
|
| 188 |
-
inputs=[api_dataset, api_profile, api_version, api_open_pr],
|
| 189 |
outputs=api_output,
|
| 190 |
api_name="run_api",
|
| 191 |
)
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
if __name__ == "__main__":
|
| 195 |
demo.queue().launch(
|
|
|
|
| 20 |
from __future__ import annotations
|
| 21 |
|
| 22 |
import dataclasses
|
| 23 |
+
import json
|
| 24 |
import os
|
| 25 |
from pathlib import Path
|
| 26 |
|
| 27 |
import gradio as gr
|
| 28 |
|
| 29 |
+
from runner import run as run_validator, progress_path_for
|
| 30 |
|
| 31 |
|
| 32 |
PROFILE_CHOICES = [
|
|
|
|
| 43 |
DEFAULT_VERSION = "1.0.0"
|
| 44 |
|
| 45 |
|
| 46 |
+
def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
|
| 47 |
+
submission_id: str = "") -> dict:
|
| 48 |
"""Programmatic endpoint. Returns the RunResult as a JSON dict.
|
| 49 |
|
| 50 |
Caller is typically `tools/hf_watch/call_hf_space.py` running from
|
|
|
|
| 53 |
pattern-matches on the same field names `tools/hf_watch/validate.py`
|
| 54 |
produces, so status.json patching is identical regardless of which
|
| 55 |
backend ran the validation.
|
| 56 |
+
|
| 57 |
+
`submission_id` is optional — when set, the validator writes
|
| 58 |
+
per-asset progress to /tmp/sr-progress/<id>.json, which the
|
| 59 |
+
get_progress endpoint serves to the dashboard.
|
| 60 |
"""
|
| 61 |
result = run_validator(
|
| 62 |
dataset=(dataset or "").strip(),
|
| 63 |
profile=profile or DEFAULT_PROFILE,
|
| 64 |
version=(version or DEFAULT_VERSION).strip(),
|
| 65 |
open_pr=bool(open_pr),
|
| 66 |
+
submission_id=(submission_id or "").strip(),
|
| 67 |
)
|
| 68 |
return {
|
| 69 |
"schema_version": 1,
|
|
|
|
| 77 |
}
|
| 78 |
|
| 79 |
|
| 80 |
+
def _get_progress(submission_id: str) -> dict:
    """Read the validator's per-asset progress file for this submission.

    Polled by the dashboard ~every 3 s while a Validate-now click is
    in-flight, so the "Validate now" button can fill up as the
    validator works through the asset list.

    Args:
        submission_id: Identifier the caller also passed to run_api.
            Surrounding whitespace is ignored; ``None`` is treated as
            empty.

    Returns:
        Always a dict, in one of these shapes:

        - {"state": "no_id"} — submission_id was empty/blank.
        - {"state": "not_found"} — no progress file (Space restarted, or
          the dashboard is polling a Space-run that never happened).
        - {"state": "transient"} — the file exists but could not be read
          or did not decode to a JSON object; the caller should simply
          poll again in a few seconds.
        - {"state": "starting", ...} — file seeded by runner.py before
          the validator started its loop. processed/total are 0.
        - {processed, total, current, started_at, updated_at} — live
          per-asset progress written by validate.py._emit_progress.

    Caller treats anything with total > 0 as "show the fill bar".
    """
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}

    path = progress_path_for(sid)
    if path is None or not path.is_file():
        return {"state": "not_found"}

    try:
        payload = json.loads(path.read_text(encoding="utf-8"))
    except FileNotFoundError:
        # The file disappeared between the is_file() check and the read.
        return {"state": "not_found"}
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and
        # UnicodeDecodeError (a partially written or corrupt file).
        return {"state": "transient"}

    if not isinstance(payload, dict):
        # Valid JSON but not an object: keep the `-> dict` contract
        # instead of handing the dashboard a list/str/number.
        return {"state": "transient"}
    return payload
|
| 108 |
+
|
| 109 |
+
|
| 110 |
def _sanitize_results_json(raw: dict) -> dict:
|
| 111 |
"""Strip absolute filesystem paths from results_json before returning.
|
| 112 |
|
|
|
|
| 218 |
api_profile = gr.Textbox(visible=False)
|
| 219 |
api_version = gr.Textbox(visible=False)
|
| 220 |
api_open_pr = gr.Checkbox(visible=False)
|
| 221 |
+
api_submission_id = gr.Textbox(visible=False)
|
| 222 |
api_output = gr.JSON(visible=False)
|
| 223 |
api_button = gr.Button(visible=False)
|
| 224 |
api_button.click(
|
| 225 |
fn=_run_api,
|
| 226 |
+
inputs=[api_dataset, api_profile, api_version, api_open_pr, api_submission_id],
|
| 227 |
outputs=api_output,
|
| 228 |
api_name="run_api",
|
| 229 |
)
|
| 230 |
|
| 231 |
+
# Progress endpoint — polled by the dashboard while a row is
|
| 232 |
+
# validating. CORS is open on /gradio_api/* by default, so the
|
| 233 |
+
# browser can fetch this from github.io directly without any
|
| 234 |
+
# GitHub-Actions side polling/commit churn.
|
| 235 |
+
prog_in = gr.Textbox(visible=False)
|
| 236 |
+
prog_out = gr.JSON(visible=False)
|
| 237 |
+
prog_button = gr.Button(visible=False)
|
| 238 |
+
prog_button.click(
|
| 239 |
+
fn=_get_progress,
|
| 240 |
+
inputs=[prog_in],
|
| 241 |
+
outputs=prog_out,
|
| 242 |
+
api_name="get_progress",
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
|
| 246 |
if __name__ == "__main__":
|
| 247 |
demo.queue().launch(
|
tools/hf_space/requirements.txt
CHANGED
|
@@ -18,4 +18,3 @@ omniverse-asset-validator==1.15.1
|
|
| 18 |
omniverse-usd-profiles==1.10.22
|
| 19 |
markdown-it-py>=4.2
|
| 20 |
simready-validate==2026.4.9
|
| 21 |
-
# build-id: 08b20703-0e97-4904-a7af-d2e0b3a42337
|
|
|
|
| 18 |
omniverse-usd-profiles==1.10.22
|
| 19 |
markdown-it-py>=4.2
|
| 20 |
simready-validate==2026.4.9
|
|
|
tools/hf_space/runner.py
CHANGED
|
@@ -152,6 +152,19 @@ def _open_verdict_pr(
|
|
| 152 |
return getattr(commit, "pr_url", None) or getattr(commit, "discussion_url", None)
|
| 153 |
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
def run(
|
| 156 |
dataset: str,
|
| 157 |
profile: str = "Robot-Body-Runnable",
|
|
@@ -159,6 +172,7 @@ def run(
|
|
| 159 |
open_pr: bool = False,
|
| 160 |
hf_token: str | None = None,
|
| 161 |
log: Iterator[str] | None = None,
|
|
|
|
| 162 |
) -> RunResult:
|
| 163 |
"""Validate a single HF dataset. Yields log lines via the `log` callable.
|
| 164 |
|
|
@@ -207,6 +221,18 @@ def run(
|
|
| 207 |
"--output", str(out_dir),
|
| 208 |
"--no-use-kit",
|
| 209 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
out(f" $ {shlex.join(cmd)}")
|
| 211 |
# Capture stdout+stderr to a file so we can ship the tail back
|
| 212 |
# in the response when the validator crashes. Streaming the
|
|
|
|
| 152 |
return getattr(commit, "pr_url", None) or getattr(commit, "discussion_url", None)
|
| 153 |
|
| 154 |
|
| 155 |
+
PROGRESS_DIR = Path("/tmp/sr-progress")
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def progress_path_for(submission_id: str) -> Path:
|
| 159 |
+
"""Where the validator writes per-asset progress for this submission.
|
| 160 |
+
Read by the Space's get_progress endpoint to feed the dashboard's
|
| 161 |
+
fill-up progress bar. Empty submission_id → None (caller skips)."""
|
| 162 |
+
if not submission_id:
|
| 163 |
+
return None # type: ignore[return-value]
|
| 164 |
+
safe = "".join(c if c.isalnum() or c in "-_." else "_" for c in submission_id)
|
| 165 |
+
return PROGRESS_DIR / f"{safe}.json"
|
| 166 |
+
|
| 167 |
+
|
| 168 |
def run(
|
| 169 |
dataset: str,
|
| 170 |
profile: str = "Robot-Body-Runnable",
|
|
|
|
| 172 |
open_pr: bool = False,
|
| 173 |
hf_token: str | None = None,
|
| 174 |
log: Iterator[str] | None = None,
|
| 175 |
+
submission_id: str = "",
|
| 176 |
) -> RunResult:
|
| 177 |
"""Validate a single HF dataset. Yields log lines via the `log` callable.
|
| 178 |
|
|
|
|
| 221 |
"--output", str(out_dir),
|
| 222 |
"--no-use-kit",
|
| 223 |
]
|
| 224 |
+
prog_path = progress_path_for(submission_id) if submission_id else None
|
| 225 |
+
if prog_path:
|
| 226 |
+
PROGRESS_DIR.mkdir(parents=True, exist_ok=True)
|
| 227 |
+
# Seed a placeholder progress file so the dashboard sees "starting"
|
| 228 |
+
# immediately instead of "not found" while the validator
|
| 229 |
+
# boots (foundation-spec load is ~2 min).
|
| 230 |
+
prog_path.write_text(json.dumps({
|
| 231 |
+
"processed": 0, "total": 0, "current": None,
|
| 232 |
+
"started_at": _now(), "updated_at": _now(),
|
| 233 |
+
"state": "starting",
|
| 234 |
+
}))
|
| 235 |
+
cmd += ["--progress-file", str(prog_path)]
|
| 236 |
out(f" $ {shlex.join(cmd)}")
|
| 237 |
# Capture stdout+stderr to a file so we can ship the tail back
|
| 238 |
# in the response when the validator crashes. Streaming the
|
tools/validation/plugins/simready-report/skills/simready-report/validate.py
CHANGED
|
@@ -33,6 +33,35 @@ _PHYSX_PROFILES = frozenset({
|
|
| 33 |
})
|
| 34 |
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
def _sniff_arg(argv: list[str], name: str, default: str | None = None) -> str | None:
|
| 37 |
"""Pull `--<name> <value>` out of argv without argparse. Returns default if absent."""
|
| 38 |
flag = f"--{name}"
|
|
@@ -1190,6 +1219,12 @@ def main() -> int:
|
|
| 1190 |
"robot-named files to Robot-Body-* and prop-named files to "
|
| 1191 |
"Prop-Robotics-* in the same run, instead of one blanket "
|
| 1192 |
"profile per dataset.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1193 |
args = ap.parse_args()
|
| 1194 |
|
| 1195 |
# Parse the per-asset profile map up front so a malformed JSON
|
|
@@ -1338,12 +1373,16 @@ def main() -> int:
|
|
| 1338 |
profile_req_count = sum(len(f.requirements) for f in default_profile.features)
|
| 1339 |
print(f" profile features: {len(default_profile.features)}", flush=True)
|
| 1340 |
print(f" profile requirements: {profile_req_count}", flush=True)
|
| 1341 |
-
|
|
|
|
|
|
|
| 1342 |
pid = _profile_for(asset)
|
| 1343 |
if pid not in engines:
|
| 1344 |
engines[pid] = build_engine(pid, args.version)
|
| 1345 |
print(f" [profile-map] built engine for {pid}", flush=True)
|
| 1346 |
engine, profile = engines[pid]
|
|
|
|
|
|
|
| 1347 |
try:
|
| 1348 |
results.append(validate_one(engine, asset, target, profile, pid, args.version, ext_tracker))
|
| 1349 |
except Exception as e:
|
|
@@ -1351,12 +1390,13 @@ def main() -> int:
|
|
| 1351 |
ext_tracker.record_issue(str(asset), f"Validator crashed: {type(e).__name__}: {e}")
|
| 1352 |
results.append({
|
| 1353 |
"name": asset.name, "path": str(asset),
|
| 1354 |
-
"rel_path":
|
| 1355 |
"profile": pid, "profile_version": args.version,
|
| 1356 |
"passed": False,
|
| 1357 |
"issues": [{"code": "SDK.CRASH", "severity": "error", "msg": str(e), "prim": "/", "rule": None}],
|
| 1358 |
"passed_features": [], "failed_features": [], "affected_prims": [],
|
| 1359 |
})
|
|
|
|
| 1360 |
else:
|
| 1361 |
# Parallel path. Workers load specs once via _pool_init, then
|
| 1362 |
# lazy-build (and cache) engines per profile_id as tasks arrive.
|
|
@@ -1365,6 +1405,8 @@ def main() -> int:
|
|
| 1365 |
profile_req_count = sum(len(f.requirements) for f in profile.features)
|
| 1366 |
print(f" profile features: {len(profile.features)}", flush=True)
|
| 1367 |
print(f" profile requirements: {profile_req_count}", flush=True)
|
|
|
|
|
|
|
| 1368 |
with ProcessPoolExecutor(
|
| 1369 |
max_workers=workers,
|
| 1370 |
initializer=_pool_init,
|
|
@@ -1374,8 +1416,12 @@ def main() -> int:
|
|
| 1374 |
ex.submit(_pool_validate, str(a), str(target), _profile_for(a), args.version): a
|
| 1375 |
for a in assets
|
| 1376 |
}
|
|
|
|
| 1377 |
for fut in as_completed(futures):
|
| 1378 |
asset = futures[fut]
|
|
|
|
|
|
|
|
|
|
| 1379 |
try:
|
| 1380 |
result, ext_records, issues = fut.result()
|
| 1381 |
except Exception as e:
|
|
|
|
| 33 |
})
|
| 34 |
|
| 35 |
|
| 36 |
+
def _emit_progress(path: str | None, processed: int, total: int,
|
| 37 |
+
current: str | None, started_at: str) -> None:
|
| 38 |
+
"""Atomically write progress JSON for the HF Space's get_progress
|
| 39 |
+
endpoint. No-op when --progress-file isn't set (CLI / DGXC paths).
|
| 40 |
+
|
| 41 |
+
Atomicity matters because the Space serves polls concurrently with
|
| 42 |
+
the validator's writes — a half-written file would 500 the endpoint.
|
| 43 |
+
Write to a sibling .tmp and os.replace (POSIX atomic on same FS)."""
|
| 44 |
+
if not path:
|
| 45 |
+
return
|
| 46 |
+
import os, tempfile, datetime
|
| 47 |
+
payload = {
|
| 48 |
+
"processed": processed,
|
| 49 |
+
"total": total,
|
| 50 |
+
"current": current,
|
| 51 |
+
"started_at": started_at,
|
| 52 |
+
"updated_at": datetime.datetime.now(datetime.timezone.utc).isoformat(timespec="seconds"),
|
| 53 |
+
}
|
| 54 |
+
try:
|
| 55 |
+
os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
|
| 56 |
+
fd, tmp = tempfile.mkstemp(prefix=".progress-", dir=os.path.dirname(path) or ".")
|
| 57 |
+
with os.fdopen(fd, "w", encoding="utf-8") as f:
|
| 58 |
+
json.dump(payload, f)
|
| 59 |
+
os.replace(tmp, path)
|
| 60 |
+
except OSError:
|
| 61 |
+
# Progress is advisory — don't crash the validator if the FS hiccups.
|
| 62 |
+
pass
|
| 63 |
+
|
| 64 |
+
|
| 65 |
def _sniff_arg(argv: list[str], name: str, default: str | None = None) -> str | None:
|
| 66 |
"""Pull `--<name> <value>` out of argv without argparse. Returns default if absent."""
|
| 67 |
flag = f"--{name}"
|
|
|
|
| 1219 |
"robot-named files to Robot-Body-* and prop-named files to "
|
| 1220 |
"Prop-Robotics-* in the same run, instead of one blanket "
|
| 1221 |
"profile per dataset.")
|
| 1222 |
+
ap.add_argument("--progress-file", default=None,
|
| 1223 |
+
help="Path to a JSON file the validator updates after each "
|
| 1224 |
+
"asset with {processed, total, current, started_at, "
|
| 1225 |
+
"updated_at}. Polled by the HF Space's get_progress "
|
| 1226 |
+
"endpoint so the dashboard can show a real-time bar. "
|
| 1227 |
+
"Writes are atomic (tmp + rename).")
|
| 1228 |
args = ap.parse_args()
|
| 1229 |
|
| 1230 |
# Parse the per-asset profile map up front so a malformed JSON
|
|
|
|
| 1373 |
profile_req_count = sum(len(f.requirements) for f in default_profile.features)
|
| 1374 |
print(f" profile features: {len(default_profile.features)}", flush=True)
|
| 1375 |
print(f" profile requirements: {profile_req_count}", flush=True)
|
| 1376 |
+
progress_started = datetime.now(timezone.utc).isoformat(timespec="seconds")
|
| 1377 |
+
_emit_progress(args.progress_file, 0, len(assets), None, progress_started)
|
| 1378 |
+
for i, asset in enumerate(assets):
|
| 1379 |
pid = _profile_for(asset)
|
| 1380 |
if pid not in engines:
|
| 1381 |
engines[pid] = build_engine(pid, args.version)
|
| 1382 |
print(f" [profile-map] built engine for {pid}", flush=True)
|
| 1383 |
engine, profile = engines[pid]
|
| 1384 |
+
rel = str(asset.relative_to(target)) if asset.is_relative_to(target) else str(asset)
|
| 1385 |
+
_emit_progress(args.progress_file, i, len(assets), rel, progress_started)
|
| 1386 |
try:
|
| 1387 |
results.append(validate_one(engine, asset, target, profile, pid, args.version, ext_tracker))
|
| 1388 |
except Exception as e:
|
|
|
|
| 1390 |
ext_tracker.record_issue(str(asset), f"Validator crashed: {type(e).__name__}: {e}")
|
| 1391 |
results.append({
|
| 1392 |
"name": asset.name, "path": str(asset),
|
| 1393 |
+
"rel_path": rel,
|
| 1394 |
"profile": pid, "profile_version": args.version,
|
| 1395 |
"passed": False,
|
| 1396 |
"issues": [{"code": "SDK.CRASH", "severity": "error", "msg": str(e), "prim": "/", "rule": None}],
|
| 1397 |
"passed_features": [], "failed_features": [], "affected_prims": [],
|
| 1398 |
})
|
| 1399 |
+
_emit_progress(args.progress_file, len(assets), len(assets), None, progress_started)
|
| 1400 |
else:
|
| 1401 |
# Parallel path. Workers load specs once via _pool_init, then
|
| 1402 |
# lazy-build (and cache) engines per profile_id as tasks arrive.
|
|
|
|
| 1405 |
profile_req_count = sum(len(f.requirements) for f in profile.features)
|
| 1406 |
print(f" profile features: {len(profile.features)}", flush=True)
|
| 1407 |
print(f" profile requirements: {profile_req_count}", flush=True)
|
| 1408 |
+
progress_started = datetime.now(timezone.utc).isoformat(timespec="seconds")
|
| 1409 |
+
_emit_progress(args.progress_file, 0, len(assets), None, progress_started)
|
| 1410 |
with ProcessPoolExecutor(
|
| 1411 |
max_workers=workers,
|
| 1412 |
initializer=_pool_init,
|
|
|
|
| 1416 |
ex.submit(_pool_validate, str(a), str(target), _profile_for(a), args.version): a
|
| 1417 |
for a in assets
|
| 1418 |
}
|
| 1419 |
+
completed = 0
|
| 1420 |
for fut in as_completed(futures):
|
| 1421 |
asset = futures[fut]
|
| 1422 |
+
completed += 1
|
| 1423 |
+
rel = str(asset.relative_to(target)) if asset.is_relative_to(target) else str(asset)
|
| 1424 |
+
_emit_progress(args.progress_file, completed, len(assets), rel, progress_started)
|
| 1425 |
try:
|
| 1426 |
result, ext_records, issues = fut.result()
|
| 1427 |
except Exception as e:
|