loginowskid committed on
Commit
74d52fc
·
verified ·
1 Parent(s): cd53438

list_profiles: --no-use-kit (fast enumeration; fixes 300s timeout)

Browse files
Files changed (1) hide show
  1. tools/hf_space/app.py +429 -429
tools/hf_space/app.py CHANGED
@@ -1,429 +1,429 @@
1
- """SimReady Validator — Gradio UI for the HuggingFace Space.
2
-
3
- Two surfaces, same engine:
4
-
5
- - **/run** (the on-screen button) — streams log lines to the UI for
6
- interactive use by an operator in the browser.
7
- - **/run_api** (hidden, programmatic) — returns the full RunResult as
8
- a JSON-serializable dict. This is what `tools/hf_watch/call_hf_space.py`
9
- hits from the GitHub Actions runner so the workflow can patch
10
- status.json and asset-status.json without scraping the UI's text.
11
-
12
- Both go through `runner.run()`. The split is purely about output
13
- shape (streaming text vs. one-shot dict).
14
-
15
- The Space is internal-pilot scope: HF_TOKEN comes from the Space's
16
- secrets, NOT from the requester. When a customer's dataset PR triggers
17
- this (next milestone), the webhook payload identifies the dataset and
18
- the Space's own token opens the verdict PR.
19
- """
20
- from __future__ import annotations
21
-
22
- import json
23
- import os
24
- from pathlib import Path
25
-
26
- import gradio as gr
27
-
28
- from runner import (run as run_validator, progress_path_for, cancel_path_for,
29
- run_token_path_for, CANCEL_DIR)
30
-
31
-
32
- PROFILE_CHOICES = [
33
- "Prop-Robotics-Neutral",
34
- "Prop-Robotics-Physx",
35
- "Prop-Robotics-Isaac",
36
- "Robot-Body-Neutral",
37
- "Robot-Body-Runnable",
38
- "Robot-Body-Isaac",
39
- "Package",
40
- "Package-Candidate",
41
- ]
42
- DEFAULT_PROFILE = "Prop-Robotics-Neutral"
43
- DEFAULT_VERSION = "1.0.0"
44
-
45
-
46
- def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
47
- submission_id: str = "", force: bool = False,
48
- preliminary: bool = False) -> dict:
49
- """Programmatic endpoint. Returns the RunResult as a JSON dict.
50
-
51
- Caller is typically `tools/hf_watch/call_hf_space.py` running from
52
- a GitHub Actions ubuntu-latest runner. Output shape must stay
53
- stable — bump `schema_version` if you change it. The receiver
54
- pattern-matches on the same field names `tools/hf_watch/validate.py`
55
- produces, so status.json patching is identical regardless of which
56
- backend ran the validation.
57
-
58
- `submission_id` is optional — when set, the validator writes
59
- per-asset progress to /tmp/sr-progress/<id>.json, which the
60
- get_progress endpoint serves to the dashboard.
61
-
62
- `preliminary` switches the runner to a structure-only sweep:
63
- zip-bundled datasets are scanned (instead of failing
64
- PKG.NO-ARCHIVES at the listing stage) and per-asset validation is
65
- sliced to the first asset only. Used by the dashboard's
66
- Preliminary scan tab.
67
- """
68
- print(f"[run_api] preliminary={preliminary!r} force={force!r} "
69
- f"submission_id={submission_id!r}", flush=True)
70
- # Untrusted callers can hit /run_api directly — profile/version flow
71
- # into the validator's argv, so validate them before use. Empty
72
- # falls back to the defaults (existing behavior).
73
- import re
74
- profile = profile or DEFAULT_PROFILE
75
- if profile not in PROFILE_CHOICES and profile.lower() != "auto":
76
- raise ValueError(f"invalid profile: {profile!r}")
77
- version = (version or DEFAULT_VERSION).strip()
78
- if not re.fullmatch(r"[\w.\-]+", version):
79
- raise ValueError(f"invalid version: {version!r}")
80
- result = run_validator(
81
- dataset=(dataset or "").strip(),
82
- profile=profile,
83
- version=version,
84
- open_pr=bool(open_pr),
85
- submission_id=(submission_id or "").strip(),
86
- force=bool(force),
87
- preliminary=bool(preliminary),
88
- )
89
- return {
90
- "schema_version": 1,
91
- "dataset": result.dataset,
92
- "profile": result.profile,
93
- "version": result.version,
94
- "status": result.status,
95
- "summary": result.summary,
96
- "results_json": _sanitize_results_json(result.results_json),
97
- "pr_url": result.pr_url,
98
- }
99
-
100
-
101
- def _list_profiles() -> dict:
102
- """Return the set of profiles that actually load on this Space's
103
- foundation+validator combination. The dashboard polls this to
104
- populate its dropdown so operators can't pick a profile that
105
- would fatally fail at registration time.
106
-
107
- Uses --use-plugin since the default CLI loader has known
108
- registration mismatches against the current foundation pin; the
109
- plugin path is what runner.py's streaming-zip flow falls back
110
- to and is the source of truth for "actually usable" here.
111
-
112
- Output format from validate.py is `PROFILE: <id> v<version>`
113
- per profile, one per line.
114
- """
115
- import subprocess, sys
116
- from runner import VALIDATOR
117
- try:
118
- proc = subprocess.run(
119
- # --list-profiles only ENUMERATES registered profiles from the
120
- # spec/plugin registry (--use-plugin) — it runs no validation
121
- # rules, so it never needs Kit. Force --no-use-kit: on a
122
- # Kit-enabled image the validator auto-enables --use-kit for the
123
- # PhysX-bearing default profile and boots the full Isaac Sim
124
- # runtime (~5 min) just to print the list, blowing the 300s
125
- # timeout below. Actual validation (runner.py) still uses Kit.
126
- [sys.executable, str(VALIDATOR), "--list-profiles", "--use-plugin", "--no-use-kit"],
127
- capture_output=True, text=True, timeout=300,
128
- )
129
- names: list[str] = []
130
- for line in (proc.stdout or "").splitlines():
131
- s = line.strip()
132
- # Validator emits "PROFILE: <id> v<version>" — that's our
133
- # only authoritative shape. Anything else is noise.
134
- if s.startswith("PROFILE:"):
135
- rest = s[len("PROFILE:"):].strip()
136
- pid = rest.split()[0] if rest else ""
137
- if pid:
138
- names.append(pid)
139
- # Dedupe while preserving order.
140
- seen = set()
141
- unique = []
142
- for n in names:
143
- if n not in seen:
144
- seen.add(n)
145
- unique.append(n)
146
- result: dict = {"profiles": unique, "schema_version": 1, "rc": proc.returncode}
147
- if not unique:
148
- # No profiles registered AND no parse hits — surface why so
149
- # the dashboard can show something useful. Truncate so the
150
- # JSON response stays small.
151
- stderr_tail = "\n".join((proc.stderr or "").splitlines()[-20:])[:2000]
152
- stdout_tail = "\n".join((proc.stdout or "").splitlines()[-20:])[:2000]
153
- result["stderr_tail"] = stderr_tail
154
- result["stdout_tail"] = stdout_tail
155
- return result
156
- except subprocess.TimeoutExpired:
157
- return {"profiles": [], "error": "timeout after 300s (spec load >5 min)"}
158
- except Exception as e:
159
- return {"profiles": [], "error": f"{type(e).__name__}: {e}"}
160
-
161
-
162
- def _cancel_run(submission_id: str, run_token: str = "") -> dict:
163
- """Write the cancel-signal file for a given submission. The
164
- streaming-zip loop in runner.py checks for this file between zips
165
- and aborts when present. Idempotent — calling multiple times has no
166
- extra effect; consuming runner.py deletes it.
167
-
168
- `run_token` is the per-run token the dashboard read from get_progress.
169
- It becomes the flag's content so runner._is_cancelled only honors it
170
- for the exact run it was issued against — a flag left over from a
171
- prior run of this submission can never abort a fresh one."""
172
- sid = (submission_id or "").strip()
173
- if not sid:
174
- return {"state": "no_id"}
175
- path = cancel_path_for(sid)
176
- if path is None:
177
- return {"state": "no_id"}
178
- try:
179
- CANCEL_DIR.mkdir(parents=True, exist_ok=True)
180
- path.write_text((run_token or "").strip(), encoding="utf-8")
181
- return {"state": "signaled", "path": str(path)}
182
- except OSError as e:
183
- return {"state": "error", "error": f"{type(e).__name__}: {e}"}
184
-
185
-
186
- def _get_progress(submission_id: str) -> dict:
187
- """Read the validator's per-asset progress file for this submission.
188
-
189
- Polled by the dashboard ~every 3 s while a Validate-now click is
190
- in-flight, so the "Validate now" button can fill up as the
191
- validator works through the asset list.
192
-
193
- Returns one of three shapes:
194
- - {"state": "not_found"} — no progress file (Space restarted, or
195
- the dashboard is polling a Space-run that never happened).
196
- - {"state": "starting"} — file seeded by runner.py before the
197
- validator started its loop. processed/total are 0.
198
- - {processed, total, current, started_at, updated_at} — live
199
- per-asset progress written by validate.py._emit_progress.
200
-
201
- Every shape also carries `run_token` (the current run's cancel
202
- token, from the sidecar file) when one exists, so the dashboard can
203
- echo it back to cancel_run and target the exact run.
204
-
205
- Caller treats anything with total > 0 as "show the fill bar".
206
- """
207
- sid = (submission_id or "").strip()
208
- if not sid:
209
- return {"state": "no_id"}
210
- # Per-run cancel token (sidecar; see runner.run_token_path_for).
211
- # Surfaced on every shape so the dashboard can echo it back to
212
- # cancel_run — a cancel then only aborts the run it was issued
213
- # against, never a later one that reused the submission_id.
214
- run_token = ""
215
- tok_path = run_token_path_for(sid)
216
- if tok_path and tok_path.is_file():
217
- try:
218
- run_token = tok_path.read_text(encoding="utf-8").strip()
219
- except OSError:
220
- pass
221
- path = progress_path_for(sid)
222
- if path is None or not path.is_file():
223
- return {"state": "not_found", "run_token": run_token}
224
- try:
225
- data = json.loads(path.read_text(encoding="utf-8"))
226
- if isinstance(data, dict) and run_token:
227
- data["run_token"] = run_token
228
- return data
229
- except (OSError, json.JSONDecodeError):
230
- # Mid-write — caller will poll again in a few seconds.
231
- return {"state": "transient", "run_token": run_token}
232
-
233
-
234
- def _sanitize_results_json(raw: dict) -> dict:
235
- """Strip absolute filesystem paths from results_json before returning.
236
-
237
- Gradio's JSON serializer treats string fields that resolve to files
238
- on the Space's filesystem as downloadable references and tries to
239
- serve them through `/gradio_api/file=...`. The validator's
240
- results.json contains absolute paths (target dir + per-asset
241
- `path`) which point into the Space's ephemeral tempdir and are
242
- NOT exposed through gradio's allowed_paths — gradio_client then
243
- fails with 403 trying to auto-fetch them after a successful run.
244
-
245
- Callers don't need filesystem paths anyway — only `rel_path`
246
- (dataset-relative), `passed`, and `issues` are used downstream.
247
- Keep the rest of the report intact (profile_coverage, summary,
248
- layout_findings, etc.).
249
- """
250
- if not isinstance(raw, dict):
251
- return raw
252
- sanitized = {k: v for k, v in raw.items() if k != "target"}
253
- if "results" in sanitized and isinstance(sanitized["results"], list):
254
- sanitized["results"] = [
255
- {k: v for k, v in asset.items() if k != "path"}
256
- for asset in sanitized["results"]
257
- if isinstance(asset, dict)
258
- ]
259
- # Specs/dashboard dir paths are local to the Space, useless to caller.
260
- for k in ("specs_docs_dir", "dashboard_docs_dir"):
261
- sanitized.pop(k, None)
262
- return sanitized
263
-
264
-
265
- def _run_streaming(dataset: str, profile: str, version: str, open_pr: bool):
266
- """Generator that yields incremental log output to the UI as the
267
- validator runs. Gradio streams each yielded tuple to the connected
268
- outputs."""
269
- lines: list[str] = []
270
-
271
- def log(line: str) -> None:
272
- lines.append(line)
273
-
274
- yield "\n".join(lines), "", "(running…)", None
275
-
276
- try:
277
- result = run_validator(
278
- dataset=dataset.strip(),
279
- profile=profile,
280
- version=version.strip() or DEFAULT_VERSION,
281
- open_pr=open_pr,
282
- log=log,
283
- )
284
- except Exception as e:
285
- lines.append(f"\nERROR: {type(e).__name__}: {e}")
286
- yield "\n".join(lines), "", f"error: {e}", None
287
- return
288
-
289
- status_badge = f"**{result.status.upper()}** — {result.summary}"
290
- if result.pr_url:
291
- status_badge += f"\n\nPR: {result.pr_url}"
292
-
293
- report_index = result.report_path / "index.html"
294
- report_url = str(report_index) if report_index.is_file() else None
295
-
296
- yield (
297
- "\n".join(lines),
298
- status_badge,
299
- result.summary,
300
- report_url,
301
- )
302
-
303
-
304
- def _read_md(name: str) -> str:
305
- """Return the contents of name (relative to this file's dir),
306
- stripping a leading YAML frontmatter block if present. Falls back
307
- to a friendly stub when the file is missing — keeps the Space
308
- bootable even before the space-deploy workflow has synced the
309
- assembled docs into the container."""
310
- from pathlib import Path
311
- p = Path(__file__).resolve().parent / name
312
- try:
313
- src = p.read_text(encoding="utf-8")
314
- except FileNotFoundError:
315
- return f"_{name} not yet synced into this Space — check back after the next deploy._"
316
- if src.startswith("---"):
317
- end = src.find("\n---\n", 4)
318
- if end > 0:
319
- src = src[end + len("\n---\n"):].lstrip()
320
- return src
321
-
322
-
323
- with gr.Blocks(title="SimReady Validator") as demo:
324
- with gr.Tabs():
325
- with gr.Tab("Overview"):
326
- gr.Markdown(_read_md("README.md"))
327
- with gr.Tab("Validator"):
328
- gr.Markdown(
329
- "Submit a HuggingFace dataset to validate against a SimReady "
330
- "profile. With **Open PR** enabled, the verdict is uploaded "
331
- "back to the dataset as a `validation/` pull request."
332
- )
333
- with gr.Row():
334
- dataset = gr.Textbox(
335
- label="Dataset",
336
- placeholder="org/dataset (e.g. imagineio/PhysicalAI-SimReady-Kitchens-v1)",
337
- )
338
- with gr.Row():
339
- profile = gr.Dropdown(
340
- choices=PROFILE_CHOICES, value=DEFAULT_PROFILE, label="Profile",
341
- )
342
- version = gr.Textbox(label="Version", value=DEFAULT_VERSION)
343
- open_pr = gr.Checkbox(label="Open PR on dataset with verdict", value=False)
344
- run_btn = gr.Button("Validate", variant="primary")
345
- status_md = gr.Markdown(label="Verdict")
346
- summary_box = gr.Textbox(label="Summary", interactive=False)
347
- log_box = gr.Textbox(label="Log", lines=20, interactive=False)
348
- report_link = gr.File(label="HTML report (download)", interactive=False)
349
- with gr.Tab("Partner walkthrough"):
350
- gr.Markdown(_read_md("VALIDATE.md"))
351
-
352
- run_btn.click(
353
- fn=_run_streaming,
354
- inputs=[dataset, profile, version, open_pr],
355
- outputs=[log_box, status_md, summary_box, report_link],
356
- api_name="run",
357
- )
358
-
359
- # Programmatic endpoint — bound to invisible components so the UI
360
- # doesn't render anything extra, but the Gradio queue still exposes
361
- # an `/api/predict/run_api` route the gradio_client can hit. The
362
- # outputs[0] is the JSON return; api_name turns it into a stable
363
- # path the GitHub Actions caller depends on.
364
- api_dataset = gr.Textbox(visible=False)
365
- api_profile = gr.Textbox(visible=False)
366
- api_version = gr.Textbox(visible=False)
367
- api_open_pr = gr.Checkbox(visible=False)
368
- api_submission_id = gr.Textbox(visible=False)
369
- api_force = gr.Checkbox(visible=False)
370
- api_preliminary = gr.Checkbox(visible=False)
371
- api_output = gr.JSON(visible=False)
372
- api_button = gr.Button(visible=False)
373
- api_button.click(
374
- fn=_run_api,
375
- inputs=[api_dataset, api_profile, api_version, api_open_pr,
376
- api_submission_id, api_force, api_preliminary],
377
- outputs=api_output,
378
- api_name="run_api",
379
- )
380
-
381
- # Progress endpoint — polled by the dashboard while a row is
382
- # validating. CORS is open on /gradio_api/* by default, so the
383
- # browser can fetch this from github.io directly without any
384
- # GitHub-Actions side polling/commit churn.
385
- prog_in = gr.Textbox(visible=False)
386
- prog_out = gr.JSON(visible=False)
387
- prog_button = gr.Button(visible=False)
388
- prog_button.click(
389
- fn=_get_progress,
390
- inputs=[prog_in],
391
- outputs=prog_out,
392
- api_name="get_progress",
393
- )
394
-
395
- # Profile-listing endpoint — polled by the dashboard at startup
396
- # so its dropdown reflects what's actually loadable on this Space
397
- # right now (foundation+validator pin determines which profiles
398
- # register). Stops the operator from picking something that
399
- # would fatal at runtime.
400
- profiles_out = gr.JSON(visible=False)
401
- profiles_button = gr.Button(visible=False)
402
- profiles_button.click(
403
- fn=_list_profiles,
404
- inputs=None,
405
- outputs=profiles_out,
406
- api_name="list_profiles",
407
- )
408
-
409
- # Cancel endpoint — the dashboard's Cancel button calls this AFTER
410
- # cancelling the GH Action so the in-flight server-side gradio call
411
- # actually stops (cancelling the Action alone only kills the
412
- # gradio_client wrapper, the Space's loop keeps going).
413
- cancel_in = gr.Textbox(visible=False)
414
- cancel_token = gr.Textbox(visible=False)
415
- cancel_out = gr.JSON(visible=False)
416
- cancel_button = gr.Button(visible=False)
417
- cancel_button.click(
418
- fn=_cancel_run,
419
- inputs=[cancel_in, cancel_token],
420
- outputs=cancel_out,
421
- api_name="cancel_run",
422
- )
423
-
424
-
425
- if __name__ == "__main__":
426
- demo.queue().launch(
427
- server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
428
- server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
429
- )
 
1
+ """SimReady Validator — Gradio UI for the HuggingFace Space.
2
+
3
+ Two surfaces, same engine:
4
+
5
+ - **/run** (the on-screen button) — streams log lines to the UI for
6
+ interactive use by an operator in the browser.
7
+ - **/run_api** (hidden, programmatic) — returns the full RunResult as
8
+ a JSON-serializable dict. This is what `tools/hf_watch/call_hf_space.py`
9
+ hits from the GitHub Actions runner so the workflow can patch
10
+ status.json and asset-status.json without scraping the UI's text.
11
+
12
+ Both go through `runner.run()`. The split is purely about output
13
+ shape (streaming text vs. one-shot dict).
14
+
15
+ The Space is internal-pilot scope: HF_TOKEN comes from the Space's
16
+ secrets, NOT from the requester. When a customer's dataset PR triggers
17
+ this (next milestone), the webhook payload identifies the dataset and
18
+ the Space's own token opens the verdict PR.
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ import os
24
+ from pathlib import Path
25
+
26
+ import gradio as gr
27
+
28
+ from runner import (run as run_validator, progress_path_for, cancel_path_for,
29
+ run_token_path_for, CANCEL_DIR)
30
+
31
+
32
# Profile ids offered by the Validator tab's dropdown and accepted by
# `_run_api` (which additionally accepts "auto", case-insensitively).
# NOTE(review): this list is static, while `_list_profiles` reports what the
# installed validator actually registers — confirm the two are kept in sync.
PROFILE_CHOICES = [
    "Prop-Robotics-Neutral",
    "Prop-Robotics-Physx",
    "Prop-Robotics-Isaac",
    "Robot-Body-Neutral",
    "Robot-Body-Runnable",
    "Robot-Body-Isaac",
    "Package",
    "Package-Candidate",
]
# Fallbacks: DEFAULT_PROFILE is the dropdown's initial value and what
# `_run_api` substitutes for an empty profile; DEFAULT_VERSION fills in
# for an empty version on both endpoints.
DEFAULT_PROFILE = "Prop-Robotics-Neutral"
DEFAULT_VERSION = "1.0.0"
44
+
45
+
46
def _run_api(dataset: str, profile: str, version: str, open_pr: bool,
             submission_id: str = "", force: bool = False,
             preliminary: bool = False) -> dict:
    """Programmatic endpoint. Returns the RunResult as a JSON dict.

    Caller is typically `tools/hf_watch/call_hf_space.py` running from
    a GitHub Actions ubuntu-latest runner. Output shape must stay
    stable — bump `schema_version` if you change it. The receiver
    pattern-matches on the same field names `tools/hf_watch/validate.py`
    produces, so status.json patching is identical regardless of which
    backend ran the validation.

    Args:
        dataset: HuggingFace dataset id; surrounding whitespace is stripped.
        profile: One of `PROFILE_CHOICES` or "auto" (case-insensitive).
            Empty falls back to `DEFAULT_PROFILE`.
        version: Profile version. Empty falls back to `DEFAULT_VERSION`.
            Must start with a word character and contain only word
            characters, ".", and "-".
        open_pr: Forwarded to the runner as a bool.
        submission_id: Optional — when set, the validator writes per-asset
            progress to /tmp/sr-progress/<id>.json, which the get_progress
            endpoint serves to the dashboard.
        force: Forwarded to the runner as a bool.
        preliminary: Switches the runner to a structure-only sweep:
            zip-bundled datasets are scanned (instead of failing
            PKG.NO-ARCHIVES at the listing stage) and per-asset validation
            is sliced to the first asset only. Used by the dashboard's
            Preliminary scan tab.

    Returns:
        Dict with `schema_version`, `dataset`, `profile`, `version`,
        `status`, `summary`, `results_json` (absolute paths stripped), and
        `pr_url`.

    Raises:
        ValueError: If `profile` or `version` fails validation.
    """
    import re

    print(f"[run_api] preliminary={preliminary!r} force={force!r} "
          f"submission_id={submission_id!r}", flush=True)
    # Untrusted callers can hit /run_api directly — profile/version flow
    # into the validator's argv, so validate them before use. Empty
    # falls back to the defaults (existing behavior).
    profile = profile or DEFAULT_PROFILE
    if profile.lower() != "auto" and profile not in PROFILE_CHOICES:
        raise ValueError(f"invalid profile: {profile!r}")
    version = (version or DEFAULT_VERSION).strip()
    # The first character must be a word character: a leading "-" would be
    # parsed by the validator CLI as an option ("--use-kit"), and a leading
    # "." admits path-like values such as "..".
    if not re.fullmatch(r"\w[\w.\-]*", version):
        raise ValueError(f"invalid version: {version!r}")
    result = run_validator(
        dataset=(dataset or "").strip(),
        profile=profile,
        version=version,
        open_pr=bool(open_pr),
        submission_id=(submission_id or "").strip(),
        force=bool(force),
        preliminary=bool(preliminary),
    )
    return {
        "schema_version": 1,
        "dataset": result.dataset,
        "profile": result.profile,
        "version": result.version,
        "status": result.status,
        "summary": result.summary,
        "results_json": _sanitize_results_json(result.results_json),
        "pr_url": result.pr_url,
    }
99
+
100
+
101
def _list_profiles() -> dict:
    """Return the profiles that actually load on this Space.

    The dashboard polls this to populate its dropdown so operators can't
    pick a profile that would fatally fail at registration time.

    Uses --use-plugin since the default CLI loader has known
    registration mismatches against the current foundation pin; the
    plugin path is what runner.py's streaming-zip flow falls back
    to and is the source of truth for "actually usable" here.

    Output format from validate.py is `PROFILE: <id> v<version>`
    per profile, one per line; any other line is ignored.

    Returns:
        On a completed subprocess: `{"profiles": [...], "schema_version": 1,
        "rc": <exit code>}`, plus `stderr_tail` / `stdout_tail` (last 20
        lines, capped at 2000 chars) when no profile line was parsed.
        On timeout or any other failure: `{"profiles": [],
        "schema_version": 1, "error": "<reason>"}`. The function reports
        failures in the payload instead of raising.
    """
    import subprocess
    import sys

    timeout_s = 300
    try:
        # Resolved inside the try so a broken runner import is reported
        # through the same JSON error shape as every other failure.
        from runner import VALIDATOR

        proc = subprocess.run(
            # --list-profiles only ENUMERATES registered profiles from the
            # spec/plugin registry (--use-plugin) — it runs no validation
            # rules, so it never needs Kit. Force --no-use-kit: on a
            # Kit-enabled image the validator auto-enables --use-kit for the
            # PhysX-bearing default profile and boots the full Isaac Sim
            # runtime (~5 min) just to print the list, blowing the
            # timeout below. Actual validation (runner.py) still uses Kit.
            [sys.executable, str(VALIDATOR), "--list-profiles", "--use-plugin", "--no-use-kit"],
            capture_output=True, text=True, timeout=timeout_s,
        )
    except subprocess.TimeoutExpired:
        return {"profiles": [], "schema_version": 1,
                "error": f"timeout after {timeout_s}s (spec load >5 min)"}
    except Exception as e:
        return {"profiles": [], "schema_version": 1,
                "error": f"{type(e).__name__}: {e}"}

    stdout = proc.stdout or ""
    names = []
    for line in stdout.splitlines():
        s = line.strip()
        # Validator emits "PROFILE: <id> v<version>" — that's our
        # only authoritative shape. Anything else is noise.
        if s.startswith("PROFILE:"):
            fields = s[len("PROFILE:"):].split()
            if fields:
                names.append(fields[0])
    # Dedupe while preserving first-seen order.
    unique = list(dict.fromkeys(names))
    result = {"profiles": unique, "schema_version": 1, "rc": proc.returncode}
    if not unique:
        # Nothing parsed — surface why so the dashboard can show
        # something useful. Truncate so the JSON response stays small.
        result["stderr_tail"] = "\n".join((proc.stderr or "").splitlines()[-20:])[:2000]
        result["stdout_tail"] = "\n".join(stdout.splitlines()[-20:])[:2000]
    return result
160
+
161
+
162
def _cancel_run(submission_id: str, run_token: str = "") -> dict:
    """Drop the cancel flag file for a submission.

    The flag lives at `cancel_path_for(submission_id)` and its content is
    the stripped `run_token` — the per-run token the dashboard read from
    get_progress. Per the runner's contract (not visible here), the
    streaming-zip loop checks the flag between zips and only honors it
    for the run whose token matches, so a stale flag from a prior run of
    the same submission cannot abort a fresh one. Calling this repeatedly
    simply rewrites the same file.

    Returns:
        `{"state": "no_id"}` when the id is blank or maps to no path,
        `{"state": "signaled", "path": ...}` once the flag is written, or
        `{"state": "error", "error": ...}` if the filesystem refuses.

    NOTE(review): the success payload echoes the flag's absolute path,
    whereas `_sanitize_results_json` strips such paths from /run_api
    output — confirm clients tolerate it here.
    """
    wanted = (submission_id or "").strip()
    flag_file = cancel_path_for(wanted) if wanted else None
    if flag_file is None:
        return {"state": "no_id"}

    token_text = (run_token or "").strip()
    try:
        CANCEL_DIR.mkdir(parents=True, exist_ok=True)
        flag_file.write_text(token_text, encoding="utf-8")
    except OSError as exc:
        return {"state": "error", "error": f"{type(exc).__name__}: {exc}"}
    return {"state": "signaled", "path": str(flag_file)}
184
+
185
+
186
def _get_progress(submission_id: str) -> dict:
    """Read the validator's per-asset progress file for this submission.

    Polled by the dashboard ~every 3 s while a Validate-now click is
    in-flight, so the "Validate now" button can fill up as the
    validator works through the asset list.

    Returns one of these shapes:
      - {"state": "no_id"} — blank submission id.
      - {"state": "not_found", "run_token": ...} — no progress file
        (Space restarted, or the dashboard is polling a Space-run that
        never happened).
      - {"state": "transient", "run_token": ...} — the file exists but
        could not be read as a JSON object (typically caught mid-write);
        the caller is expected to poll again.
      - the progress file's own JSON object, e.g. {"state": "starting"}
        as seeded by runner.py, or {processed, total, current,
        started_at, updated_at} as written by validate.py._emit_progress.

    `run_token` is the current run's cancel token from the sidecar file
    (see runner.run_token_path_for). It is surfaced so the dashboard can
    echo it back to cancel_run — a cancel then only aborts the run it was
    issued against, never a later one that reused the submission_id. On
    the file-backed shape it is only added when non-empty.

    Caller treats anything with total > 0 as "show the fill bar".
    """
    sid = (submission_id or "").strip()
    if not sid:
        return {"state": "no_id"}

    run_token = ""
    tok_path = run_token_path_for(sid)
    if tok_path and tok_path.is_file():
        try:
            run_token = tok_path.read_text(encoding="utf-8").strip()
        except (OSError, ValueError):
            # Best effort: an unreadable or undecodable token just means
            # the response carries no token this round.
            run_token = ""

    path = progress_path_for(sid)
    if path is None or not path.is_file():
        return {"state": "not_found", "run_token": run_token}

    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and the
        # UnicodeDecodeError raised when a write is caught halfway
        # through a multi-byte character. Caller polls again shortly.
        return {"state": "transient", "run_token": run_token}
    if not isinstance(data, dict):
        # Valid JSON but not an object — never a usable progress record.
        return {"state": "transient", "run_token": run_token}
    if run_token:
        data["run_token"] = run_token
    return data
232
+
233
+
234
def _sanitize_results_json(raw: dict) -> dict:
    """Return a copy of results_json with Space-local filesystem paths removed.

    Why: Gradio's JSON serializer treats string fields that resolve to
    files on the Space's filesystem as downloadable references and tries
    to serve them through `/gradio_api/file=...`. The validator's
    results.json carries absolute paths (the target dir and each asset's
    `path`) into the Space's ephemeral tempdir, which is NOT exposed via
    gradio's allowed_paths — gradio_client then fails with 403 trying to
    auto-fetch them after a successful run.

    What is removed:
      - top-level `target`, `specs_docs_dir`, `dashboard_docs_dir`;
      - the `path` key of every dict entry in a list-valued `results`
        (entries that are not dicts are left out of the list).

    Everything else (profile_coverage, summary, layout_findings, each
    asset's `rel_path` / `passed` / `issues`, ...) passes through in its
    original key order. The input is not mutated; a non-dict input is
    returned unchanged.
    """
    if not isinstance(raw, dict):
        return raw

    local_only_keys = ("target", "specs_docs_dir", "dashboard_docs_dir")
    cleaned = {}
    for key, value in raw.items():
        if key in local_only_keys:
            continue
        if key == "results" and isinstance(value, list):
            value = [
                {field: item for field, item in entry.items() if field != "path"}
                for entry in value
                if isinstance(entry, dict)
            ]
        cleaned[key] = value
    return cleaned
263
+
264
+
265
def _run_streaming(dataset: str, profile: str, version: str, open_pr: bool):
    """Generator behind the on-screen Validate button (also exposed as /run).

    Yields 4-tuples for the outputs wired to the button, in order:
    (log text, verdict markdown, summary text, report file path or None).

    It yields exactly twice: a "(running…)" placeholder before the
    validator starts, then either the final result or an error tuple.
    Log lines are collected through the `log` callback while
    `run_validator` runs and are shown with the second yield.

    `version` is checked before it reaches the validator's argv, using
    the same rule as /run_api's hardened check: it must start with a
    word character and contain only word characters, ".", and "-". A
    rejected version is reported through the normal error tuple rather
    than raised.
    NOTE(review): `profile` is passed through unchecked, presumably
    relying on the dropdown's own choice validation — confirm for direct
    API callers.
    """
    import re

    lines: list[str] = []

    def log(line: str) -> None:
        lines.append(line)

    yield "\n".join(lines), "", "(running…)", None

    try:
        # `or ""` keeps a null field from crashing outside the error path.
        clean_version = (version or "").strip() or DEFAULT_VERSION
        # A leading "-" would be parsed by the validator CLI as an option.
        if not re.fullmatch(r"\w[\w.\-]*", clean_version):
            raise ValueError(f"invalid version: {clean_version!r}")
        result = run_validator(
            dataset=(dataset or "").strip(),
            profile=profile,
            version=clean_version,
            open_pr=open_pr,
            log=log,
        )
    except Exception as e:
        lines.append(f"\nERROR: {type(e).__name__}: {e}")
        yield "\n".join(lines), "", f"error: {e}", None
        return

    status_badge = f"**{result.status.upper()}** — {result.summary}"
    if result.pr_url:
        status_badge += f"\n\nPR: {result.pr_url}"

    # Only offer the report for download when the file was actually produced.
    report_index = result.report_path / "index.html"
    report_url = str(report_index) if report_index.is_file() else None

    yield (
        "\n".join(lines),
        status_badge,
        result.summary,
        report_url,
    )
302
+
303
+
304
def _read_md(name: str) -> str:
    """Return the markdown file `name` with any YAML frontmatter removed.

    `name` is resolved relative to this file's directory. Frontmatter is
    recognised only when the first line is exactly `---` (trailing
    whitespace allowed); everything up to and including the next `---`
    line is dropped and leading whitespace of the remainder is stripped.
    An empty frontmatter block and a closing fence on the file's last
    line (no trailing newline) are both handled. If the opening fence is
    never closed, the text is returned untouched.

    Falls back to a friendly stub when the file is missing — keeps the
    Space bootable even before the space-deploy workflow has synced the
    assembled docs into the container.
    """
    p = Path(__file__).resolve().parent / name
    try:
        src = p.read_text(encoding="utf-8")
    except FileNotFoundError:
        return f"_{name} not yet synced into this Space — check back after the next deploy._"

    lines = src.split("\n")
    if lines[0].rstrip() != "---":
        return src
    # Scan from the line right after the opening fence, so an immediately
    # following `---` (empty frontmatter) is seen as the closing fence
    # instead of a later horizontal rule in the body.
    for idx, line in enumerate(lines[1:], start=1):
        if line.rstrip() == "---":
            return "\n".join(lines[idx + 1:]).lstrip()
    return src
321
+
322
+
323
+ with gr.Blocks(title="SimReady Validator") as demo:
324
+ with gr.Tabs():
325
+ with gr.Tab("Overview"):
326
+ gr.Markdown(_read_md("README.md"))
327
+ with gr.Tab("Validator"):
328
+ gr.Markdown(
329
+ "Submit a HuggingFace dataset to validate against a SimReady "
330
+ "profile. With **Open PR** enabled, the verdict is uploaded "
331
+ "back to the dataset as a `validation/` pull request."
332
+ )
333
+ with gr.Row():
334
+ dataset = gr.Textbox(
335
+ label="Dataset",
336
+ placeholder="org/dataset (e.g. imagineio/PhysicalAI-SimReady-Kitchens-v1)",
337
+ )
338
+ with gr.Row():
339
+ profile = gr.Dropdown(
340
+ choices=PROFILE_CHOICES, value=DEFAULT_PROFILE, label="Profile",
341
+ )
342
+ version = gr.Textbox(label="Version", value=DEFAULT_VERSION)
343
+ open_pr = gr.Checkbox(label="Open PR on dataset with verdict", value=False)
344
+ run_btn = gr.Button("Validate", variant="primary")
345
+ status_md = gr.Markdown(label="Verdict")
346
+ summary_box = gr.Textbox(label="Summary", interactive=False)
347
+ log_box = gr.Textbox(label="Log", lines=20, interactive=False)
348
+ report_link = gr.File(label="HTML report (download)", interactive=False)
349
+ with gr.Tab("Partner walkthrough"):
350
+ gr.Markdown(_read_md("VALIDATE.md"))
351
+
352
+ run_btn.click(
353
+ fn=_run_streaming,
354
+ inputs=[dataset, profile, version, open_pr],
355
+ outputs=[log_box, status_md, summary_box, report_link],
356
+ api_name="run",
357
+ )
358
+
359
+ # Programmatic endpoint — bound to invisible components so the UI
360
+ # doesn't render anything extra, but the Gradio queue still exposes
361
+ # an `/api/predict/run_api` route the gradio_client can hit. The
362
+ # outputs[0] is the JSON return; api_name turns it into a stable
363
+ # path the GitHub Actions caller depends on.
364
+ api_dataset = gr.Textbox(visible=False)
365
+ api_profile = gr.Textbox(visible=False)
366
+ api_version = gr.Textbox(visible=False)
367
+ api_open_pr = gr.Checkbox(visible=False)
368
+ api_submission_id = gr.Textbox(visible=False)
369
+ api_force = gr.Checkbox(visible=False)
370
+ api_preliminary = gr.Checkbox(visible=False)
371
+ api_output = gr.JSON(visible=False)
372
+ api_button = gr.Button(visible=False)
373
+ api_button.click(
374
+ fn=_run_api,
375
+ inputs=[api_dataset, api_profile, api_version, api_open_pr,
376
+ api_submission_id, api_force, api_preliminary],
377
+ outputs=api_output,
378
+ api_name="run_api",
379
+ )
380
+
381
+ # Progress endpoint — polled by the dashboard while a row is
382
+ # validating. CORS is open on /gradio_api/* by default, so the
383
+ # browser can fetch this from github.io directly without any
384
+ # GitHub-Actions side polling/commit churn.
385
+ prog_in = gr.Textbox(visible=False)
386
+ prog_out = gr.JSON(visible=False)
387
+ prog_button = gr.Button(visible=False)
388
+ prog_button.click(
389
+ fn=_get_progress,
390
+ inputs=[prog_in],
391
+ outputs=prog_out,
392
+ api_name="get_progress",
393
+ )
394
+
395
+ # Profile-listing endpoint — polled by the dashboard at startup
396
+ # so its dropdown reflects what's actually loadable on this Space
397
+ # right now (foundation+validator pin determines which profiles
398
+ # register). Stops the operator from picking something that
399
+ # would fail fatally at runtime.
400
+ profiles_out = gr.JSON(visible=False)
401
+ profiles_button = gr.Button(visible=False)
402
+ profiles_button.click(
403
+ fn=_list_profiles,
404
+ inputs=None,
405
+ outputs=profiles_out,
406
+ api_name="list_profiles",
407
+ )
408
+
409
+ # Cancel endpoint — the dashboard's Cancel button calls this AFTER
410
+ # cancelling the GH Action so the in-flight server-side gradio call
411
+ # actually stops (cancelling the Action alone only kills the
412
+ # gradio_client wrapper, the Space's loop keeps going).
413
+ cancel_in = gr.Textbox(visible=False)
414
+ cancel_token = gr.Textbox(visible=False)
415
+ cancel_out = gr.JSON(visible=False)
416
+ cancel_button = gr.Button(visible=False)
417
+ cancel_button.click(
418
+ fn=_cancel_run,
419
+ inputs=[cancel_in, cancel_token],
420
+ outputs=cancel_out,
421
+ api_name="cancel_run",
422
+ )
423
+
424
+
425
# Script entry point: call `demo.queue()` and launch the server on the
# result. Bind address and port come from GRADIO_SERVER_NAME /
# GRADIO_SERVER_PORT, defaulting to all interfaces ("0.0.0.0") on port 7860.
if __name__ == "__main__":
    demo.queue().launch(
        server_name=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        # The env value is a string; launch() is given an int port.
        server_port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )