Michael Rabinovich committed on
Commit
1748c76
·
1 Parent(s): 0501689

submit: write pending row + upload zip on successful validation

Browse files

Step 6 (E) chunk 3. Cheap-sync validation (chunk 2) now also commits
to the Hub: on success the handler uploads submissions/<id>.zip and
appends a status=pending row to results.jsonl under a process-wide
threading.Lock. Eval is still stubbed, so the row stays pending
until chunk 4 lands the background worker.

Hub-write ordering (after validation passes):
1. Upload submissions/<id>.zip. Unique path per submission, no
lock needed (HfApi handles its own commit serialization).
2. Build the pending row: metadata fields + status="pending" +
failure_reason=null + submission_blob_url + null score fields,
matching the "Pending" regime in cadgenbench-submissions/schema.md.
3. Acquire _HUB_LOCK, download current results.jsonl (treat
EntryNotFoundError as empty), append one JSON line, re-upload.
Lock held only for the RMW cycle (~1-2s), not for eval time;
concurrent submitters serialise here, not in the worker.

If step 1 fails the submitter sees a clean "Hub upload failed"
rejection. If step 3 fails the zip is left orphaned in submissions/
and the submitter sees a clean "Hub write failed" rejection; an
orphan-zip sweep is a future-chunk concern (covered by the eventual
stuck-pending recovery in chunk 6 plus a manual sweep if needed).

leaderboard.py:
- Add `status` as the first leaderboard column so pending rows are
visually distinguishable from completed ones.
- Backfill `status` to "completed" in-memory for legacy rows
written before the schema bump (the three baseline seeds);
populated rows are completed by definition.

cadgenbench_version and cadgenbench_data_revision get pinned on the
pending row at submit time so the row is meaningful even if the
worker never runs and chunk 6 ends up flipping it to failed.
data_revision is the short sha from HfApi().dataset_info; cached
per-process and falls back to "unknown" on Hub flake (it's a
metadata field, not worth failing a submit over).

HF_DATA_REPO and HF_SUBMISSIONS_REPO are imported from leaderboard.py
for now; pulling them into a shared config module is a worthwhile
refactor once submit.py grows again in chunks 4+.

Files changed (2) hide show
  1. leaderboard.py +8 -0
  2. submit.py +200 -10
leaderboard.py CHANGED
@@ -23,6 +23,7 @@ HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
23
  LOCAL_RESULTS_PATH = Path(__file__).parent / "results.jsonl"
24
 
25
  LEADERBOARD_COLS = [
 
26
  "submission_name",
27
  "submitter_name",
28
  "aggregate_score",
@@ -80,6 +81,13 @@ def load_leaderboard() -> pd.DataFrame:
80
  if not rows:
81
  return pd.DataFrame(columns=LEADERBOARD_COLS)
82
  df = pd.DataFrame(rows)
 
 
 
 
 
 
 
83
  cols = [c for c in LEADERBOARD_COLS if c in df.columns]
84
  df = (
85
  df[cols]
 
23
  LOCAL_RESULTS_PATH = Path(__file__).parent / "results.jsonl"
24
 
25
  LEADERBOARD_COLS = [
26
+ "status",
27
  "submission_name",
28
  "submitter_name",
29
  "aggregate_score",
 
81
  if not rows:
82
  return pd.DataFrame(columns=LEADERBOARD_COLS)
83
  df = pd.DataFrame(rows)
84
+ # Backfill `status` for legacy rows written before the schema bump
85
+ # (the three baseline seed rows). They all have populated score
86
+ # fields, so "completed" is the correct retrofit.
87
+ if "status" not in df.columns:
88
+ df["status"] = "completed"
89
+ else:
90
+ df["status"] = df["status"].fillna("completed")
91
  cols = [c for c in LEADERBOARD_COLS if c in df.columns]
92
  df = (
93
  df[cols]
submit.py CHANGED
@@ -1,10 +1,11 @@
1
  """Submit-tab handler for the CADGenBench leaderboard Space.
2
 
3
- Step 6 (E) chunk 2: the cheap-sync validation pipeline. The handler
4
- validates the upload and returns a placeholder message describing the
5
- submission it would have queued. No Hub writes, no eval kick-off, no
6
- background work. The async write + worker thread land in later chunks
7
- of Step 6 (E); see ``space-setup/step-6e-async.md``.
 
8
 
9
  Validation gates, in order:
10
 
@@ -23,19 +24,40 @@ Validation gates, in order:
23
  geometry. Per-fixture validity (watertight, manifold, etc) is
24
  *not* checked here, that's the evaluator's job and contributes to
25
  the per-fixture score; this gate only rejects "not actually STEP".
 
 
 
 
 
 
 
 
 
 
 
 
26
  """
27
  from __future__ import annotations
28
 
29
  import json
 
30
  import re
31
  import tempfile
 
32
  import zipfile
33
  from datetime import datetime, timezone
34
  from pathlib import Path
35
  from typing import Any
36
 
 
37
  from cadgenbench.common.paths import data_inputs_dir
38
  from cadgenbench.common.validity import parse_step
 
 
 
 
 
 
39
 
40
  NOTES_MAX_CHARS = 500
41
  REQUIRED_META_KEYS: tuple[str, ...] = (
@@ -46,12 +68,31 @@ REQUIRED_META_KEYS: tuple[str, ...] = (
46
  "agree_to_publish",
47
  )
48
  SUBMISSION_ID_SLUG_MAX = 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
 
51
  class _ValidationError(Exception):
52
  """Internal sentinel that maps to a user-facing rejection message."""
53
 
54
 
 
 
 
 
55
  def handle_submit(
56
  zip_file,
57
  submission_name: str, # noqa: ARG001 - kept for UI compat; meta.json wins
@@ -93,12 +134,19 @@ def handle_submit(
93
  submission_id = _mint_submission_id(
94
  meta["submitter_name"], meta["submission_name"]
95
  )
 
 
 
 
 
 
 
96
  return (
97
- f"**Validation OK.** Would queue submission `{submission_id}` "
98
- f"(submitter: `{meta['submitter_name']}`, system: "
99
- f"`{meta['submission_name']}`, {len(fixture_names)} fixtures).\n\n"
100
- f"_Chunk 2 of Step 6 (E): validation only. Hub write + eval "
101
- f"kick-off land in the next chunk._"
102
  )
103
 
104
 
@@ -259,3 +307,145 @@ def _slug(s: str) -> str:
259
  """Filesystem-safe slug. Lowercase, ``[a-z0-9-]``, collapsed dashes."""
260
  cleaned = re.sub(r"[^A-Za-z0-9]+", "-", s).strip("-").lower()
261
  return cleaned[:SUBMISSION_ID_SLUG_MAX] or "unnamed"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """Submit-tab handler for the CADGenBench leaderboard Space.
2
 
3
+ Step 6 (E) chunks 2 + 3: cheap-sync validation pipeline + pending-row
4
+ write + zip upload. The handler validates the upload, uploads the
5
+ zip to ``submissions/<id>.zip``, appends a ``status: pending`` row to
6
+ ``results.jsonl`` (under a process-wide lock), and returns
7
+ immediately. No eval and no worker yet, the row stays pending
8
+ forever until later chunks add the background thread.
9
 
10
  Validation gates, in order:
11
 
 
24
  geometry. Per-fixture validity (watertight, manifold, etc) is
25
  *not* checked here, that's the evaluator's job and contributes to
26
  the per-fixture score; this gate only rejects "not actually STEP".
27
+
28
+ Hub-write ordering (after validation passes):
29
+
30
+ 1. Upload ``submissions/<id>.zip``. Unique path per submission, no
31
+ lock needed.
32
+ 2. Build pending row (metadata + null scores + ``submission_blob_url``).
33
+ 3. Acquire ``_HUB_LOCK``; download current ``results.jsonl`` (or
34
+ start empty); append the pending row; re-upload.
35
+
36
+ If step 1 fails the user sees a clean rejection. If step 3 fails the
37
+ zip is left orphaned in ``submissions/`` and the user sees a clean
38
+ rejection; an orphan-zip sweep is a future-chunk concern.
39
  """
40
  from __future__ import annotations
41
 
42
  import json
43
+ import logging
44
  import re
45
  import tempfile
46
+ import threading
47
  import zipfile
48
  from datetime import datetime, timezone
49
  from pathlib import Path
50
  from typing import Any
51
 
52
+ import cadgenbench
53
  from cadgenbench.common.paths import data_inputs_dir
54
  from cadgenbench.common.validity import parse_step
55
+ from huggingface_hub import HfApi
56
+ from huggingface_hub.errors import EntryNotFoundError
57
+
58
+ from leaderboard import HF_DATA_REPO, HF_SUBMISSIONS_REPO
59
+
60
+ logger = logging.getLogger(__name__)
61
 
62
  NOTES_MAX_CHARS = 500
63
  REQUIRED_META_KEYS: tuple[str, ...] = (
 
68
  "agree_to_publish",
69
  )
70
  SUBMISSION_ID_SLUG_MAX = 40
71
+ RESULTS_FILENAME = "results.jsonl"
72
+ SUBMISSIONS_DIR = "submissions"
73
+ DATA_REV_SHORT_LEN = 12
74
+
75
+ # One HfApi client per process. HF_TOKEN is picked up from the env at
76
+ # construction time and reused for every call.
77
+ _HF_API = HfApi()
78
+
79
+ # Process-wide lock guarding the read-modify-write of results.jsonl.
80
+ # The Space is single-process so a threading.Lock is sufficient; held
81
+ # only for the duration of the RMW cycle (~1-2s), not for eval time.
82
+ _HUB_LOCK = threading.Lock()
83
+
84
+ # Lazily-resolved cadgenbench-data revision, cached per process.
85
+ _DATA_REVISION: str | None = None
86
 
87
 
88
  class _ValidationError(Exception):
89
  """Internal sentinel that maps to a user-facing rejection message."""
90
 
91
 
92
+ class _HubWriteError(Exception):
93
+ """Raised when a Hub upload fails after validation succeeded."""
94
+
95
+
96
  def handle_submit(
97
  zip_file,
98
  submission_name: str, # noqa: ARG001 - kept for UI compat; meta.json wins
 
134
  submission_id = _mint_submission_id(
135
  meta["submitter_name"], meta["submission_name"]
136
  )
137
+ try:
138
+ blob_url = _upload_submission_zip(submission_id, zip_path)
139
+ row = _build_pending_row(submission_id, meta, fixture_names, blob_url)
140
+ _append_pending_row(row)
141
+ except _HubWriteError as e:
142
+ return f"**Submission rejected.** {e}"
143
+
144
  return (
145
+ f"**Queued.** Submission `{submission_id}` has been accepted and a "
146
+ f"`pending` row added to the leaderboard (submitter: "
147
+ f"`{meta['submitter_name']}`, system: `{meta['submission_name']}`, "
148
+ f"{len(fixture_names)} fixtures). Evaluation will populate the "
149
+ f"score columns once the worker lands in a later chunk."
150
  )
151
 
152
 
 
307
  """Filesystem-safe slug. Lowercase, ``[a-z0-9-]``, collapsed dashes."""
308
  cleaned = re.sub(r"[^A-Za-z0-9]+", "-", s).strip("-").lower()
309
  return cleaned[:SUBMISSION_ID_SLUG_MAX] or "unnamed"
310
+
311
+
312
def _upload_submission_zip(submission_id: str, zip_path: Path) -> str:
    """Push the validated zip to the submissions repo and return its URL.

    The archive is stored at ``<SUBMISSIONS_DIR>/<submission_id>.zip`` in
    the ``HF_SUBMISSIONS_REPO`` dataset repo.

    Args:
        submission_id: Identifier minted for this submission; used as the
            file stem in the repo and in the commit message.
        zip_path: Local path of the zip to upload.

    Returns:
        The ``resolve/main`` URL of the uploaded file on huggingface.co.

    Raises:
        _HubWriteError: If the upload raises anything at all. The message
            is user-facing and embeds the exception type and text; the
            full traceback is logged.
    """
    destination = "/".join((SUBMISSIONS_DIR, f"{submission_id}.zip"))

    try:
        upload_kwargs = {
            "path_or_fileobj": str(zip_path),
            "path_in_repo": destination,
            "repo_id": HF_SUBMISSIONS_REPO,
            "repo_type": "dataset",
            "commit_message": f"add submission zip for {submission_id}",
        }
        _HF_API.upload_file(**upload_kwargs)
    except Exception as exc:  # noqa: BLE001 - Hub API surface is broad
        logger.exception("Failed to upload submission zip %s", submission_id)
        reason = (
            f"Server-side error uploading submission zip "
            f"({type(exc).__name__}: {exc}). Please try again later."
        )
        raise _HubWriteError(reason) from exc

    base_url = f"https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
    return f"{base_url}/resolve/main/{destination}"
338
+
339
+
340
def _build_pending_row(
    submission_id: str,
    meta: dict[str, Any],
    fixture_names: set[str],  # noqa: ARG001 - kept for future per-fixture pre-fill
    blob_url: str,
) -> dict[str, Any]:
    """Assemble the ``results.jsonl`` row for a just-accepted submission.

    The row carries the submitter metadata, ``status`` set to
    ``"pending"``, a UTC submission timestamp, the installed
    ``cadgenbench`` version, the data revision, and the zip URL. Every
    score field and ``failure_reason`` is ``None``.

    Args:
        submission_id: Identifier minted for this submission.
        meta: Parsed submission metadata; must contain
            ``submitter_name``, ``submission_name``, ``agent_url`` and
            ``notes``.
        fixture_names: Currently unused.
        blob_url: URL of the uploaded zip, stored as
            ``submission_blob_url``.

    Returns:
        The row as a plain dict, with keys in schema order.

    Raises:
        KeyError: If ``meta`` lacks one of the four metadata keys.
    """
    pending: dict[str, Any] = {
        "submission_id": submission_id,
        "status": "pending",
        "failure_reason": None,
    }

    # Copy the user-supplied metadata verbatim, in schema order.
    for field in ("submitter_name", "submission_name", "agent_url", "notes"):
        pending[field] = meta[field]

    submitted = datetime.now(timezone.utc)
    pending["submitted_at"] = submitted.strftime("%Y-%m-%dT%H:%M:%SZ")
    pending["cadgenbench_version"] = cadgenbench.__version__
    pending["cadgenbench_data_revision"] = _resolve_data_revision()

    # Score-shaped fields stay null while the row is pending.
    score_fields = (
        "validity_rate",
        "aggregate_score",
        "score_by_task_type",
        "per_task_scores",
        "per_fixture_scores",
        "per_fixture_breakdown",
    )
    pending.update(dict.fromkeys(score_fields))

    pending["submission_blob_url"] = blob_url
    return pending
371
+
372
+
373
def _append_pending_row(row: dict[str, Any]) -> None:
    """Append a pending row to ``results.jsonl`` on the Hub under the lock.

    Read-modify-write: download the current file (or treat it as empty
    if it doesn't exist yet), append one JSON line, re-upload. The whole
    cycle runs while holding ``_HUB_LOCK`` so concurrent submitters in
    this process serialise here.

    Args:
        row: The row to append; must be JSON-serialisable and contain
            ``submission_id`` (used in the commit and log messages).

    Raises:
        _HubWriteError: If either the download or the re-upload fails.
            The message is user-facing; the traceback is logged.
    """
    with _HUB_LOCK:
        try:
            existing = _download_results_jsonl()
        except Exception as e:  # noqa: BLE001 - Hub API surface is broad
            logger.exception("Failed to download results.jsonl for append")
            raise _HubWriteError(
                f"Server-side error reading the submissions table "
                f"({type(e).__name__}: {e}). Please try again later."
            ) from e

        line = json.dumps(row, ensure_ascii=False)
        # A file whose last record lacks a trailing newline (e.g. a
        # hand-edited seed file) would otherwise have the new row glued
        # onto that record, corrupting both JSONL lines.
        if existing and not existing.endswith("\n"):
            existing += "\n"
        new_body = existing + line + "\n"

        try:
            _HF_API.upload_file(
                path_or_fileobj=new_body.encode("utf-8"),
                path_in_repo=RESULTS_FILENAME,
                repo_id=HF_SUBMISSIONS_REPO,
                repo_type="dataset",
                commit_message=(
                    f"add pending row for {row['submission_id']}"
                ),
            )
        except Exception as e:  # noqa: BLE001 - Hub API surface is broad
            logger.exception(
                "Failed to upload appended results.jsonl for %s",
                row["submission_id"],
            )
            raise _HubWriteError(
                f"Server-side error writing the submissions table "
                f"({type(e).__name__}: {e}). The submission zip was "
                f"uploaded but the row was not; please try again later."
            ) from e
415
+
416
+
417
def _download_results_jsonl() -> str:
    """Return the text of ``results.jsonl`` from the submissions repo.

    The file is fetched with ``force_download=True`` and decoded as
    UTF-8.

    Returns:
        The file body, or ``""`` when the repo has no such file yet.

    Raises:
        Exception: Any Hub error other than ``EntryNotFoundError``
            propagates to the caller.
    """
    from huggingface_hub import hf_hub_download

    try:
        local_copy = Path(
            hf_hub_download(
                repo_id=HF_SUBMISSIONS_REPO,
                filename=RESULTS_FILENAME,
                repo_type="dataset",
                force_download=True,
            )
        )
    except EntryNotFoundError:
        # First submission ever: nothing to append to.
        return ""
    return local_copy.read_text(encoding="utf-8")
431
+
432
+
433
def _resolve_data_revision() -> str:
    """Return a short sha for the cadgenbench-data dataset.

    A successfully resolved sha is cached in ``_DATA_REVISION`` for the
    life of the process. Failures are *not* cached: a transient Hub
    error yields ``"unknown"`` for that call only, and the next call
    retries, so one flake cannot pin every later row to ``"unknown"``.

    Returns:
        The first ``DATA_REV_SHORT_LEN`` characters of the dataset sha,
        or ``"unknown"`` if the Hub call fails or reports no sha. This
        function never raises; the revision is metadata and must not
        block a submission.
    """
    global _DATA_REVISION
    if _DATA_REVISION is not None:
        return _DATA_REVISION
    try:
        sha = _HF_API.dataset_info(HF_DATA_REPO).sha
    except Exception as e:  # noqa: BLE001 - metadata only, don't fail the submit
        logger.warning(
            "Failed to resolve cadgenbench-data revision (%s: %s)",
            type(e).__name__, e,
        )
        return "unknown"
    if not sha:
        # No sha reported; leave the cache empty so a later call retries.
        return "unknown"
    _DATA_REVISION = sha[:DATA_REV_SHORT_LEN]
    return _DATA_REVISION