| """Submit-tab handler for the CADGenBench leaderboard Space. |
| |
| Step 6 (E) chunks 2 + 3 + 4: cheap-sync validation pipeline + pending-row |
| write + zip upload + background-thread eval. The handler validates |
| the upload, uploads the zip to ``submissions/<id>.zip``, appends a |
| ``status: pending`` row to ``results.jsonl`` (under a process-wide |
| lock), spawns a daemon thread to run ``cadgenbench evaluate`` + |
| ``cadgenbench report single``, and returns immediately. The worker |
| uploads ``reports/<id>.{html,json}`` and flips the row |
| ``pending -> completed`` (or ``failed`` with a ``failure_reason``). |
| |
| Validation gates, in order: |
| |
| 1. Form-level: a file was attached. |
| 2. Zip safety: parseable as a zip, no absolute / parent-traversing |
| entry names, no symlinks. |
| 3. ``meta.json`` schema: required keys present, types sane, |
| ``agree_to_publish`` is literally ``true`` (the sole consent |
| gate; no separate UI checkbox), ``notes`` is non-empty when |
| present and within the per-submission cap. |
| 4. Fixture-set match: the set of folders inside the zip equals the |
| set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir` |
| (no missing, no extras). |
| 5. STEP parseability: each ``<fixture>/output.step`` loads as STEP |
| geometry. Per-fixture validity (watertight, manifold, etc) is |
| *not* checked here, that's the evaluator's job and contributes to |
| the per-fixture score; this gate only rejects "not actually STEP". |
| |
| Hub-write ordering (after validation passes): |
| |
| 1. Upload ``submissions/<id>.zip``. Unique path per submission, no |
| lock needed. |
| 2. Build pending row (metadata + null scores + ``submission_blob_url``). |
| 3. Acquire ``_HUB_LOCK``; download current ``results.jsonl`` (or |
| start empty); append the pending row; re-upload. |
| 4. Spawn worker thread (daemon, named after submission_id). The |
| worker owns the tempdir's lifecycle past this point. |
| |
| If step 1 fails the user sees a clean rejection. If step 3 fails the |
| zip is left orphaned in ``submissions/`` and the user sees a clean |
| rejection; an orphan-zip sweep is a future-chunk concern. |
| |
| Background worker, per submission: |
| |
| 1. ``cadgenbench evaluate <run_dir>`` (subprocess; runs |
| per-fixture eval in parallel via the CLI's ProcessPoolExecutor; |
| writes ``run_summary.json`` at the run-dir root). |
| 2. ``cadgenbench report single <run_dir> -o <report.html>`` |
| (subprocess; self-contained HTML with embedded renders). |
| 3. Upload ``reports/<id>.html`` + ``reports/<id>.json``. The JSON |
| bundles ``run_summary.json`` + every per-fixture ``result.json``. |
| 4. Read ``run_summary.json``; under ``_HUB_LOCK`` flip the row's |
| ``status`` to ``"completed"`` and merge the score fields. |
| 5. On any worker-side exception, flip the row to ``"failed"`` with |
| a short ``failure_reason``. Tempdir cleanup runs in ``finally`` |
| either way. |
| """ |
| from __future__ import annotations |
|
|
| import json |
| import logging |
| import os |
| import re |
| import shutil |
| import subprocess |
| import sys |
| import tempfile |
| import threading |
| import zipfile |
| from datetime import datetime, timezone |
| from pathlib import Path |
| from typing import Any |
|
|
| import cadgenbench |
| from cadgenbench.common.paths import data_inputs_dir |
| from cadgenbench.common.validity import parse_step |
| from huggingface_hub import HfApi |
| from huggingface_hub.errors import EntryNotFoundError |
|
|
| from leaderboard import HF_DATA_REPO, HF_SUBMISSIONS_REPO |
|
|
# Module-level logger; the submit handler and the worker threads log here.
logger = logging.getLogger(__name__)


# Cap on the length of ``meta.json``'s ``notes`` field, measured after
# whitespace normalisation (enforced in ``_normalize_notes``).
NOTES_MAX_CHARS = 500
# Keys that must be present in ``meta.json`` (checked in
# ``_load_and_validate_meta``).
REQUIRED_META_KEYS: tuple[str, ...] = (
    "submitter_name",
    "submission_name",
    "agent_url",
    "notes",
    "agree_to_publish",
)
# Maximum length of each slug that goes into a submission id (see ``_slug``).
SUBMISSION_ID_SLUG_MAX = 40
# Paths inside the submissions dataset repo.
RESULTS_FILENAME = "results.jsonl"
SUBMISSIONS_DIR = "submissions"
REPORTS_DIR = "reports"
# Number of leading sha characters kept by ``_resolve_data_revision``.
DATA_REV_SHORT_LEN = 12
# ``failure_reason`` is truncated to this many characters in ``_run_worker``.
FAILURE_REASON_MAX_CHARS = 200
# Subprocess timeouts for ``cadgenbench evaluate`` / ``cadgenbench report single``.
EVAL_TIMEOUT_SECONDS = 15 * 60
REPORT_TIMEOUT_SECONDS = 2 * 60
# Passed verbatim as the ``--workers`` argument of ``cadgenbench evaluate``;
# kept as a string because it goes straight into the argv list.
EVAL_WORKER_COUNT = "8"


# Shared Hub client used for every upload and for the ``dataset_info``
# lookup in ``_resolve_data_revision``.
_HF_API = HfApi()


# Process-wide lock held around each download / mutate / re-upload cycle of
# ``results.jsonl`` (see ``_hub_rmw_results``) so concurrent submitters and
# workers in this process do not overwrite each other's rows.
_HUB_LOCK = threading.Lock()


# Per-process cache for ``_resolve_data_revision``; ``None`` until resolved.
_DATA_REVISION: str | None = None
|
|
|
|
class _ValidationError(Exception):
    """Signal that an upload failed one of the validation gates.

    The exception message is shown to the submitter as the rejection reason.
    """
|
|
|
|
class _HubWriteError(Exception):
    """Signal that writing to the Hub failed for an already-validated upload.

    The exception message is shown to the submitter as the rejection reason.
    """
|
|
|
|
def handle_submit(zip_file) -> str:
    """Run the submit pipeline for one upload and return a markdown message.

    The returned string is one of:

    - ``**Error:** ...`` when no zip was attached (form-level reject).
    - ``**Submission rejected.** ...`` when a validation gate or a Hub
      write fails.
    - ``**Queued.** ...`` carrying the minted ``submission_id`` once the
      zip and the pending row are on the Hub and the worker was started.
    """
    rejection = _validate_form(zip_file)
    if rejection is not None:
        return rejection

    uploaded_zip = Path(zip_file.name)

    # Scratch directory for the unpacked submission. This function owns it
    # until the worker thread is started; after that the worker removes it.
    scratch = Path(tempfile.mkdtemp(prefix="cadgenbench-submit-"))
    run_dir = scratch / "run"
    run_dir.mkdir()
    handed_off = False
    try:
        # Validation gates, cheapest first.
        try:
            _extract_zip(uploaded_zip, run_dir)
            meta = _load_and_validate_meta(run_dir)
            fixture_names = _validate_fixture_set(run_dir)
            _validate_steps_parseable(run_dir, fixture_names)
        except _ValidationError as err:
            return f"**Submission rejected.** {err}"

        submitter = meta["submitter_name"]
        system = meta["submission_name"]
        submission_id = _mint_submission_id(submitter, system)

        # Hub writes: zip first, then the pending row.
        try:
            blob_url = _upload_submission_zip(submission_id, uploaded_zip)
            pending = _build_pending_row(
                submission_id, meta, fixture_names, blob_url
            )
            _append_pending_row(pending)
        except _HubWriteError as err:
            return f"**Submission rejected.** {err}"

        _spawn_worker(submission_id, scratch, run_dir)
        handed_off = True
    finally:
        # Every early return (and any unexpected exception) cleans up here.
        if not handed_off:
            shutil.rmtree(scratch, ignore_errors=True)

    return (
        f"**Queued.** Submission `{submission_id}` has been accepted "
        f"(submitter: `{submitter}`, system: "
        f"`{system}`, {len(fixture_names)} fixtures). "
        f"Evaluation typically takes 2-5 minutes on this Space's "
        f"`cpu-upgrade` tier; the row flips to `completed` with score "
        f"columns populated when the worker finishes."
    )
|
|
|
|
def _validate_form(zip_file) -> str | None:
    """Form-level gate: return an error message if no file was attached.

    Returns ``None`` when *zip_file* is anything other than ``None``.
    """
    return (
        None
        if zip_file is not None
        else "**Error:** please attach a submission zip."
    )
|
|
|
|
def _extract_zip(zip_path: Path, target: Path) -> None:
    """Extract *zip_path* into *target*, rejecting unsafe archives outright.

    ``ZipFile.extractall`` silently sanitises absolute and ``..`` member
    names. We would rather reject the upload so the submitter sees a clear
    error instead of a confusing "fixture set mismatch" further down.

    Every entry is inspected before anything is written:

    - absolute or parent-traversing names are rejected, directory entries
      included;
    - encrypted entries are rejected (``extractall`` would otherwise raise
      a bare ``RuntimeError`` asking for a password);
    - symlink entries are rejected.

    Raises:
        _ValidationError: the file is not a valid zip, uses an unsupported
            compression method, or contains an entry rejected above.
    """
    try:
        with zipfile.ZipFile(zip_path) as zf:
            for info in zf.infolist():
                # Check the name first so a directory entry such as
                # "../x/" is rejected too rather than silently rewritten.
                name = Path(info.filename)
                if name.is_absolute() or ".." in name.parts:
                    raise _ValidationError(
                        f"Zip contains an unsafe path: {info.filename!r}."
                    )
                if info.is_dir():
                    continue
                # Bit 0 of the general-purpose flags marks an encrypted entry.
                if info.flag_bits & 0x1:
                    raise _ValidationError(
                        f"Zip contains an encrypted entry "
                        f"({info.filename!r}); submissions must be "
                        f"unencrypted."
                    )
                # The upper 16 bits of external_attr hold the Unix mode
                # (zero when the archive was not built on Unix);
                # 0o120000 is the S_IFLNK file-type value.
                mode = info.external_attr >> 16
                if mode and (mode & 0o170000) == 0o120000:
                    raise _ValidationError(
                        f"Zip contains a symlink ({info.filename!r}); "
                        f"submissions must be plain files."
                    )
            zf.extractall(target)
    except zipfile.BadZipFile as e:
        raise _ValidationError(f"Upload is not a valid zip file: {e}") from e
    except NotImplementedError as e:
        # Raised by zipfile for compression methods it cannot decode.
        raise _ValidationError(
            f"Zip uses an unsupported compression method: {e}"
        ) from e
|
|
|
|
def _load_and_validate_meta(unpacked: Path) -> dict[str, Any]:
    """Load ``meta.json`` from the unpacked zip root and validate its schema.

    Checks, in order: the file exists, decodes as UTF-8, parses as JSON,
    is a JSON object, carries every key in ``REQUIRED_META_KEYS``, has
    non-empty string ``submitter_name`` / ``submission_name``, has
    string-or-null ``agent_url`` / ``notes``, and has ``agree_to_publish``
    set to the literal boolean ``true``.

    Returns the parsed dict. A non-null ``notes`` value is replaced by its
    normalised form (see ``_normalize_notes``).

    Raises:
        _ValidationError: any of the checks above fails, or ``notes``
            exceeds the length cap.
    """
    meta_path = unpacked / "meta.json"
    if not meta_path.is_file():
        raise _ValidationError(
            "Zip is missing top-level `meta.json` (expected at the root of "
            "the zip, alongside the per-fixture folders)."
        )
    # Decode explicitly as UTF-8 (like every other read in this module)
    # rather than with the platform default, and turn undecodable bytes
    # into a rejection instead of an unhandled UnicodeDecodeError.
    try:
        raw = meta_path.read_text(encoding="utf-8")
    except UnicodeDecodeError as e:
        raise _ValidationError(
            "`meta.json` is not valid UTF-8 text."
        ) from e
    try:
        meta = json.loads(raw)
    except json.JSONDecodeError as e:
        raise _ValidationError(
            f"`meta.json` is not valid JSON: {e.msg} (line {e.lineno})."
        ) from e
    if not isinstance(meta, dict):
        raise _ValidationError(
            "`meta.json` must be a JSON object at the top level."
        )

    missing = [k for k in REQUIRED_META_KEYS if k not in meta]
    if missing:
        raise _ValidationError(
            f"`meta.json` is missing required key(s): {', '.join(missing)}."
        )

    for k in ("submitter_name", "submission_name"):
        v = meta[k]
        if not isinstance(v, str) or not v.strip():
            raise _ValidationError(
                f"`meta.json` field `{k}` must be a non-empty string."
            )

    for k in ("agent_url", "notes"):
        v = meta[k]
        if v is not None and not isinstance(v, str):
            raise _ValidationError(
                f"`meta.json` field `{k}` must be a string or null."
            )

    # Identity check on purpose: truthy stand-ins such as 1 or "true" are
    # not accepted as consent.
    if meta["agree_to_publish"] is not True:
        raise _ValidationError(
            "`meta.json` field `agree_to_publish` must be the literal boolean "
            "`true`."
        )

    if meta["notes"] is not None:
        meta["notes"] = _normalize_notes(meta["notes"])

    return meta
|
|
|
|
def _normalize_notes(raw: str) -> str:
    """Flatten *raw* to a single trimmed line and enforce the length cap.

    Runs of CR / LF / TAB become one space each, then surrounding
    whitespace is stripped. Raises ``_ValidationError`` if the result is
    longer than ``NOTES_MAX_CHARS``.
    """
    flattened = re.sub(r"[\r\n\t]+", " ", raw)
    flattened = flattened.strip()
    n_chars = len(flattened)
    if n_chars <= NOTES_MAX_CHARS:
        return flattened
    raise _ValidationError(
        f"`meta.json` field `notes` exceeds the {NOTES_MAX_CHARS}-char "
        f"cap (got {n_chars} after stripping). Trim and resubmit."
    )
|
|
|
|
def _validate_fixture_set(unpacked: Path) -> set[str]:
    """Check that the zip's top-level folders equal the dataset's fixtures.

    Returns the expected fixture-name set on an exact match. Raises
    ``_ValidationError`` listing missing and/or unexpected folders, or
    when the inputs directory cannot be resolved.
    """
    submitted = {entry.name for entry in unpacked.iterdir() if entry.is_dir()}

    try:
        inputs_root = data_inputs_dir()
    except Exception as e:
        raise _ValidationError(
            f"Server-side error resolving the fixture set "
            f"({type(e).__name__}: {e})."
        ) from e
    expected = {entry.name for entry in inputs_root.iterdir() if entry.is_dir()}

    if submitted == expected:
        return expected

    absent = sorted(expected - submitted)
    surplus = sorted(submitted - expected)
    problems: list[str] = []
    if absent:
        problems.append(f"missing fixture(s): {', '.join(absent)}")
    if surplus:
        problems.append(f"unexpected folder(s): {', '.join(surplus)}")
    raise _ValidationError(
        "Fixture set does not match the dataset. " + "; ".join(problems) + "."
    )
|
|
|
|
def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
    """Require a present, non-empty, loadable ``output.step`` per fixture.

    Fixtures are visited in sorted order, so the first problem reported is
    deterministic. Raises ``_ValidationError`` on the first fixture whose
    file is missing, empty, or rejected by ``parse_step``.
    """
    for fixture in sorted(fixture_names):
        step_path = unpacked / fixture / "output.step"

        if not step_path.is_file():
            raise _ValidationError(
                f"Fixture `{fixture}` is missing its `output.step` file."
            )

        if not step_path.stat().st_size:
            raise _ValidationError(
                f"Fixture `{fixture}` has an empty `output.step`."
            )

        try:
            parse_step(step_path)
        except RuntimeError as err:
            raise _ValidationError(
                f"Fixture `{fixture}` has an `output.step` that is not loadable "
                f"as STEP geometry: {err}"
            ) from err
|
|
|
|
def _mint_submission_id(submitter_name: str, submission_name: str) -> str:
    """Return ``<submitter-slug>_<submission-slug>_<UTC timestamp>``.

    The result is the basename for ``submissions/<id>.zip`` and
    ``reports/<id>.*``; the timestamp has one-second resolution.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    pieces = (_slug(submitter_name), _slug(submission_name), stamp)
    return "_".join(pieces)
|
|
|
|
def _slug(s: str) -> str:
    """Reduce *s* to a lowercase ``[a-z0-9-]`` slug for file names.

    Each run of other characters becomes one dash, outer dashes are
    dropped, and the result is cut to ``SUBMISSION_ID_SLUG_MAX`` chars.
    An empty result becomes ``"unnamed"``.
    """
    dashed = re.sub(r"[^A-Za-z0-9]+", "-", s)
    trimmed = dashed.strip("-").lower()[:SUBMISSION_ID_SLUG_MAX]
    return trimmed if trimmed else "unnamed"
|
|
|
|
def _upload_submission_zip(submission_id: str, zip_path: Path) -> str:
    """Push the uploaded zip to ``submissions/<id>.zip`` in the Hub repo.

    Returns the ``resolve/main`` URL of the uploaded file. Any upload
    failure is logged and re-raised as :class:`_HubWriteError` with a
    short message suitable for showing to the submitter.
    """
    repo_path = f"{SUBMISSIONS_DIR}/{submission_id}.zip"
    upload_args = {
        "path_or_fileobj": str(zip_path),
        "path_in_repo": repo_path,
        "repo_id": HF_SUBMISSIONS_REPO,
        "repo_type": "dataset",
        "commit_message": f"add submission zip for {submission_id}",
    }
    try:
        _HF_API.upload_file(**upload_args)
    except Exception as exc:
        logger.exception("Failed to upload submission zip %s", submission_id)
        raise _HubWriteError(
            f"Server-side error uploading submission zip "
            f"({type(exc).__name__}: {exc}). Please try again later."
        ) from exc

    dataset_base = f"https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}"
    return f"{dataset_base}/resolve/main/{repo_path}"
|
|
|
|
def _build_pending_row(
    submission_id: str,
    meta: dict[str, Any],
    fixture_names: set[str],
    blob_url: str,
) -> dict[str, Any]:
    """Assemble the ``status: pending`` row for a freshly queued submission.

    The row carries the submitter metadata, a UTC ``submitted_at`` stamp,
    the cadgenbench version and data revision, and ``submission_blob_url``.
    Every score field is ``None`` until the worker updates the row.
    *fixture_names* is accepted but not used here.
    """
    row: dict[str, Any] = {
        "submission_id": submission_id,
        "status": "pending",
        "failure_reason": None,
    }
    # Metadata copied straight from the validated meta.json.
    for key in ("submitter_name", "submission_name", "agent_url", "notes"):
        row[key] = meta[key]
    row["submitted_at"] = datetime.now(timezone.utc).strftime(
        "%Y-%m-%dT%H:%M:%SZ"
    )
    row["cadgenbench_version"] = cadgenbench.__version__
    row["cadgenbench_data_revision"] = _resolve_data_revision()
    # Score-shaped placeholders, in the order they appear in the row.
    for key in (
        "validity_rate",
        "aggregate_score",
        "score_by_task_type",
        "per_task_scores",
        "per_fixture_scores",
        "per_fixture_breakdown",
    ):
        row[key] = None
    row["submission_blob_url"] = blob_url
    return row
|
|
|
|
def _append_pending_row(row: dict[str, Any]) -> None:
    """Add *row* to the end of ``results.jsonl`` on the Hub.

    The append goes through ``_hub_rmw_results``. Any failure is logged
    and re-raised as ``_HubWriteError`` with a user-facing message.
    """
    submission_id = row["submission_id"]

    def add_row(rows: list[dict[str, Any]]) -> None:
        rows.append(row)

    message = f"add pending row for {submission_id}"
    try:
        _hub_rmw_results(add_row, commit_message=message)
    except Exception as exc:
        logger.exception(
            "Failed RMW of results.jsonl while appending pending row for %s",
            submission_id,
        )
        raise _HubWriteError(
            f"Server-side error writing the submissions table "
            f"({type(exc).__name__}: {exc}). The submission zip was uploaded "
            f"but the row was not; please try again later."
        ) from exc
|
|
|
|
def _update_row(submission_id: str, updates: dict[str, Any]) -> None:
    """Merge *updates* into the first row whose id equals *submission_id*.

    The change is applied through ``_hub_rmw_results``. Raises
    ``LookupError`` when ``results.jsonl`` holds no row with that id.
    """
    def apply(rows: list[dict[str, Any]]) -> None:
        match = next(
            (r for r in rows if r.get("submission_id") == submission_id),
            None,
        )
        if match is None:
            raise LookupError(
                f"No row with submission_id={submission_id!r} in results.jsonl."
            )
        match.update(updates)

    new_status = updates.get("status", "?")
    _hub_rmw_results(
        apply,
        commit_message=f"flip row for {submission_id} -> {new_status}",
    )
|
|
|
|
def _hub_rmw_results(
    mutate, *, commit_message: str,
) -> None:
    """Read-modify-write ``results.jsonl`` on the Hub under ``_HUB_LOCK``.

    Downloads the current file (a missing file counts as no rows), parses
    each non-blank line as JSON, lets *mutate* edit the row list in place,
    then re-uploads the file. The lock covers the whole cycle, so callers
    in this process are serialised. Zero rows are written as an empty file.
    """
    with _HUB_LOCK:
        rows: list[dict[str, Any]] = []
        for line in _download_results_jsonl().splitlines():
            if line.strip():
                rows.append(json.loads(line))

        mutate(rows)

        encoded = [json.dumps(row, ensure_ascii=False) for row in rows]
        body = "\n".join(encoded) + "\n" if encoded else ""
        _HF_API.upload_file(
            path_or_fileobj=body.encode("utf-8"),
            path_in_repo=RESULTS_FILENAME,
            repo_id=HF_SUBMISSIONS_REPO,
            repo_type="dataset",
            commit_message=commit_message,
        )
|
|
|
|
def _download_results_jsonl() -> str:
    """Return the text of ``results.jsonl`` from the Hub.

    Returns ``""`` when the file does not exist in the repo yet. The
    download is forced so a cached copy is not reused.
    """
    from huggingface_hub import hf_hub_download

    try:
        local_copy = hf_hub_download(
            repo_id=HF_SUBMISSIONS_REPO,
            filename=RESULTS_FILENAME,
            repo_type="dataset",
            force_download=True,
        )
    except EntryNotFoundError:
        return ""
    else:
        return Path(local_copy).read_text(encoding="utf-8")
|
|
|
|
def _resolve_data_revision() -> str:
    """Return a short sha for the cadgenbench-data dataset.

    A successfully resolved sha is cached for the life of the process.
    On a Hub error, or when the Hub reports no sha, this returns
    ``"unknown"`` so a flaky network cannot block a submission over a
    metadata field. That fallback is not cached: the next call retries,
    so one transient failure does not stamp every later row with
    ``"unknown"``.
    """
    global _DATA_REVISION
    if _DATA_REVISION is not None:
        return _DATA_REVISION
    try:
        sha = _HF_API.dataset_info(HF_DATA_REPO).sha
    except Exception as e:
        logger.warning(
            "Failed to resolve cadgenbench-data revision (%s: %s)",
            type(e).__name__, e,
        )
        # Leave the cache empty so a later call can try again.
        return "unknown"
    if not sha:
        return "unknown"
    _DATA_REVISION = sha[:DATA_REV_SHORT_LEN]
    return _DATA_REVISION
|
|
|
|
| |
| |
| |
|
|
|
|
def _spawn_worker(submission_id: str, tmp: Path, run_dir: Path) -> None:
    """Launch ``_run_worker`` for this submission on a background thread.

    The thread is started and not joined. It is a daemon thread named
    ``cgb-worker-<submission_id>``, so process shutdown does not wait for
    an in-flight evaluation.
    """
    worker = threading.Thread(
        name=f"cgb-worker-{submission_id}",
        target=_run_worker,
        args=(submission_id, tmp, run_dir),
        daemon=True,
    )
    worker.start()
|
|
|
|
def _run_worker(submission_id: str, tmp: Path, run_dir: Path) -> None:
    """Worker-thread body: evaluate, report, upload, then update the row.

    On success the row becomes ``completed`` with the scores from
    ``run_summary.json``. Any exception along the way is logged with its
    traceback and the row becomes ``failed`` with a truncated
    ``failure_reason``. If that update fails too, it is only logged and
    the row stays pending. *tmp* is removed in every case.
    """
    try:
        try:
            _run_eval(run_dir)

            html_path = tmp / f"{submission_id}.html"
            _run_report(run_dir, html_path)

            bundle = _build_report_json(run_dir)
            _upload_reports(submission_id, html_path, bundle)

            summary_text = (run_dir / "run_summary.json").read_text(
                encoding="utf-8"
            )
            _flip_row_to_completed(submission_id, json.loads(summary_text))
            logger.info("Worker completed for %s", submission_id)
        except Exception as exc:
            logger.exception("Worker failed for %s", submission_id)
            full_reason = f"{type(exc).__name__}: {str(exc)}"
            reason = full_reason[:FAILURE_REASON_MAX_CHARS]
            try:
                _flip_row_to_failed(submission_id, reason)
            except Exception:
                # Nothing more can be done from here: record it and leave
                # the row pending.
                logger.exception(
                    "Failed to flip row to failed for %s; row stays pending",
                    submission_id,
                )
    finally:
        shutil.rmtree(tmp, ignore_errors=True)
|
|
|
|
def _run_eval(run_dir: Path) -> None:
    """Run ``cadgenbench evaluate`` on *run_dir* in a subprocess.

    Uses the current interpreter (``python -m cadgenbench.cli``), captures
    output as text, and applies ``EVAL_TIMEOUT_SECONDS``. Raises
    ``RuntimeError`` carrying the last 500 characters of stderr (or
    stdout when stderr is empty) if the exit code is non-zero.
    """
    argv = [
        sys.executable, "-m", "cadgenbench.cli", "evaluate", str(run_dir),
        "--workers", EVAL_WORKER_COUNT,
    ]
    logger.info("Running eval: %s", " ".join(argv))
    completed = subprocess.run(
        argv,
        capture_output=True,
        text=True,
        timeout=EVAL_TIMEOUT_SECONDS,
        env=os.environ.copy(),
        check=False,
    )
    if completed.returncode == 0:
        return

    # Keep only the end of the output; that is where the error usually is.
    output = completed.stderr or completed.stdout or ""
    tail = output[-500:].strip()
    raise RuntimeError(
        f"cadgenbench evaluate exited {completed.returncode}: {tail}"
    )
|
|
|
|
def _run_report(run_dir: Path, html_out: Path) -> None:
    """Run ``cadgenbench report single`` for *run_dir*, writing *html_out*.

    Uses the current interpreter (``python -m cadgenbench.cli``), captures
    output as text, and applies ``REPORT_TIMEOUT_SECONDS``.

    Raises:
        RuntimeError: the command exits non-zero, or it exits 0 but
            *html_out* does not exist afterwards. The two cases have
            distinct messages so a missing report is not described as
            "exited 0". Both carry the last 500 characters of stderr
            (or stdout when stderr is empty).
    """
    cmd = [
        sys.executable, "-m", "cadgenbench.cli", "report", "single",
        str(run_dir), "-o", str(html_out),
    ]
    logger.info("Running report: %s", " ".join(cmd))
    proc = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        timeout=REPORT_TIMEOUT_SECONDS,
        env=os.environ.copy(),
        check=False,
    )
    tail = (proc.stderr or proc.stdout or "")[-500:].strip()
    if proc.returncode != 0:
        raise RuntimeError(
            f"cadgenbench report single exited {proc.returncode}: {tail}"
        )
    if not html_out.is_file():
        # A zero exit without the output file is still a failure; say so
        # explicitly instead of reporting a misleading "exited 0".
        detail = f": {tail}" if tail else ""
        raise RuntimeError(
            f"cadgenbench report single exited 0 but did not write "
            f"{html_out.name}{detail}"
        )
|
|
|
|
def _build_report_json(run_dir: Path) -> dict[str, Any]:
    """Collect the run summary and per-fixture results into one dict.

    Returns ``{"run_summary": ..., "per_fixture_results": {name: ...}}``,
    where the per-fixture mapping covers every sub-directory of *run_dir*
    that contains a ``result.json``, in sorted order. Raises
    ``RuntimeError`` when ``run_summary.json`` is missing.
    """
    def read_json(path: Path) -> Any:
        return json.loads(path.read_text(encoding="utf-8"))

    summary_file = run_dir / "run_summary.json"
    if not summary_file.is_file():
        raise RuntimeError(
            f"run_summary.json not produced under {run_dir} (eval issue?)"
        )
    run_summary = read_json(summary_file)

    fixture_dirs = sorted(p for p in run_dir.iterdir() if p.is_dir())
    results: dict[str, dict[str, Any]] = {}
    for folder in fixture_dirs:
        result_file = folder / "result.json"
        if not result_file.is_file():
            continue
        results[folder.name] = read_json(result_file)

    return {"run_summary": run_summary, "per_fixture_results": results}
|
|
|
|
def _upload_reports(
    submission_id: str, html_path: Path, report_json: dict[str, Any],
) -> None:
    """Publish both report artefacts for a submission to the Hub.

    Uploads the HTML file at *html_path* as ``reports/<id>.html``, then
    *report_json* (serialised as indented UTF-8 JSON) as
    ``reports/<id>.json``. Each upload is its own commit.
    """
    destination = {"repo_id": HF_SUBMISSIONS_REPO, "repo_type": "dataset"}

    _HF_API.upload_file(
        path_or_fileobj=str(html_path),
        path_in_repo=f"{REPORTS_DIR}/{submission_id}.html",
        commit_message=f"add HTML report for {submission_id}",
        **destination,
    )

    json_text = json.dumps(report_json, ensure_ascii=False, indent=2)
    _HF_API.upload_file(
        path_or_fileobj=json_text.encode("utf-8"),
        path_in_repo=f"{REPORTS_DIR}/{submission_id}.json",
        commit_message=f"add JSON report for {submission_id}",
        **destination,
    )
|
|
|
|
def _flip_row_to_completed(submission_id: str, summary: dict[str, Any]) -> None:
    """Set the row to ``completed`` and copy scores from *summary*.

    Clears ``failure_reason``, refreshes ``cadgenbench_data_revision``,
    and copies each score field from *summary*. A field absent from
    *summary* is written as ``None``.
    """
    score_fields = (
        "aggregate_score",
        "validity_rate",
        "score_by_task_type",
        "per_task_scores",
        "per_fixture_scores",
    )
    updates: dict[str, Any] = {
        "status": "completed",
        "failure_reason": None,
        "cadgenbench_data_revision": _resolve_data_revision(),
    }
    for field in score_fields:
        updates[field] = summary.get(field)
    _update_row(submission_id, updates)
|
|
|
|
def _flip_row_to_failed(submission_id: str, reason: str) -> None:
    """Set the row to ``failed`` and record *reason* as ``failure_reason``.

    No score field is touched.
    """
    failure_update = {"status": "failed", "failure_reason": reason}
    _update_row(submission_id, failure_update)
|
|