Michael Rabinovich committed on
Commit ·
dc8ff2a
1
Parent(s): daae24c
submit: allow missing fixture outputs
Browse files
- Dockerfile +1 -1
- app.py +3 -1
- submit.py +21 -11
- tests/test_submit.py +28 -0
Dockerfile
CHANGED
|
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
|
|
| 41 |
# image rebuild picks up the latest code (pre-v1: always-updated). Lock
|
| 42 |
# to a specific commit SHA at the v1 release so published scores are
|
| 43 |
# reproducible (see space-setup/post-gt-swap.md Stage F).
|
| 44 |
-
ARG CADGENBENCH_SHA=
|
| 45 |
# Cache-bust the install below whenever the tracked ref moves: the
|
| 46 |
# GitHub commits endpoint's response changes with each new commit on
|
| 47 |
# `main`, so BuildKit re-fetches and invalidates the cached pip layer.
|
|
|
|
| 41 |
# image rebuild picks up the latest code (pre-v1: always-updated). Lock
|
| 42 |
# to a specific commit SHA at the v1 release so published scores are
|
| 43 |
# reproducible (see space-setup/post-gt-swap.md Stage F).
|
| 44 |
+
ARG CADGENBENCH_SHA=b3ff8f0
|
| 45 |
# Cache-bust the install below whenever the tracked ref moves: the
|
| 46 |
# GitHub commits endpoint's response changes with each new commit on
|
| 47 |
# `main`, so BuildKit re-fetches and invalidates the cached pip layer.
|
app.py
CHANGED
|
@@ -745,7 +745,9 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
|
|
| 745 |
f"""
|
| 746 |
**Submission format.** A single zip with:
|
| 747 |
|
| 748 |
-
- one folder per fixture in `{HF_DATA_REPO}`
|
|
|
|
|
|
|
| 749 |
- a top-level `meta.json`:
|
| 750 |
|
| 751 |
```json
|
|
|
|
| 745 |
f"""
|
| 746 |
**Submission format.** A single zip with:
|
| 747 |
|
| 748 |
+
- one folder per fixture in `{HF_DATA_REPO}`; include `output.step` for
|
| 749 |
+
fixtures where your system produced a candidate. Missing `output.step`
|
| 750 |
+
scores zero for that fixture;
|
| 751 |
- a top-level `meta.json`:
|
| 752 |
|
| 753 |
```json
|
submit.py
CHANGED
|
@@ -44,10 +44,11 @@ Validation gates, in order:
|
|
| 44 |
4. Fixture-set match: the set of folders inside the zip equals the
|
| 45 |
set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
|
| 46 |
(no missing, no extras).
|
| 47 |
-
5. STEP parseability:
|
| 48 |
-
geometry.
|
| 49 |
-
|
| 50 |
-
|
|
|
|
| 51 |
|
| 52 |
Hub-write ordering (after validation passes):
|
| 53 |
|
|
@@ -455,20 +456,20 @@ def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
|
|
| 455 |
# preserves the same `Fixture <name>` rejection text as the
|
| 456 |
# sequential loop did.
|
| 457 |
def _check_one_step(name: str) -> None:
|
| 458 |
-
step = unpacked / name
|
| 459 |
-
if
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
if step.stat().st_size == 0:
|
| 464 |
raise _ValidationError(
|
| 465 |
-
f"Fixture `{name}` has an empty `
|
| 466 |
)
|
| 467 |
try:
|
| 468 |
parse_step(step)
|
| 469 |
except RuntimeError as e:
|
| 470 |
raise _ValidationError(
|
| 471 |
-
f"Fixture `{name}` has an `
|
| 472 |
f"as STEP geometry: {e}"
|
| 473 |
) from e
|
| 474 |
|
|
@@ -478,6 +479,15 @@ def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
|
|
| 478 |
list(ex.map(_check_one_step, sorted(fixture_names)))
|
| 479 |
|
| 480 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
def _mint_submission_id(submitter_name: str, submission_name: str) -> str:
|
| 482 |
"""Build the basename used for ``submissions/<id>.zip`` and ``reports/<id>.*``."""
|
| 483 |
ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
|
|
|
| 44 |
4. Fixture-set match: the set of folders inside the zip equals the
|
| 45 |
set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
|
| 46 |
(no missing, no extras).
|
| 47 |
+
5. STEP parseability: any present ``<fixture>/output.step`` loads as STEP
|
| 48 |
+
geometry. A missing ``output.step`` is allowed and scores zero via the
|
| 49 |
+
evaluator's ``status="missing"`` path. Per-fixture validity (watertight,
|
| 50 |
+
manifold, etc) is *not* checked here; this gate only rejects files that are
|
| 51 |
+
present but not actually STEP.
|
| 52 |
|
| 53 |
Hub-write ordering (after validation passes):
|
| 54 |
|
|
|
|
| 456 |
# preserves the same `Fixture <name>` rejection text as the
|
| 457 |
# sequential loop did.
|
| 458 |
def _check_one_step(name: str) -> None:
|
| 459 |
+
step = _candidate_step_path(unpacked / name)
|
| 460 |
+
if step is None:
|
| 461 |
+
# Missing output is a valid benchmark outcome: the evaluator writes
|
| 462 |
+
# status="missing" and the fixture contributes cad_score=0.
|
| 463 |
+
return
|
| 464 |
if step.stat().st_size == 0:
|
| 465 |
raise _ValidationError(
|
| 466 |
+
f"Fixture `{name}` has an empty `{step.name}`."
|
| 467 |
)
|
| 468 |
try:
|
| 469 |
parse_step(step)
|
| 470 |
except RuntimeError as e:
|
| 471 |
raise _ValidationError(
|
| 472 |
+
f"Fixture `{name}` has an `{step.name}` that is not loadable "
|
| 473 |
f"as STEP geometry: {e}"
|
| 474 |
) from e
|
| 475 |
|
|
|
|
| 479 |
list(ex.map(_check_one_step, sorted(fixture_names)))
|
| 480 |
|
| 481 |
|
| 482 |
+
def _candidate_step_path(fixture_dir: Path) -> Path | None:
|
| 483 |
+
"""Return the submitted candidate STEP for *fixture_dir*, if present."""
|
| 484 |
+
for name in ("output.step", "output.stp"):
|
| 485 |
+
step = fixture_dir / name
|
| 486 |
+
if step.is_file():
|
| 487 |
+
return step
|
| 488 |
+
return None
|
| 489 |
+
|
| 490 |
+
|
| 491 |
def _mint_submission_id(submitter_name: str, submission_name: str) -> str:
|
| 492 |
"""Build the basename used for ``submissions/<id>.zip`` and ``reports/<id>.*``."""
|
| 493 |
ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
|
tests/test_submit.py
CHANGED
|
@@ -11,6 +11,8 @@ network traffic.
|
|
| 11 |
"""
|
| 12 |
from __future__ import annotations
|
| 13 |
|
|
|
|
|
|
|
| 14 |
import submit
|
| 15 |
|
| 16 |
|
|
@@ -122,3 +124,29 @@ def test_pending_row_preserves_existing_metadata(monkeypatch):
|
|
| 122 |
"per_fixture_breakdown",
|
| 123 |
):
|
| 124 |
assert row[k] is None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"""
|
| 12 |
from __future__ import annotations
|
| 13 |
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
import submit
|
| 17 |
|
| 18 |
|
|
|
|
| 124 |
"per_fixture_breakdown",
|
| 125 |
):
|
| 126 |
assert row[k] is None
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def test_validate_steps_allows_missing_output_step(tmp_path: Path, monkeypatch):
|
| 130 |
+
"""Missing fixture outputs are accepted; evaluator scores them as missing."""
|
| 131 |
+
(tmp_path / "101").mkdir()
|
| 132 |
+
(tmp_path / "102").mkdir()
|
| 133 |
+
calls: list[Path] = []
|
| 134 |
+
monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p))
|
| 135 |
+
|
| 136 |
+
submit._validate_steps_parseable(tmp_path, {"101", "102"})
|
| 137 |
+
|
| 138 |
+
assert calls == []
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def test_validate_steps_checks_present_output_stp(tmp_path: Path, monkeypatch):
|
| 142 |
+
"""Present candidate files are still cheap-parse checked."""
|
| 143 |
+
fixture = tmp_path / "101"
|
| 144 |
+
fixture.mkdir()
|
| 145 |
+
candidate = fixture / "output.stp"
|
| 146 |
+
candidate.write_text("ISO-10303-21;\n")
|
| 147 |
+
calls: list[Path] = []
|
| 148 |
+
monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p))
|
| 149 |
+
|
| 150 |
+
submit._validate_steps_parseable(tmp_path, {"101"})
|
| 151 |
+
|
| 152 |
+
assert calls == [candidate]
|