Spaces:

HuggingAI4Engineering
/

cadgenbench-leaderboard

Running

App Files Community

Michael Rabinovich committed on 2 days ago

Commit

dc8ff2a

1 Parent(s): daae24c

submit: allow missing fixture outputs

Browse files

Files changed (4) hide show

Dockerfile +1 -1
app.py +3 -1
submit.py +21 -11
tests/test_submit.py +28 -0

Dockerfile CHANGED Viewed

@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
 # image rebuild picks up the latest code (pre-v1: always-updated). Lock
 # to a specific commit SHA at the v1 release so published scores are
 # reproducible (see space-setup/post-gt-swap.md Stage F).
-ARG CADGENBENCH_SHA=e446cca
 # Cache-bust the install below whenever the tracked ref moves: the
 # GitHub commits endpoint's response changes with each new commit on
 # `main`, so BuildKit re-fetches and invalidates the cached pip layer.

 # image rebuild picks up the latest code (pre-v1: always-updated). Lock
 # to a specific commit SHA at the v1 release so published scores are
 # reproducible (see space-setup/post-gt-swap.md Stage F).
+ARG CADGENBENCH_SHA=b3ff8f0
 # Cache-bust the install below whenever the tracked ref moves: the
 # GitHub commits endpoint's response changes with each new commit on
 # `main`, so BuildKit re-fetches and invalidates the cached pip layer.

app.py CHANGED Viewed

@@ -745,7 +745,9 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
             f"""
 **Submission format.** A single zip with:
-- one folder per fixture in `{HF_DATA_REPO}`, each containing `output.step`;
 - a top-level `meta.json`:
 ```json

             f"""
 **Submission format.** A single zip with:
+- one folder per fixture in `{HF_DATA_REPO}`; include `output.step` for
+  fixtures where your system produced a candidate. Missing `output.step`
+  scores zero for that fixture;
 - a top-level `meta.json`:
 ```json

submit.py CHANGED Viewed

@@ -44,10 +44,11 @@ Validation gates, in order:
 4. Fixture-set match: the set of folders inside the zip equals the
    set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
    (no missing, no extras).
-5. STEP parseability: each ``<fixture>/output.step`` loads as STEP
-   geometry. Per-fixture validity (watertight, manifold, etc) is
-   *not* checked here, that's the evaluator's job and contributes to
-   the per-fixture score; this gate only rejects "not actually STEP".
 Hub-write ordering (after validation passes):
@@ -455,20 +456,20 @@ def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
     # preserves the same `Fixture <name>` rejection text as the
     # sequential loop did.
     def _check_one_step(name: str) -> None:
-        step = unpacked / name / "output.step"
-        if not step.is_file():
-            raise _ValidationError(
-                f"Fixture `{name}` is missing its `output.step` file."
-            )
         if step.stat().st_size == 0:
             raise _ValidationError(
-                f"Fixture `{name}` has an empty `output.step`."
             )
         try:
             parse_step(step)
         except RuntimeError as e:
             raise _ValidationError(
-                f"Fixture `{name}` has an `output.step` that is not loadable "
                 f"as STEP geometry: {e}"
             ) from e
@@ -478,6 +479,15 @@ def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
         list(ex.map(_check_one_step, sorted(fixture_names)))
 def _mint_submission_id(submitter_name: str, submission_name: str) -> str:
     """Build the basename used for ``submissions/<id>.zip`` and ``reports/<id>.*``."""
     ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")

 4. Fixture-set match: the set of folders inside the zip equals the
    set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
    (no missing, no extras).
+5. STEP parseability: any present ``<fixture>/output.step`` loads as STEP
+   geometry. A missing ``output.step`` is allowed and scores zero via the
+   evaluator's ``status="missing"`` path. Per-fixture validity (watertight,
+   manifold, etc) is *not* checked here; this gate only rejects files that are
+   present but not actually STEP.
 Hub-write ordering (after validation passes):
     # preserves the same `Fixture <name>` rejection text as the
     # sequential loop did.
     def _check_one_step(name: str) -> None:
+        step = _candidate_step_path(unpacked / name)
+        if step is None:
+            # Missing output is a valid benchmark outcome: the evaluator writes
+            # status="missing" and the fixture contributes cad_score=0.
+            return
         if step.stat().st_size == 0:
             raise _ValidationError(
+                f"Fixture `{name}` has an empty `{step.name}`."
             )
         try:
             parse_step(step)
         except RuntimeError as e:
             raise _ValidationError(
+                f"Fixture `{name}` has an `{step.name}` that is not loadable "
                 f"as STEP geometry: {e}"
             ) from e
         list(ex.map(_check_one_step, sorted(fixture_names)))
+def _candidate_step_path(fixture_dir: Path) -> Path | None:
+    """Return the submitted candidate STEP for *fixture_dir*, if present."""
+    for name in ("output.step", "output.stp"):
+        step = fixture_dir / name
+        if step.is_file():
+            return step
+    return None
 def _mint_submission_id(submitter_name: str, submission_name: str) -> str:
     """Build the basename used for ``submissions/<id>.zip`` and ``reports/<id>.*``."""
     ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")

tests/test_submit.py CHANGED Viewed

@@ -11,6 +11,8 @@ network traffic.
 """
 from __future__ import annotations
 import submit
@@ -122,3 +124,29 @@ def test_pending_row_preserves_existing_metadata(monkeypatch):
         "per_fixture_breakdown",
     ):
         assert row[k] is None

 """
 from __future__ import annotations
+from pathlib import Path
 import submit
         "per_fixture_breakdown",
     ):
         assert row[k] is None
+def test_validate_steps_allows_missing_output_step(tmp_path: Path, monkeypatch):
+    """Missing fixture outputs are accepted; evaluator scores them as missing."""
+    (tmp_path / "101").mkdir()
+    (tmp_path / "102").mkdir()
+    calls: list[Path] = []
+    monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p))
+    submit._validate_steps_parseable(tmp_path, {"101", "102"})
+    assert calls == []
+def test_validate_steps_checks_present_output_stp(tmp_path: Path, monkeypatch):
+    """Present candidate files are still cheap-parse checked."""
+    fixture = tmp_path / "101"
+    fixture.mkdir()
+    candidate = fixture / "output.stp"
+    candidate.write_text("ISO-10303-21;\n")
+    calls: list[Path] = []
+    monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p))
+    submit._validate_steps_parseable(tmp_path, {"101"})
+    assert calls == [candidate]