Michael Rabinovich committed on
Commit
dc8ff2a
·
1 Parent(s): daae24c

submit: allow missing fixture outputs

Browse files
Files changed (4) hide show
  1. Dockerfile +1 -1
  2. app.py +3 -1
  3. submit.py +21 -11
  4. tests/test_submit.py +28 -0
Dockerfile CHANGED
@@ -41,7 +41,7 @@ RUN pip install --no-cache-dir -r /tmp/requirements.txt \
41
  # image rebuild picks up the latest code (pre-v1: always-updated). Lock
42
  # to a specific commit SHA at the v1 release so published scores are
43
  # reproducible (see space-setup/post-gt-swap.md Stage F).
44
- ARG CADGENBENCH_SHA=e446cca
45
  # Cache-bust the install below whenever the tracked ref moves: the
46
  # GitHub commits endpoint's response changes with each new commit on
47
  # `main`, so BuildKit re-fetches and invalidates the cached pip layer.
 
41
  # image rebuild picks up the latest code (pre-v1: always-updated). Lock
42
  # to a specific commit SHA at the v1 release so published scores are
43
  # reproducible (see space-setup/post-gt-swap.md Stage F).
44
+ ARG CADGENBENCH_SHA=b3ff8f0
45
  # Cache-bust the install below whenever the tracked ref moves: the
46
  # GitHub commits endpoint's response changes with each new commit on
47
  # `main`, so BuildKit re-fetches and invalidates the cached pip layer.
app.py CHANGED
@@ -745,7 +745,9 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
745
  f"""
746
  **Submission format.** A single zip with:
747
 
748
- - one folder per fixture in `{HF_DATA_REPO}`, each containing `output.step`;
 
 
749
  - a top-level `meta.json`:
750
 
751
  ```json
 
745
  f"""
746
  **Submission format.** A single zip with:
747
 
748
+ - one folder per fixture in `{HF_DATA_REPO}`; include `output.step` for
749
+ fixtures where your system produced a candidate. Missing `output.step`
750
+ scores zero for that fixture;
751
  - a top-level `meta.json`:
752
 
753
  ```json
submit.py CHANGED
@@ -44,10 +44,11 @@ Validation gates, in order:
44
  4. Fixture-set match: the set of folders inside the zip equals the
45
  set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
46
  (no missing, no extras).
47
- 5. STEP parseability: each ``<fixture>/output.step`` loads as STEP
48
- geometry. Per-fixture validity (watertight, manifold, etc) is
49
- *not* checked here, that's the evaluator's job and contributes to
50
- the per-fixture score; this gate only rejects "not actually STEP".
 
51
 
52
  Hub-write ordering (after validation passes):
53
 
@@ -455,20 +456,20 @@ def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
455
  # preserves the same `Fixture <name>` rejection text as the
456
  # sequential loop did.
457
  def _check_one_step(name: str) -> None:
458
- step = unpacked / name / "output.step"
459
- if not step.is_file():
460
- raise _ValidationError(
461
- f"Fixture `{name}` is missing its `output.step` file."
462
- )
463
  if step.stat().st_size == 0:
464
  raise _ValidationError(
465
- f"Fixture `{name}` has an empty `output.step`."
466
  )
467
  try:
468
  parse_step(step)
469
  except RuntimeError as e:
470
  raise _ValidationError(
471
- f"Fixture `{name}` has an `output.step` that is not loadable "
472
  f"as STEP geometry: {e}"
473
  ) from e
474
 
@@ -478,6 +479,15 @@ def _validate_steps_parseable(unpacked: Path, fixture_names: set[str]) -> None:
478
  list(ex.map(_check_one_step, sorted(fixture_names)))
479
 
480
 
 
 
 
 
 
 
 
 
 
481
  def _mint_submission_id(submitter_name: str, submission_name: str) -> str:
482
  """Build the basename used for ``submissions/<id>.zip`` and ``reports/<id>.*``."""
483
  ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
 
44
  4. Fixture-set match: the set of folders inside the zip equals the
45
  set of fixture directories in :func:`cadgenbench.common.paths.data_inputs_dir`
46
  (no missing, no extras).
47
+ 5. STEP parseability: any present ``<fixture>/output.step`` loads as STEP
48
+ geometry. A missing ``output.step`` is allowed and scores zero via the
49
+ evaluator's ``status="missing"`` path. Per-fixture validity (watertight,
50
+ manifold, etc) is *not* checked here; this gate only rejects files that are
51
+ present but not actually STEP.
52
 
53
  Hub-write ordering (after validation passes):
54
 
 
456
  # preserves the same `Fixture <name>` rejection text as the
457
  # sequential loop did.
458
  def _check_one_step(name: str) -> None:
459
+ step = _candidate_step_path(unpacked / name)
460
+ if step is None:
461
+ # Missing output is a valid benchmark outcome: the evaluator writes
462
+ # status="missing" and the fixture contributes cad_score=0.
463
+ return
464
  if step.stat().st_size == 0:
465
  raise _ValidationError(
466
+ f"Fixture `{name}` has an empty `{step.name}`."
467
  )
468
  try:
469
  parse_step(step)
470
  except RuntimeError as e:
471
  raise _ValidationError(
472
+ f"Fixture `{name}` has an `{step.name}` that is not loadable "
473
  f"as STEP geometry: {e}"
474
  ) from e
475
 
 
479
  list(ex.map(_check_one_step, sorted(fixture_names)))
480
 
481
 
482
+ def _candidate_step_path(fixture_dir: Path) -> Path | None:
483
+ """Return the submitted candidate STEP for *fixture_dir*, if present."""
484
+ for name in ("output.step", "output.stp"):
485
+ step = fixture_dir / name
486
+ if step.is_file():
487
+ return step
488
+ return None
489
+
490
+
491
  def _mint_submission_id(submitter_name: str, submission_name: str) -> str:
492
  """Build the basename used for ``submissions/<id>.zip`` and ``reports/<id>.*``."""
493
  ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
tests/test_submit.py CHANGED
@@ -11,6 +11,8 @@ network traffic.
11
  """
12
  from __future__ import annotations
13
 
 
 
14
  import submit
15
 
16
 
@@ -122,3 +124,29 @@ def test_pending_row_preserves_existing_metadata(monkeypatch):
122
  "per_fixture_breakdown",
123
  ):
124
  assert row[k] is None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  """
12
  from __future__ import annotations
13
 
14
+ from pathlib import Path
15
+
16
  import submit
17
 
18
 
 
124
  "per_fixture_breakdown",
125
  ):
126
  assert row[k] is None
127
+
128
+
129
+ def test_validate_steps_allows_missing_output_step(tmp_path: Path, monkeypatch):
130
+ """Missing fixture outputs are accepted; evaluator scores them as missing."""
131
+ (tmp_path / "101").mkdir()
132
+ (tmp_path / "102").mkdir()
133
+ calls: list[Path] = []
134
+ monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p))
135
+
136
+ submit._validate_steps_parseable(tmp_path, {"101", "102"})
137
+
138
+ assert calls == []
139
+
140
+
141
+ def test_validate_steps_checks_present_output_stp(tmp_path: Path, monkeypatch):
142
+ """Present candidate files are still cheap-parse checked."""
143
+ fixture = tmp_path / "101"
144
+ fixture.mkdir()
145
+ candidate = fixture / "output.stp"
146
+ candidate.write_text("ISO-10303-21;\n")
147
+ calls: list[Path] = []
148
+ monkeypatch.setattr(submit, "parse_step", lambda p: calls.append(p))
149
+
150
+ submit._validate_steps_parseable(tmp_path, {"101"})
151
+
152
+ assert calls == [candidate]