Michael Rabinovich Cursor committed on
Commit
28e0081
·
1 Parent(s): f17ac64

leaderboard: add Metrics tab/page + report deep-links

Browse files

New self-contained Metrics explainer (metrics_page.py) served at
/metrics and embedded in a Metrics tab: validity gate, the three axes,
editing renormalization, with formulas and the interface mating-group
illustration (vendored under assets/metrics/ via LFS, served by the
/metrics-assets route). Submit's merge path passes the submission name
and the /metrics base URL to the report generator so hosted reports
title themselves and their metric pills deep-link to the explainer.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (5) hide show
  1. .gitattributes +1 -0
  2. app.py +63 -0
  3. assets/metrics/mating_group.webp +3 -0
  4. metrics_page.py +366 -0
  5. submit.py +33 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.webp filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -59,6 +59,7 @@ from leaderboard import (
59
  render_public_url,
60
  )
61
  from gallery import render_gallery_page
 
62
  from tasks import load_tasks_from_dir, render_tasks_page
63
  from admin import (
64
  VALID_METHODS,
@@ -650,6 +651,43 @@ def serve_report(submission_id: str) -> Response:
650
  return Response(content=content, media_type="text/html; charset=utf-8")
651
 
652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
653
  def _fetch_gt_render(fixture: str) -> bytes | None:
654
  """Pull a fixture's ground-truth GIF from the private GT dataset.
655
 
@@ -1002,6 +1040,17 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
1002
  tasks_refresh_btn = gr.Button("Refresh tasks", size="sm")
1003
  tasks_refresh_btn.click(fn=_tasks_iframe_html, outputs=tasks_html)
1004
 
 
 
 
 
 
 
 
 
 
 
 
1005
  with gr.Tab("Submit"):
1006
  gr.Markdown(
1007
  f"""
@@ -1304,6 +1353,20 @@ app.add_api_route(
1304
  serve_report,
1305
  methods=["GET"],
1306
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1307
  # Cached render proxies the gallery's lazy-loaded turntables point at.
1308
  # Registered before the Gradio mount so they're not shadowed by the
1309
  # catch-all sub-app.
 
59
  render_public_url,
60
  )
61
  from gallery import render_gallery_page
62
+ from metrics_page import build_metrics_page
63
  from tasks import load_tasks_from_dir, render_tasks_page
64
  from admin import (
65
  VALID_METHODS,
 
651
  return Response(content=content, media_type="text/html; charset=utf-8")
652
 
653
 
654
def serve_metrics_page() -> Response:
    """Return the metrics explainer document for the ``/metrics`` route.

    The page shares an origin with the report proxy
    (``/reports/<id>.html``), which lets a hosted report's headline pills
    link to ``/metrics#<anchor>`` and land on the matching section. The
    "Metrics" Gradio tab loads this very route inside an iframe.
    """
    page_html = build_metrics_page()
    return HTMLResponse(content=page_html)
663
+
664
+
665
+ # Illustration assets the metrics page embeds (e.g. the interface-match
666
+ # mating-group WebP). Vendored into the Space repo under `assets/metrics/`
667
+ # and served here so the page renders self-contained, with no dependency
668
+ # on the code repo's raw GitHub URLs staying reachable.
669
+ METRICS_ASSETS_DIR = Path(__file__).parent / "assets" / "metrics"
670
+
671
+
672
def serve_metrics_asset(name: str) -> Response:
    """Serve a bundled metrics illustration from ``assets/metrics/``.

    The namespace is flat: ``name`` must be a bare file name. Anything
    that could address a different directory is answered with a 404.

    Args:
        name: File name captured from the ``/metrics-assets/{name}`` route.

    Returns:
        The file's bytes with a media type guessed from ``name`` (falling
        back to ``application/octet-stream``) and a ``Cache-Control``
        header set to ``RENDER_CACHE_CONTROL``; or an empty 404 response
        when the name is rejected or no such file exists.
    """
    # Traversal guard. Backslashes are refused alongside forward slashes
    # because they are path separators on Windows; the ``Path(name).name``
    # comparison additionally refuses empty, "." and drive-qualified names.
    is_bare_name = (
        "/" not in name
        and "\\" not in name
        and ".." not in name
        and Path(name).name == name
    )
    if not is_bare_name:
        return Response(status_code=404)
    path = METRICS_ASSETS_DIR / name
    if not path.is_file():
        return Response(status_code=404)
    media_type = mimetypes.guess_type(name)[0] or "application/octet-stream"
    return Response(
        content=path.read_bytes(),
        media_type=media_type,
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
689
+
690
+
691
  def _fetch_gt_render(fixture: str) -> bytes | None:
692
  """Pull a fixture's ground-truth GIF from the private GT dataset.
693
 
 
1040
  tasks_refresh_btn = gr.Button("Refresh tasks", size="sm")
1041
  tasks_refresh_btn.click(fn=_tasks_iframe_html, outputs=tasks_html)
1042
 
1043
+ with gr.Tab("Metrics"):
1044
+ # Static explainer for the (new) scoring metrics. Served as a
1045
+ # standalone `/metrics` route too, so the per-submission report's
1046
+ # headline pills can deep-link to `/metrics#<anchor>`; the tab just
1047
+ # embeds that same page in an iframe (single source of truth).
1048
+ gr.HTML(
1049
+ '<iframe src="/metrics" '
1050
+ 'style="width:100%; height:85vh; border:0; display:block;" '
1051
+ 'title="CADGenBench metrics"></iframe>'
1052
+ )
1053
+
1054
  with gr.Tab("Submit"):
1055
  gr.Markdown(
1056
  f"""
 
1353
  serve_report,
1354
  methods=["GET"],
1355
  )
1356
+ # Static metrics explainer. Same origin as the report proxy so report
1357
+ # pills can deep-link to `/metrics#<anchor>`; also embedded in the
1358
+ # Metrics tab. Registered before the Gradio mount so it isn't shadowed.
1359
+ app.add_api_route(
1360
+ "/metrics",
1361
+ serve_metrics_page,
1362
+ methods=["GET"],
1363
+ )
1364
+ # Illustration assets the metrics page embeds (vendored under assets/metrics/).
1365
+ app.add_api_route(
1366
+ "/metrics-assets/{name}",
1367
+ serve_metrics_asset,
1368
+ methods=["GET"],
1369
+ )
1370
  # Cached render proxies the gallery's lazy-loaded turntables point at.
1371
  # Registered before the Gradio mount so they're not shadowed by the
1372
  # catch-all sub-app.
assets/metrics/mating_group.webp ADDED

Git LFS Details

  • SHA256: 6b8e39727c55a6618eb2cdba4da3c505e583283ccaa91bd7a2685b084be0eb98
  • Pointer size: 131 Bytes
  • Size of remote file: 350 kB
metrics_page.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2026 Hugging Face
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Self-contained "Metrics" explainer page for the Space.
16
+
17
+ Builds one static, dependency-free HTML document explaining how a
18
+ candidate STEP is scored: the validity gate, the three orthogonal
19
+ axes (shape / topology / interface), and the editing renormalization.
20
+
21
+ It is curated (a Space-tailored summary, deliberately a little
22
+ duplicated from the canonical ``docs/metrics*`` in the code repo)
23
+ rather than rendered from those markdown files, because the docs use
24
+ repo-relative links + local illustration images that don't resolve
25
+ when hosted. The page links out to the GitHub deep-dives for the full
26
+ derivations, so the canonical source of truth stays there.
27
+
28
+ The page is served two ways from the same builder
29
+ (:func:`build_metrics_page`):
30
+
31
+ - as a standalone route ``/metrics`` (so the per-submission report's
32
+ headline metric pills can deep-link to ``/metrics#<anchor>``), and
33
+ - embedded in the "Metrics" Gradio tab via an iframe.
34
+
35
+ Formulas are plain monospace blocks (no MathJax / KaTeX), so the page
36
+ renders identically online and offline with no network dependency. The
37
+ anchor ids are a published contract the report links against; see
38
+ :data:`METRIC_ANCHORS`.
39
+ """
40
+ from __future__ import annotations
41
+
42
+ # Section anchor ids. The per-submission report's headline pills link to
43
+ # ``/metrics#<anchor>``; keep these stable (and in sync with the
44
+ # report's pill links in cadgenbench's single_run.py).
45
+ METRIC_ANCHORS = {
46
+ "cad_score": "cad-score",
47
+ "shape": "shape-similarity",
48
+ "interface": "interface-match",
49
+ "topology": "topology-match",
50
+ "validity": "validity",
51
+ "editing": "editing",
52
+ }
53
+
54
+ # Canonical deep-dive docs live in the code repo; linked from each
55
+ # section so the Space page stays a summary and the full derivations
56
+ # have one source of truth.
57
+ _DOCS_BASE = "https://github.com/huggingface/cadgenbench/blob/main/docs"
58
+
59
+ # Bundled illustration served by the Space (see app.py's /metrics-assets
60
+ # route). Root-relative, so it resolves same-origin whether the page is
61
+ # the standalone /metrics route or the iframe in the Metrics tab.
62
+ _MATING_GROUP_IMG = "/metrics-assets/mating_group.webp"
63
+
64
+ _CSS = """\
65
+ * { box-sizing: border-box; }
66
+ body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
67
+ max-width: 960px; margin: 0 auto; padding: 24px 20px 80px;
68
+ background: #f8f9fa; color: #1f2430; line-height: 1.55; }
69
+ a { color: #1565c0; }
70
+ h1 { font-size: 1.7em; margin: 0 0 4px; }
71
+ .lede { color: #5b6170; margin: 0 0 20px; }
72
+ .card { background: #fff; border: 1px solid #e3e5ea; border-radius: 12px;
73
+ padding: 20px 24px; margin: 16px 0; box-shadow: 0 1px 3px rgba(0,0,0,0.05);
74
+ scroll-margin-top: 16px; }
75
+ .card h2 { margin: 0 0 10px; font-size: 1.2em; display: flex; align-items: baseline;
76
+ gap: 10px; }
77
+ .card h3 { font-size: 0.98em; margin: 16px 0 4px; color: #37474f; }
78
+ .axis-tag { font-family: monospace; font-size: 0.62em; font-weight: 700;
79
+ text-transform: uppercase; letter-spacing: 0.04em; padding: 3px 8px;
80
+ border-radius: 6px; }
81
+ .t-cad { border-left: 5px solid #37474f; }
82
+ .t-cad .axis-tag { background: #eceff1; color: #37474f; }
83
+ .t-shape { border-left: 5px solid #1565c0; }
84
+ .t-shape .axis-tag { background: #e3f2fd; color: #1565c0; }
85
+ .t-iface { border-left: 5px solid #4527a0; }
86
+ .t-iface .axis-tag { background: #ede7f6; color: #4527a0; }
87
+ .t-topo { border-left: 5px solid #006d77; }
88
+ .t-topo .axis-tag { background: #d8f3f4; color: #006d77; }
89
+ .t-gate { border-left: 5px solid #c62828; }
90
+ .t-gate .axis-tag { background: #ffebee; color: #c62828; }
91
+ .t-edit { border-left: 5px solid #9e7700; }
92
+ .t-edit .axis-tag { background: #fff9c4; color: #9e7700; }
93
+ pre.formula { background: #0f1525; color: #e7ecf5; border-radius: 8px;
94
+ padding: 14px 16px; overflow-x: auto; font-size: 0.86em;
95
+ line-height: 1.5; margin: 10px 0; }
96
+ code { background: #eef0f4; padding: 1px 5px; border-radius: 4px;
97
+ font-size: 0.88em; }
98
+ table { border-collapse: collapse; width: 100%; margin: 12px 0; font-size: 0.92em; }
99
+ th, td { border: 1px solid #e3e5ea; padding: 7px 10px; text-align: left; }
100
+ th { background: #f5f7fa; }
101
+ .deep { font-size: 0.86em; color: #5b6170; margin-top: 12px; }
102
+ .toc { background: #fff; border: 1px solid #e3e5ea; border-radius: 12px;
103
+ padding: 14px 20px; margin: 16px 0; }
104
+ .toc ul { margin: 6px 0 0; padding-left: 18px; }
105
+ .note { color: #5b6170; font-size: 0.92em; }
106
+ figure.fig { margin: 14px 0; }
107
+ figure.fig img { display: block; width: 100%; max-width: 520px; height: auto;
108
+ border: 1px solid #e3e5ea; border-radius: 10px; background: #fff; }
109
+ figure.fig figcaption { font-size: 0.84em; color: #5b6170; margin-top: 6px;
110
+ max-width: 560px; }
111
+ .weight-pill { font-family: monospace; font-size: 0.8em; padding: 1px 7px;
112
+ border-radius: 6px; background: #eceff1; color: #37474f; }
113
+ """
114
+
115
+
116
+ def _section(
117
+ *, anchor: str, css_class: str, tag: str, title: str, body: str,
118
+ deep_dive: str | None = None,
119
+ ) -> str:
120
+ deep = (
121
+ f'<p class="deep">Full derivation: '
122
+ f'<a href="{deep_dive}" target="_blank" rel="noopener">{deep_dive}</a></p>'
123
+ if deep_dive
124
+ else ""
125
+ )
126
+ return (
127
+ f'<section class="card {css_class}" id="{anchor}">'
128
+ f'<h2><span class="axis-tag">{tag}</span>{title}</h2>'
129
+ f"{body}{deep}"
130
+ "</section>"
131
+ )
132
+
133
+
134
def build_metrics_page() -> str:
    """Assemble and return the complete Metrics explainer HTML document.

    Six cards are built with :func:`_section` (CAD score overview,
    validity gate, shape, topology, interface, editing), each anchored at
    the matching :data:`METRIC_ANCHORS` id. They are placed after a table
    of contents inside a single HTML document that inlines ``_CSS``.
    """
    anchors = METRIC_ANCHORS

    cad_score_card = _section(
        anchor=anchors["cad_score"],
        css_class="t-cad",
        tag="CAD Score",
        title="How one part is scored",
        body="".join([
            "<p>CADGenBench scores a generated part (a STEP file) against one ",
            "ground-truth STEP. First a hard <b>validity gate</b>; if it ",
            "passes, the <b>CAD Score</b> is a weighted mean of three ",
            "independent metrics, each in [0, 1].</p>",
            '<pre class="formula">',
            "cad_score = 0 if not valid\n",
            " = 0.4*shape + 0.4*interface + 0.2*topology otherwise",
            "</pre>",
            "<p class='note'>(This is the <b>generation</b> composition. ",
            "<b>Editing</b> tasks renormalize the shape axis and reweight — ",
            f'see <a href="#{anchors["editing"]}">Editing tasks</a>.)</p>',
            "<table><thead><tr><th>Component</th><th>Range</th>",
            "<th>What it asks</th></tr></thead><tbody>",
            f'<tr><td><a href="#{anchors["validity"]}">CAD Validity</a> (gate)</td>',
            "<td>{0, 1}</td><td>Is the geometry valid?</td></tr>",
            f'<tr><td><a href="#{anchors["shape"]}">Shape Similarity</a></td>',
            "<td>[0, 1]</td><td>Does the bulk geometry match?</td></tr>",
            f'<tr><td><a href="#{anchors["topology"]}">Topology Match</a></td>',
            "<td>[0, 1]</td><td>Same pieces / holes / voids?</td></tr>",
            f'<tr><td><a href="#{anchors["interface"]}">Interface Match</a></td>',
            "<td>[0, 1]</td><td>Does it bolt up to the same fixture?</td></tr>",
            "</tbody></table>",
            "<h3>Why three axes</h3>",
            "<p>They are orthogonal by construction — each catches errors the ",
            "others are blind to:</p>",
            "<ul>",
            "<li><b>Shape</b> catches wrong bulk geometry; blind to topology.</li>",
            "<li><b>Topology</b> catches wrong hole / piece / void counts; blind ",
            "to feature position.</li>",
            "<li><b>Interface</b> catches a misplaced / mis-sized mating feature; ",
            "blind to overall shape.</li>",
            "</ul>",
            "<p class='note'>Outputs are rigidly aligned to the ground truth ",
            "(rotation + translation only, never scale) before scoring.</p>",
        ]),
        deep_dive=f"{_DOCS_BASE}/metrics.md",
    )

    validity_card = _section(
        anchor=anchors["validity"],
        css_class="t-gate",
        tag="Gate",
        title="CAD Validity",
        body="".join([
            "<p>Runs before every other metric on the raw candidate STEP. Any ",
            "failure sets <code>is_valid = False</code> and forces ",
            "<code>cad_score = 0</code>, so an invalid solid never beats a worse ",
            "but valid one. Passing requires all of:</p>",
            "<ol>",
            "<li><b>Well-formed BREP</b> — no per-face / edge / vertex errors ",
            "(self-intersecting wires, edges off their surface, etc.).</li>",
            "<li><b>Watertight</b> — every shell is closed; no naked or free ",
            "edges.</li>",
            "<li><b>Meshable as a closed orientable manifold</b> — tessellates ",
            "to a manifold, closed (3F = 2E), orientation-consistent triangle ",
            "mesh.</li>",
            "</ol>",
        ]),
        deep_dive=f"{_DOCS_BASE}/metrics/cad_validity.md",
    )

    shape_card = _section(
        anchor=anchors["shape"],
        css_class="t-shape",
        tag="Shape",
        title="Shape Similarity",
        body="".join([
            "<p>Does the bulk geometry match? The mean of two complementary ",
            "sub-metrics, each in [0, 1]:</p>",
            '<pre class="formula">',
            "shape_similarity = 0.5 * (point_cloud_F1 + volume_IoU)",
            "</pre>",
            "<h3>Point-cloud F1</h3>",
            "<p>Checks the candidate's surface sits where the GT's does and ",
            "faces the same way. Points are sampled across both surfaces with ",
            "their outward normals; a point matches when the nearest point on ",
            "the other surface is within 0.5% of the GT bounding-box diagonal ",
            "<b>and</b> the normals agree to within 20°. Precision and recall ",
            "combine into F1.</p>",
            "<h3>Volume IoU</h3>",
            "<p>Shared volume of the two solids over their combined volume ",
            "(intersection over union), via a Boolean kernel.</p>",
            "<p class='note'>Both use a tolerance proportional to part size, so ",
            "small features can move without shifting the score — those are ",
            f'covered by <a href="#{anchors["interface"]}">interface match</a>.</p>',
        ]),
        deep_dive=f"{_DOCS_BASE}/metrics/shape_similarity.md",
    )

    topology_card = _section(
        anchor=anchors["topology"],
        css_class="t-topo",
        tag="Topo",
        title="Topology Match",
        body="".join([
            "<p>Does the candidate have the same number of pieces, ",
            "through-holes, and internal voids? It compares the three ",
            "<b>Betti numbers</b> of the solid:</p>",
            "<ul>",
            "<li><b>b&#8320;</b>: connected solid components (pieces).</li>",
            "<li><b>b&#8321;</b>: independent through-handles (e.g. ",
            "through-holes).</li>",
            "<li><b>b&#8322;</b>: enclosed internal voids (cavities).</li>",
            "</ul>",
            "<p>Each axis gets a fuzzy log-ratio against GT, sharpened by ",
            "&#945; = 2, and the three are <b>multiplied</b>:</p>",
            '<pre class="formula">',
            "s_i = ((min(cand,gt) + 1) / (max(cand,gt) + 1)) ^ 2\n",
            "topology_match = s_0 * s_1 * s_2",
            "</pre>",
            "<p>The product (not the mean) means one wrong count collapses the ",
            "score: topology is discrete, so two of three right is not a partial ",
            "match. Example: GT (1,2,0) vs candidate (1,4,0) scores ",
            "(3/5)&#178; = 0.36. Blind features (blind pockets, fillets, ",
            "chamfers) are topologically trivial and covered by the other ",
            "axes.</p>",
        ]),
        deep_dive=f"{_DOCS_BASE}/metrics/topo_match.md",
    )

    interface_card = _section(
        anchor=anchors["interface"],
        css_class="t-iface",
        tag="Interface",
        title="Interface Match",
        body="".join([
            "<p>Would it bolt up to the same fixture? Each mating feature is a ",
            "region of space the candidate must match in shape, size, and ",
            "position:</p>",
            "<ul>",
            "<li><b>Keep-out (KOR)</b> — must be empty (a bolt hole, a slot).</li>",
            "<li><b>Keep-in (KIR)</b> — must be solid (a locating boss, a ",
            "pin).</li>",
            "</ul>",
            "<h3>Mating groups</h3>",
            "<p>The features that must seat together against a single fixture ",
            "form one <b>mating group</b> — here, two bolt holes and a slot that ",
            "one jig drops into. A part can have several independent groups (say ",
            "a bolt pattern on one face and a boss on another), and each group ",
            "is scored on its own.</p>",
            '<figure class="fig">',
            f'<img src="{_MATING_GROUP_IMG}" loading="lazy" ',
            'alt="A jig with two pins and a slot key seating into a part\'s two ',
            'bolt holes and slot">',
            "<figcaption>A mating group: a jig with two pins and a slot key ",
            "seats into the part's two bolt holes and slot. The candidate has ",
            "to fit the same fixture.</figcaption>",
            "</figure>",
            "<h3>Scoring</h3>",
            "<p>Per group:</p>",
            "<ol>",
            "<li><b>Per-feature fit</b> — volumetric IoU against the region ",
            "(with a thin shell of opposite material, so both oversize and ",
            "undersize lose points).</li>",
            "<li><b>Bounded pose search</b> — &#177;1&#176; and &#177;1% of part ",
            "size per axis, so a feature isn't penalized for the residual of ",
            "whole-part alignment.</li>",
            "<li><b>Pass/fail ramp</b> — IoU &#8805; 0.95 &#8594; 1, &#8804; 0.80 ",
            "&#8594; 0, linear between; a sloppy fit scores 0.</li>",
            "</ol>",
            "<p>A group scores as its <b>worst</b> feature (the minimum); the ",
            "fixture scores as the <b>mean</b> over its groups, so nailing one ",
            "interface and missing another still earns partial credit.</p>",
            "<p class='note'>In the report's overlay: <b>blue</b> where it fits, ",
            "<b>red</b> where the candidate has material it shouldn't (too much), ",
            "<b>amber</b> where it's missing material it should have (too ",
            "little).</p>",
        ]),
        deep_dive=f"{_DOCS_BASE}/metrics/interface_match.md",
    )

    editing_card = _section(
        anchor=anchors["editing"],
        css_class="t-edit",
        tag="Editing",
        title="Editing tasks: no-op renormalization",
        body="".join([
            "<p>Editing fixtures ship an <code>input.step</code> plus an edit ",
            "request; the GT is a small change to that input. Since all three ",
            "axes measure global similarity, submitting the input unchanged ",
            "(the <b>no-op</b>) already scores high, so the raw composition ",
            "would reward doing nothing.</p>",
            "<p>The fix renormalizes the <b>shape</b> axis against the no-op ",
            "baseline <code>b = shape_similarity(input, GT)</code>:</p>",
            '<pre class="formula">',
            "s_renorm = max(0, (shape_similarity - b) / (1 - b))\n",
            "cad_score = 0.6*s_renorm + 0.3*interface + 0.1*topology (0 if not valid)",
            "</pre>",
            "<p>This maps the no-op to 0 and a perfect candidate to 1. Topology ",
            "and interface stay raw (most edits leave them unchanged). A no-op ",
            "therefore caps at 0.3 + 0.1 = 0.4, and any real shape improvement ",
            "clears it.</p>",
        ]),
        deep_dive=f"{_DOCS_BASE}/metrics.md#editing-tasks-no-op-renormalization",
    )

    # In-page navigation; every entry targets one of the card ids above.
    contents_nav = "".join([
        '<nav class="toc"><b>On this page</b><ul>',
        f'<li><a href="#{anchors["cad_score"]}">CAD Score &mdash; how one part is scored</a></li>',
        f'<li><a href="#{anchors["validity"]}">CAD Validity (gate)</a></li>',
        f'<li><a href="#{anchors["shape"]}">Shape Similarity</a></li>',
        f'<li><a href="#{anchors["topology"]}">Topology Match</a></li>',
        f'<li><a href="#{anchors["interface"]}">Interface Match</a></li>',
        f'<li><a href="#{anchors["editing"]}">Editing tasks</a></li>',
        "</ul></nav>",
    ])

    document_parts = [
        "<!DOCTYPE html><html lang='en'><head>",
        "<meta charset='utf-8'>",
        "<meta name='viewport' content='width=device-width, initial-scale=1'>",
        "<title>CADGenBench &mdash; Metrics</title>",
        f"<style>{_CSS}</style>",
        "</head><body>",
        "<h1>Metrics</h1>",
        "<p class='lede'>How CADGenBench scores one generated CAD part against ",
        "the ground truth. These metrics are new, so this page explains each ",
        "one; the canonical reference lives in the ",
        f'<a href="{_DOCS_BASE}/metrics.md" target="_blank" rel="noopener">',
        "code repo</a>.</p>",
        contents_nav,
        cad_score_card,
        validity_card,
        shape_card,
        topology_card,
        interface_card,
        editing_card,
        "</body></html>",
    ]
    return "".join(document_parts)
submit.py CHANGED
@@ -144,6 +144,11 @@ REPORTS_DIR = "reports"
144
  # registered in app.py and the constants in the eval job's eval_job.py.
145
  GT_PROXY_BASE_URL = "/gt"
146
  INPUT_PROXY_BASE_URL = "/task-input"
 
 
 
 
 
147
  DATA_REV_SHORT_LEN = 12
148
  FAILURE_REASON_MAX_CHARS = 200
149
  SHA256_BLOCK_SIZE = 64 * 1024
@@ -970,6 +975,32 @@ def _download_results_jsonl() -> str:
970
  return Path(path).read_text(encoding="utf-8")
971
 
972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
973
  def _resolve_data_revision() -> str:
974
  """Return a short sha for the cadgenbench-data dataset, cached per process.
975
 
@@ -1561,9 +1592,11 @@ def _merge_shards_and_publish(
1561
  # the Space's token-holding proxy routes.
1562
  html = generate_html(
1563
  run_data,
 
1564
  render_base_url=render_submission_base_url(submission_id),
1565
  gt_base_url=GT_PROXY_BASE_URL,
1566
  input_base_url=INPUT_PROXY_BASE_URL,
 
1567
  download_url=_submission_zip_url(submission_id),
1568
  )
1569
  html_path = tmp / f"{submission_id}.html"
 
144
  # registered in app.py and the constants in the eval job's eval_job.py.
145
  GT_PROXY_BASE_URL = "/gt"
146
  INPUT_PROXY_BASE_URL = "/task-input"
147
+ # Same-origin route the Space serves the metrics explainer at (see
148
+ # app.py). Passed to the report generator so its headline metric pills
149
+ # deep-link to `/metrics#<anchor>`; relative so it resolves against the
150
+ # Space origin whether the report is opened locally or on huggingface.co.
151
+ METRICS_PAGE_URL = "/metrics"
152
  DATA_REV_SHORT_LEN = 12
153
  FAILURE_REASON_MAX_CHARS = 200
154
  SHA256_BLOCK_SIZE = 64 * 1024
 
975
  return Path(path).read_text(encoding="utf-8")
976
 
977
 
978
def _submission_name_for(submission_id: str) -> str | None:
    """Human-readable submission name from the row, for the report heading.

    Reads ``results.jsonl`` via ``_download_results_jsonl()`` and returns
    the ``submission_name`` of the first row whose ``submission_id``
    matches, so the merged report can title itself with the submission
    name rather than the opaque id.

    Best effort: lines that are blank, are not valid JSON, or do not
    decode to a JSON object are skipped individually, so one corrupt row
    cannot hide the name stored on another. Any other failure (read miss,
    Hub blip) is logged as a warning and yields ``None``.

    Args:
        submission_id: Id of the submission whose name is wanted.

    Returns:
        The name as a string, or ``None`` when no matching row exists, the
        matching row has no (or an empty) name, or the lookup failed.
    """
    try:
        body = _download_results_jsonl()
        for line in body.splitlines():
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except ValueError:
                # json.JSONDecodeError subclasses ValueError; a single bad
                # line must not abort the scan of the remaining rows.
                continue
            if not isinstance(row, dict):
                continue
            if row.get("submission_id") == submission_id:
                name = row.get("submission_name")
                return str(name) if name else None
    except Exception as e:  # noqa: BLE001 - heading is cosmetic, never fail merge
        logger.warning(
            "Could not resolve submission_name for %s (%s: %s)",
            submission_id, type(e).__name__, e,
        )
    return None
1002
+
1003
+
1004
  def _resolve_data_revision() -> str:
1005
  """Return a short sha for the cadgenbench-data dataset, cached per process.
1006
 
 
1592
  # the Space's token-holding proxy routes.
1593
  html = generate_html(
1594
  run_data,
1595
+ submission_name=_submission_name_for(submission_id),
1596
  render_base_url=render_submission_base_url(submission_id),
1597
  gt_base_url=GT_PROXY_BASE_URL,
1598
  input_base_url=INPUT_PROXY_BASE_URL,
1599
+ metrics_base_url=METRICS_PAGE_URL,
1600
  download_url=_submission_zip_url(submission_id),
1601
  )
1602
  html_path = tmp / f"{submission_id}.html"