Michael Rabinovich Cursor committed on
Commit ·
28e0081
1
Parent(s): f17ac64
leaderboard: add Metrics tab/page + report deep-links
Browse files
New self-contained Metrics explainer (metrics_page.py) served at
/metrics and embedded in a Metrics tab: validity gate, the three axes,
editing renormalization, with formulas and the interface mating-group
illustration (vendored under assets/metrics/ via LFS, served by the
/metrics-assets route). Submit's merge path passes the submission name
and the /metrics base URL to the report generator so hosted reports
title themselves and their metric pills deep-link to the explainer.
Co-authored-by: Cursor <cursoragent@cursor.com>
- .gitattributes +1 -0
- app.py +63 -0
- assets/metrics/mating_group.webp +3 -0
- metrics_page.py +366 -0
- submit.py +33 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.webp filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -59,6 +59,7 @@ from leaderboard import (
|
|
| 59 |
render_public_url,
|
| 60 |
)
|
| 61 |
from gallery import render_gallery_page
|
|
|
|
| 62 |
from tasks import load_tasks_from_dir, render_tasks_page
|
| 63 |
from admin import (
|
| 64 |
VALID_METHODS,
|
|
@@ -650,6 +651,43 @@ def serve_report(submission_id: str) -> Response:
|
|
| 650 |
return Response(content=content, media_type="text/html; charset=utf-8")
|
| 651 |
|
| 652 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
def _fetch_gt_render(fixture: str) -> bytes | None:
|
| 654 |
"""Pull a fixture's ground-truth GIF from the private GT dataset.
|
| 655 |
|
|
@@ -1002,6 +1040,17 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
|
|
| 1002 |
tasks_refresh_btn = gr.Button("Refresh tasks", size="sm")
|
| 1003 |
tasks_refresh_btn.click(fn=_tasks_iframe_html, outputs=tasks_html)
|
| 1004 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1005 |
with gr.Tab("Submit"):
|
| 1006 |
gr.Markdown(
|
| 1007 |
f"""
|
|
@@ -1304,6 +1353,20 @@ app.add_api_route(
|
|
| 1304 |
serve_report,
|
| 1305 |
methods=["GET"],
|
| 1306 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1307 |
# Cached render proxies the gallery's lazy-loaded turntables point at.
|
| 1308 |
# Registered before the Gradio mount so they're not shadowed by the
|
| 1309 |
# catch-all sub-app.
|
|
|
|
| 59 |
render_public_url,
|
| 60 |
)
|
| 61 |
from gallery import render_gallery_page
|
| 62 |
+
from metrics_page import build_metrics_page
|
| 63 |
from tasks import load_tasks_from_dir, render_tasks_page
|
| 64 |
from admin import (
|
| 65 |
VALID_METHODS,
|
|
|
|
| 651 |
return Response(content=content, media_type="text/html; charset=utf-8")
|
| 652 |
|
| 653 |
|
| 654 |
+
def serve_metrics_page() -> Response:
    """Return the metrics explainer document for the ``/metrics`` route.

    The page shares an origin with the report proxy
    (``/reports/<id>.html``), which lets a hosted report's headline
    pills link straight to ``/metrics#<anchor>`` and land on the
    matching section. The "Metrics" Gradio tab shows this same route
    inside an iframe.
    """
    page = build_metrics_page()
    return HTMLResponse(content=page)
|
| 663 |
+
|
| 664 |
+
|
| 665 |
+
# Illustration assets the metrics page embeds (e.g. the interface-match
|
| 666 |
+
# mating-group WebP). Vendored into the Space repo under `assets/metrics/`
|
| 667 |
+
# and served here so the page renders self-contained, with no dependency
|
| 668 |
+
# on the code repo's raw GitHub URLs staying reachable.
|
| 669 |
+
METRICS_ASSETS_DIR = Path(__file__).parent / "assets" / "metrics"
|
| 670 |
+
|
| 671 |
+
|
| 672 |
+
def serve_metrics_asset(name: str) -> Response:
    """Serve a bundled metrics illustration from ``assets/metrics/``.

    Flat namespace (no nested paths), traversal-guarded. Cached hard:
    these are static, versioned-with-the-repo assets.

    Args:
        name: Bare file name taken from the ``/metrics-assets/{name}``
            URL segment.

    Returns:
        The file's bytes with a media type guessed from ``name`` and a
        ``Cache-Control`` header set to ``RENDER_CACHE_CONTROL``, or an
        empty 404 response when the name is rejected or no such file
        exists.
    """
    # Only bare file names are allowed. Backslashes are refused too so a
    # Windows-style separator cannot smuggle a nested path past the "/"
    # check.
    if "/" in name or "\\" in name or ".." in name:
        return Response(status_code=404)
    path = METRICS_ASSETS_DIR / name
    # Joining an absolute or drive-qualified name discards the base
    # directory, so confirm the result is still a direct child of the
    # assets directory before reading anything from disk.
    if path.parent != METRICS_ASSETS_DIR or not path.is_file():
        return Response(status_code=404)
    media_type = mimetypes.guess_type(name)[0] or "application/octet-stream"
    return Response(
        content=path.read_bytes(),
        media_type=media_type,
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
|
| 689 |
+
|
| 690 |
+
|
| 691 |
def _fetch_gt_render(fixture: str) -> bytes | None:
|
| 692 |
"""Pull a fixture's ground-truth GIF from the private GT dataset.
|
| 693 |
|
|
|
|
| 1040 |
tasks_refresh_btn = gr.Button("Refresh tasks", size="sm")
|
| 1041 |
tasks_refresh_btn.click(fn=_tasks_iframe_html, outputs=tasks_html)
|
| 1042 |
|
| 1043 |
+
with gr.Tab("Metrics"):
|
| 1044 |
+
# Static explainer for the (new) scoring metrics. Served as a
|
| 1045 |
+
# standalone `/metrics` route too, so the per-submission report's
|
| 1046 |
+
# headline pills can deep-link to `/metrics#<anchor>`; the tab just
|
| 1047 |
+
# embeds that same page in an iframe (single source of truth).
|
| 1048 |
+
gr.HTML(
|
| 1049 |
+
'<iframe src="/metrics" '
|
| 1050 |
+
'style="width:100%; height:85vh; border:0; display:block;" '
|
| 1051 |
+
'title="CADGenBench metrics"></iframe>'
|
| 1052 |
+
)
|
| 1053 |
+
|
| 1054 |
with gr.Tab("Submit"):
|
| 1055 |
gr.Markdown(
|
| 1056 |
f"""
|
|
|
|
| 1353 |
serve_report,
|
| 1354 |
methods=["GET"],
|
| 1355 |
)
|
| 1356 |
+
# Static metrics explainer. Same origin as the report proxy so report
|
| 1357 |
+
# pills can deep-link to `/metrics#<anchor>`; also embedded in the
|
| 1358 |
+
# Metrics tab. Registered before the Gradio mount so it isn't shadowed.
|
| 1359 |
+
app.add_api_route(
|
| 1360 |
+
"/metrics",
|
| 1361 |
+
serve_metrics_page,
|
| 1362 |
+
methods=["GET"],
|
| 1363 |
+
)
|
| 1364 |
+
# Illustration assets the metrics page embeds (vendored under assets/metrics/).
|
| 1365 |
+
app.add_api_route(
|
| 1366 |
+
"/metrics-assets/{name}",
|
| 1367 |
+
serve_metrics_asset,
|
| 1368 |
+
methods=["GET"],
|
| 1369 |
+
)
|
| 1370 |
# Cached render proxies the gallery's lazy-loaded turntables point at.
|
| 1371 |
# Registered before the Gradio mount so they're not shadowed by the
|
| 1372 |
# catch-all sub-app.
|
assets/metrics/mating_group.webp
ADDED
|
Git LFS Details
|
metrics_page.py
ADDED
|
@@ -0,0 +1,366 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2026 Hugging Face
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
"""Self-contained "Metrics" explainer page for the Space.
|
| 16 |
+
|
| 17 |
+
Builds one static, dependency-free HTML document explaining how a
|
| 18 |
+
candidate STEP is scored: the validity gate, the three orthogonal
|
| 19 |
+
axes (shape / topology / interface), and the editing renormalization.
|
| 20 |
+
|
| 21 |
+
It is curated (a Space-tailored summary, deliberately a little
|
| 22 |
+
duplicated from the canonical ``docs/metrics*`` in the code repo)
|
| 23 |
+
rather than rendered from those markdown files, because the docs use
|
| 24 |
+
repo-relative links + local illustration images that don't resolve
|
| 25 |
+
when hosted. The page links out to the GitHub deep-dives for the full
|
| 26 |
+
derivations, so the canonical source of truth stays there.
|
| 27 |
+
|
| 28 |
+
The page is served two ways from the same builder
|
| 29 |
+
(:func:`build_metrics_page`):
|
| 30 |
+
|
| 31 |
+
- as a standalone route ``/metrics`` (so the per-submission report's
|
| 32 |
+
headline metric pills can deep-link to ``/metrics#<anchor>``), and
|
| 33 |
+
- embedded in the "Metrics" Gradio tab via an iframe.
|
| 34 |
+
|
| 35 |
+
Formulas are plain monospace blocks (no MathJax / KaTeX), so the page
|
| 36 |
+
renders identically online and offline with no network dependency. The
|
| 37 |
+
anchor ids are a published contract the report links against; see
|
| 38 |
+
:data:`METRIC_ANCHORS`.
|
| 39 |
+
"""
|
| 40 |
+
from __future__ import annotations
|
| 41 |
+
|
| 42 |
+
# Section anchor ids. The per-submission report's headline pills link to
|
| 43 |
+
# ``/metrics#<anchor>``; keep these stable (and in sync with the
|
| 44 |
+
# report's pill links in cadgenbench's single_run.py).
|
| 45 |
+
METRIC_ANCHORS = {
|
| 46 |
+
"cad_score": "cad-score",
|
| 47 |
+
"shape": "shape-similarity",
|
| 48 |
+
"interface": "interface-match",
|
| 49 |
+
"topology": "topology-match",
|
| 50 |
+
"validity": "validity",
|
| 51 |
+
"editing": "editing",
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
# Canonical deep-dive docs live in the code repo; linked from each
|
| 55 |
+
# section so the Space page stays a summary and the full derivations
|
| 56 |
+
# have one source of truth.
|
| 57 |
+
_DOCS_BASE = "https://github.com/huggingface/cadgenbench/blob/main/docs"
|
| 58 |
+
|
| 59 |
+
# Bundled illustration served by the Space (see app.py's /metrics-assets
|
| 60 |
+
# route). Root-relative (no scheme or host) so it resolves same-origin whether the page is the
|
| 61 |
+
# standalone /metrics route or the iframe in the Metrics tab.
|
| 62 |
+
_MATING_GROUP_IMG = "/metrics-assets/mating_group.webp"
|
| 63 |
+
|
| 64 |
+
_CSS = """\
|
| 65 |
+
* { box-sizing: border-box; }
|
| 66 |
+
body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
| 67 |
+
max-width: 960px; margin: 0 auto; padding: 24px 20px 80px;
|
| 68 |
+
background: #f8f9fa; color: #1f2430; line-height: 1.55; }
|
| 69 |
+
a { color: #1565c0; }
|
| 70 |
+
h1 { font-size: 1.7em; margin: 0 0 4px; }
|
| 71 |
+
.lede { color: #5b6170; margin: 0 0 20px; }
|
| 72 |
+
.card { background: #fff; border: 1px solid #e3e5ea; border-radius: 12px;
|
| 73 |
+
padding: 20px 24px; margin: 16px 0; box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
| 74 |
+
scroll-margin-top: 16px; }
|
| 75 |
+
.card h2 { margin: 0 0 10px; font-size: 1.2em; display: flex; align-items: baseline;
|
| 76 |
+
gap: 10px; }
|
| 77 |
+
.card h3 { font-size: 0.98em; margin: 16px 0 4px; color: #37474f; }
|
| 78 |
+
.axis-tag { font-family: monospace; font-size: 0.62em; font-weight: 700;
|
| 79 |
+
text-transform: uppercase; letter-spacing: 0.04em; padding: 3px 8px;
|
| 80 |
+
border-radius: 6px; }
|
| 81 |
+
.t-cad { border-left: 5px solid #37474f; }
|
| 82 |
+
.t-cad .axis-tag { background: #eceff1; color: #37474f; }
|
| 83 |
+
.t-shape { border-left: 5px solid #1565c0; }
|
| 84 |
+
.t-shape .axis-tag { background: #e3f2fd; color: #1565c0; }
|
| 85 |
+
.t-iface { border-left: 5px solid #4527a0; }
|
| 86 |
+
.t-iface .axis-tag { background: #ede7f6; color: #4527a0; }
|
| 87 |
+
.t-topo { border-left: 5px solid #006d77; }
|
| 88 |
+
.t-topo .axis-tag { background: #d8f3f4; color: #006d77; }
|
| 89 |
+
.t-gate { border-left: 5px solid #c62828; }
|
| 90 |
+
.t-gate .axis-tag { background: #ffebee; color: #c62828; }
|
| 91 |
+
.t-edit { border-left: 5px solid #9e7700; }
|
| 92 |
+
.t-edit .axis-tag { background: #fff9c4; color: #9e7700; }
|
| 93 |
+
pre.formula { background: #0f1525; color: #e7ecf5; border-radius: 8px;
|
| 94 |
+
padding: 14px 16px; overflow-x: auto; font-size: 0.86em;
|
| 95 |
+
line-height: 1.5; margin: 10px 0; }
|
| 96 |
+
code { background: #eef0f4; padding: 1px 5px; border-radius: 4px;
|
| 97 |
+
font-size: 0.88em; }
|
| 98 |
+
table { border-collapse: collapse; width: 100%; margin: 12px 0; font-size: 0.92em; }
|
| 99 |
+
th, td { border: 1px solid #e3e5ea; padding: 7px 10px; text-align: left; }
|
| 100 |
+
th { background: #f5f7fa; }
|
| 101 |
+
.deep { font-size: 0.86em; color: #5b6170; margin-top: 12px; }
|
| 102 |
+
.toc { background: #fff; border: 1px solid #e3e5ea; border-radius: 12px;
|
| 103 |
+
padding: 14px 20px; margin: 16px 0; }
|
| 104 |
+
.toc ul { margin: 6px 0 0; padding-left: 18px; }
|
| 105 |
+
.note { color: #5b6170; font-size: 0.92em; }
|
| 106 |
+
figure.fig { margin: 14px 0; }
|
| 107 |
+
figure.fig img { display: block; width: 100%; max-width: 520px; height: auto;
|
| 108 |
+
border: 1px solid #e3e5ea; border-radius: 10px; background: #fff; }
|
| 109 |
+
figure.fig figcaption { font-size: 0.84em; color: #5b6170; margin-top: 6px;
|
| 110 |
+
max-width: 560px; }
|
| 111 |
+
.weight-pill { font-family: monospace; font-size: 0.8em; padding: 1px 7px;
|
| 112 |
+
border-radius: 6px; background: #eceff1; color: #37474f; }
|
| 113 |
+
"""
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _section(
|
| 117 |
+
*, anchor: str, css_class: str, tag: str, title: str, body: str,
|
| 118 |
+
deep_dive: str | None = None,
|
| 119 |
+
) -> str:
|
| 120 |
+
deep = (
|
| 121 |
+
f'<p class="deep">Full derivation: '
|
| 122 |
+
f'<a href="{deep_dive}" target="_blank" rel="noopener">{deep_dive}</a></p>'
|
| 123 |
+
if deep_dive
|
| 124 |
+
else ""
|
| 125 |
+
)
|
| 126 |
+
return (
|
| 127 |
+
f'<section class="card {css_class}" id="{anchor}">'
|
| 128 |
+
f'<h2><span class="axis-tag">{tag}</span>{title}</h2>'
|
| 129 |
+
f"{body}{deep}"
|
| 130 |
+
"</section>"
|
| 131 |
+
)
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def build_metrics_page() -> str:
    """Assemble and return the complete Metrics explainer as one HTML string.

    Six cards are rendered through ``_section`` (overview, validity,
    shape, topology, interface, editing), preceded by a table of
    contents, and wrapped in a document whose only styling is the
    inline ``_CSS``. Section ids come from ``METRIC_ANCHORS``.
    """
    # Anchor ids, pulled out once so the link markup below stays short.
    cad_id = METRIC_ANCHORS["cad_score"]
    shape_id = METRIC_ANCHORS["shape"]
    iface_id = METRIC_ANCHORS["interface"]
    topo_id = METRIC_ANCHORS["topology"]
    gate_id = METRIC_ANCHORS["validity"]
    edit_id = METRIC_ANCHORS["editing"]

    # --- Overview card: composition formula, component table, rationale.
    overview_body = (
        "<p>CADGenBench scores a generated part (a STEP file) against one "
        "ground-truth STEP. First a hard <b>validity gate</b>; if it "
        "passes, the <b>CAD Score</b> is a weighted mean of three "
        "independent metrics, each in [0, 1].</p>"
        '<pre class="formula">'
        "cad_score = 0 if not valid\n"
        " = 0.4*shape + 0.4*interface + 0.2*topology otherwise"
        "</pre>"
        "<p class='note'>(This is the <b>generation</b> composition. "
        "<b>Editing</b> tasks renormalize the shape axis and reweight — "
        f'see <a href="#{edit_id}">Editing tasks</a>.)</p>'
        "<table><thead><tr><th>Component</th><th>Range</th>"
        "<th>What it asks</th></tr></thead><tbody>"
        f'<tr><td><a href="#{gate_id}">CAD Validity</a> (gate)</td>'
        "<td>{0, 1}</td><td>Is the geometry valid?</td></tr>"
        f'<tr><td><a href="#{shape_id}">Shape Similarity</a></td>'
        "<td>[0, 1]</td><td>Does the bulk geometry match?</td></tr>"
        f'<tr><td><a href="#{topo_id}">Topology Match</a></td>'
        "<td>[0, 1]</td><td>Same pieces / holes / voids?</td></tr>"
        f'<tr><td><a href="#{iface_id}">Interface Match</a></td>'
        "<td>[0, 1]</td><td>Does it bolt up to the same fixture?</td></tr>"
        "</tbody></table>"
        "<h3>Why three axes</h3>"
        "<p>They are orthogonal by construction — each catches errors the "
        "others are blind to:</p>"
        "<ul>"
        "<li><b>Shape</b> catches wrong bulk geometry; blind to topology.</li>"
        "<li><b>Topology</b> catches wrong hole / piece / void counts; blind "
        "to feature position.</li>"
        "<li><b>Interface</b> catches a misplaced / mis-sized mating feature; "
        "blind to overall shape.</li>"
        "</ul>"
        "<p class='note'>Outputs are rigidly aligned to the ground truth "
        "(rotation + translation only, never scale) before scoring.</p>"
    )
    overview = _section(
        anchor=cad_id,
        css_class="t-cad",
        tag="CAD Score",
        title="How one part is scored",
        body=overview_body,
        deep_dive=f"{_DOCS_BASE}/metrics.md",
    )

    # --- Validity gate card.
    validity_body = (
        "<p>Runs before every other metric on the raw candidate STEP. Any "
        "failure sets <code>is_valid = False</code> and forces "
        "<code>cad_score = 0</code>, so an invalid solid never beats a worse "
        "but valid one. Passing requires all of:</p>"
        "<ol>"
        "<li><b>Well-formed BREP</b> — no per-face / edge / vertex errors "
        "(self-intersecting wires, edges off their surface, etc.).</li>"
        "<li><b>Watertight</b> — every shell is closed; no naked or free "
        "edges.</li>"
        "<li><b>Meshable as a closed orientable manifold</b> — tessellates "
        "to a manifold, closed (3F = 2E), orientation-consistent triangle "
        "mesh.</li>"
        "</ol>"
    )
    validity = _section(
        anchor=gate_id,
        css_class="t-gate",
        tag="Gate",
        title="CAD Validity",
        body=validity_body,
        deep_dive=f"{_DOCS_BASE}/metrics/cad_validity.md",
    )

    # --- Shape similarity card.
    shape_body = (
        "<p>Does the bulk geometry match? The mean of two complementary "
        "sub-metrics, each in [0, 1]:</p>"
        '<pre class="formula">'
        "shape_similarity = 0.5 * (point_cloud_F1 + volume_IoU)"
        "</pre>"
        "<h3>Point-cloud F1</h3>"
        "<p>Checks the candidate's surface sits where the GT's does and "
        "faces the same way. Points are sampled across both surfaces with "
        "their outward normals; a point matches when the nearest point on "
        "the other surface is within 0.5% of the GT bounding-box diagonal "
        "<b>and</b> the normals agree to within 20°. Precision and recall "
        "combine into F1.</p>"
        "<h3>Volume IoU</h3>"
        "<p>Shared volume of the two solids over their combined volume "
        "(intersection over union), via a Boolean kernel.</p>"
        "<p class='note'>Both use a tolerance proportional to part size, so "
        "small features can move without shifting the score — those are "
        f'covered by <a href="#{iface_id}">interface match</a>.</p>'
    )
    shape = _section(
        anchor=shape_id,
        css_class="t-shape",
        tag="Shape",
        title="Shape Similarity",
        body=shape_body,
        deep_dive=f"{_DOCS_BASE}/metrics/shape_similarity.md",
    )

    # --- Topology match card.
    topology_body = (
        "<p>Does the candidate have the same number of pieces, "
        "through-holes, and internal voids? It compares the three "
        "<b>Betti numbers</b> of the solid:</p>"
        "<ul>"
        "<li><b>b₀</b>: connected solid components (pieces).</li>"
        "<li><b>b₁</b>: independent through-handles (e.g. "
        "through-holes).</li>"
        "<li><b>b₂</b>: enclosed internal voids (cavities).</li>"
        "</ul>"
        "<p>Each axis gets a fuzzy log-ratio against GT, sharpened by "
        "α = 2, and the three are <b>multiplied</b>:</p>"
        '<pre class="formula">'
        "s_i = ((min(cand,gt) + 1) / (max(cand,gt) + 1)) ^ 2\n"
        "topology_match = s_0 * s_1 * s_2"
        "</pre>"
        "<p>The product (not the mean) means one wrong count collapses the "
        "score: topology is discrete, so two of three right is not a partial "
        "match. Example: GT (1,2,0) vs candidate (1,4,0) scores "
        "(3/5)² = 0.36. Blind features (blind pockets, fillets, "
        "chamfers) are topologically trivial and covered by the other "
        "axes.</p>"
    )
    topology = _section(
        anchor=topo_id,
        css_class="t-topo",
        tag="Topo",
        title="Topology Match",
        body=topology_body,
        deep_dive=f"{_DOCS_BASE}/metrics/topo_match.md",
    )

    # --- Interface match card (embeds the bundled illustration).
    interface_body = (
        "<p>Would it bolt up to the same fixture? Each mating feature is a "
        "region of space the candidate must match in shape, size, and "
        "position:</p>"
        "<ul>"
        "<li><b>Keep-out (KOR)</b> — must be empty (a bolt hole, a slot).</li>"
        "<li><b>Keep-in (KIR)</b> — must be solid (a locating boss, a "
        "pin).</li>"
        "</ul>"
        "<h3>Mating groups</h3>"
        "<p>The features that must seat together against a single fixture "
        "form one <b>mating group</b> — here, two bolt holes and a slot that "
        "one jig drops into. A part can have several independent groups (say "
        "a bolt pattern on one face and a boss on another), and each group "
        "is scored on its own.</p>"
        '<figure class="fig">'
        f'<img src="{_MATING_GROUP_IMG}" loading="lazy" '
        'alt="A jig with two pins and a slot key seating into a part\'s two '
        'bolt holes and slot">'
        "<figcaption>A mating group: a jig with two pins and a slot key "
        "seats into the part's two bolt holes and slot. The candidate has "
        "to fit the same fixture.</figcaption>"
        "</figure>"
        "<h3>Scoring</h3>"
        "<p>Per group:</p>"
        "<ol>"
        "<li><b>Per-feature fit</b> — volumetric IoU against the region "
        "(with a thin shell of opposite material, so both oversize and "
        "undersize lose points).</li>"
        "<li><b>Bounded pose search</b> — ±1° and ±1% of part "
        "size per axis, so a feature isn't penalized for the residual of "
        "whole-part alignment.</li>"
        "<li><b>Pass/fail ramp</b> — IoU ≥ 0.95 → 1, ≤ 0.80 "
        "→ 0, linear between; a sloppy fit scores 0.</li>"
        "</ol>"
        "<p>A group scores as its <b>worst</b> feature (the minimum); the "
        "fixture scores as the <b>mean</b> over its groups, so nailing one "
        "interface and missing another still earns partial credit.</p>"
        "<p class='note'>In the report's overlay: <b>blue</b> where it fits, "
        "<b>red</b> where the candidate has material it shouldn't (too much), "
        "<b>amber</b> where it's missing material it should have (too "
        "little).</p>"
    )
    interface = _section(
        anchor=iface_id,
        css_class="t-iface",
        tag="Interface",
        title="Interface Match",
        body=interface_body,
        deep_dive=f"{_DOCS_BASE}/metrics/interface_match.md",
    )

    # --- Editing renormalization card.
    editing_body = (
        "<p>Editing fixtures ship an <code>input.step</code> plus an edit "
        "request; the GT is a small change to that input. Since all three "
        "axes measure global similarity, submitting the input unchanged "
        "(the <b>no-op</b>) already scores high, so the raw composition "
        "would reward doing nothing.</p>"
        "<p>The fix renormalizes the <b>shape</b> axis against the no-op "
        "baseline <code>b = shape_similarity(input, GT)</code>:</p>"
        '<pre class="formula">'
        "s_renorm = max(0, (shape_similarity - b) / (1 - b))\n"
        "cad_score = 0.6*s_renorm + 0.3*interface + 0.1*topology (0 if not valid)"
        "</pre>"
        "<p>This maps the no-op to 0 and a perfect candidate to 1. Topology "
        "and interface stay raw (most edits leave them unchanged). A no-op "
        "therefore caps at 0.3 + 0.1 = 0.4, and any real shape improvement "
        "clears it.</p>"
    )
    editing = _section(
        anchor=edit_id,
        css_class="t-edit",
        tag="Editing",
        title="Editing tasks: no-op renormalization",
        body=editing_body,
        deep_dive=f"{_DOCS_BASE}/metrics.md#editing-tasks-no-op-renormalization",
    )

    # --- In-page navigation, one entry per card in display order.
    toc = (
        '<nav class="toc"><b>On this page</b><ul>'
        f'<li><a href="#{cad_id}">CAD Score — how one part is scored</a></li>'
        f'<li><a href="#{gate_id}">CAD Validity (gate)</a></li>'
        f'<li><a href="#{shape_id}">Shape Similarity</a></li>'
        f'<li><a href="#{topo_id}">Topology Match</a></li>'
        f'<li><a href="#{iface_id}">Interface Match</a></li>'
        f'<li><a href="#{edit_id}">Editing tasks</a></li>'
        "</ul></nav>"
    )

    # --- Document shell around the navigation and the six cards.
    head = (
        "<!DOCTYPE html><html lang='en'><head>"
        "<meta charset='utf-8'>"
        "<meta name='viewport' content='width=device-width, initial-scale=1'>"
        "<title>CADGenBench — Metrics</title>"
        f"<style>{_CSS}</style>"
        "</head><body>"
    )
    intro = (
        "<h1>Metrics</h1>"
        "<p class='lede'>How CADGenBench scores one generated CAD part against "
        "the ground truth. These metrics are new, so this page explains each "
        "one; the canonical reference lives in the "
        f'<a href="{_DOCS_BASE}/metrics.md" target="_blank" rel="noopener">'
        "code repo</a>.</p>"
    )
    parts = [
        head, intro, toc,
        overview, validity, shape, topology, interface, editing,
        "</body></html>",
    ]
    return "".join(parts)
|
submit.py
CHANGED
|
@@ -144,6 +144,11 @@ REPORTS_DIR = "reports"
|
|
| 144 |
# registered in app.py and the constants in the eval job's eval_job.py.
|
| 145 |
GT_PROXY_BASE_URL = "/gt"
|
| 146 |
INPUT_PROXY_BASE_URL = "/task-input"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
DATA_REV_SHORT_LEN = 12
|
| 148 |
FAILURE_REASON_MAX_CHARS = 200
|
| 149 |
SHA256_BLOCK_SIZE = 64 * 1024
|
|
@@ -970,6 +975,32 @@ def _download_results_jsonl() -> str:
|
|
| 970 |
return Path(path).read_text(encoding="utf-8")
|
| 971 |
|
| 972 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 973 |
def _resolve_data_revision() -> str:
|
| 974 |
"""Return a short sha for the cadgenbench-data dataset, cached per process.
|
| 975 |
|
|
@@ -1561,9 +1592,11 @@ def _merge_shards_and_publish(
|
|
| 1561 |
# the Space's token-holding proxy routes.
|
| 1562 |
html = generate_html(
|
| 1563 |
run_data,
|
|
|
|
| 1564 |
render_base_url=render_submission_base_url(submission_id),
|
| 1565 |
gt_base_url=GT_PROXY_BASE_URL,
|
| 1566 |
input_base_url=INPUT_PROXY_BASE_URL,
|
|
|
|
| 1567 |
download_url=_submission_zip_url(submission_id),
|
| 1568 |
)
|
| 1569 |
html_path = tmp / f"{submission_id}.html"
|
|
|
|
| 144 |
# registered in app.py and the constants in the eval job's eval_job.py.
|
| 145 |
GT_PROXY_BASE_URL = "/gt"
|
| 146 |
INPUT_PROXY_BASE_URL = "/task-input"
|
| 147 |
+
# Same-origin route the Space serves the metrics explainer at (see
|
| 148 |
+
# app.py). Passed to the report generator so its headline metric pills
|
| 149 |
+
# deep-link to `/metrics#<anchor>`; relative so it resolves against the
|
| 150 |
+
# Space origin whether the report is opened locally or on huggingface.co.
|
| 151 |
+
METRICS_PAGE_URL = "/metrics"
|
| 152 |
DATA_REV_SHORT_LEN = 12
|
| 153 |
FAILURE_REASON_MAX_CHARS = 200
|
| 154 |
SHA256_BLOCK_SIZE = 64 * 1024
|
|
|
|
| 975 |
return Path(path).read_text(encoding="utf-8")
|
| 976 |
|
| 977 |
|
| 978 |
+
def _submission_name_for(submission_id: str) -> str | None:
    """Human-readable submission name from the row, for the report heading.

    Read off ``results.jsonl`` (the pending row written at submit time
    already carries ``submission_name``) so the merged report can title
    itself with the submission name rather than the opaque id. Best
    effort: any read miss / Hub blip returns ``None`` and the report
    falls back to its ``CADGenBench / <timestamp>`` heading.

    Args:
        submission_id: Id to look for in each row's ``submission_id``.

    Returns:
        The first matching row's ``submission_name`` as a string, or
        ``None`` when no row matches, the name is empty or missing, or
        the lookup fails.
    """
    try:
        body = _download_results_jsonl()
        for line in body.splitlines():
            if not line.strip():
                continue
            try:
                row = json.loads(line)
            except ValueError:
                # One corrupt line must not hide a valid row further
                # down the file; skip it and keep scanning.
                continue
            if not isinstance(row, dict):
                # Valid JSON that is not an object cannot be a result row.
                continue
            if row.get("submission_id") == submission_id:
                name = row.get("submission_name")
                return str(name) if name else None
    except Exception as e:  # noqa: BLE001 - heading is cosmetic, never fail merge
        logger.warning(
            "Could not resolve submission_name for %s (%s: %s)",
            submission_id, type(e).__name__, e,
        )
    return None
|
| 1002 |
+
|
| 1003 |
+
|
| 1004 |
def _resolve_data_revision() -> str:
|
| 1005 |
"""Return a short sha for the cadgenbench-data dataset, cached per process.
|
| 1006 |
|
|
|
|
| 1592 |
# the Space's token-holding proxy routes.
|
| 1593 |
html = generate_html(
|
| 1594 |
run_data,
|
| 1595 |
+
submission_name=_submission_name_for(submission_id),
|
| 1596 |
render_base_url=render_submission_base_url(submission_id),
|
| 1597 |
gt_base_url=GT_PROXY_BASE_URL,
|
| 1598 |
input_base_url=INPUT_PROXY_BASE_URL,
|
| 1599 |
+
metrics_base_url=METRICS_PAGE_URL,
|
| 1600 |
download_url=_submission_zip_url(submission_id),
|
| 1601 |
)
|
| 1602 |
html_path = tmp / f"{submission_id}.html"
|