Spaces:

HuggingAI4Engineering
/

CADGenBench

Running

Michael Rabinovich Cursor committed on Jun 7

Commit

28e0081

1 Parent(s): f17ac64

leaderboard: add Metrics tab/page + report deep-links

New self-contained Metrics explainer (metrics_page.py) served at
/metrics and embedded in a Metrics tab: validity gate, the three axes,
editing renormalization, with formulas and the interface mating-group
illustration (vendored under assets/metrics/ via LFS, served by the
/metrics-assets route). Submit's merge path passes the submission name
and the /metrics base URL to the report generator so hosted reports
title themselves and their metric pills deep-link to the explainer.

Co-authored-by: Cursor <cursoragent@cursor.com>

Files changed (5) hide show

.gitattributes +1 -0
app.py +63 -0
assets/metrics/mating_group.webp +3 -0
metrics_page.py +366 -0
submit.py +33 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.webp filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -59,6 +59,7 @@ from leaderboard import (
     render_public_url,
 )
 from gallery import render_gallery_page
 from tasks import load_tasks_from_dir, render_tasks_page
 from admin import (
     VALID_METHODS,
@@ -650,6 +651,43 @@ def serve_report(submission_id: str) -> Response:
     return Response(content=content, media_type="text/html; charset=utf-8")
 def _fetch_gt_render(fixture: str) -> bytes | None:
     """Pull a fixture's ground-truth GIF from the private GT dataset.
@@ -1002,6 +1040,17 @@ with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as block
         tasks_refresh_btn = gr.Button("Refresh tasks", size="sm")
         tasks_refresh_btn.click(fn=_tasks_iframe_html, outputs=tasks_html)
     with gr.Tab("Submit"):
         gr.Markdown(
             f"""
@@ -1304,6 +1353,20 @@ app.add_api_route(
     serve_report,
     methods=["GET"],
 )
 # Cached render proxies the gallery's lazy-loaded turntables point at.
 # Registered before the Gradio mount so they're not shadowed by the
 # catch-all sub-app.

     render_public_url,
 )
 from gallery import render_gallery_page
+from metrics_page import build_metrics_page
 from tasks import load_tasks_from_dir, render_tasks_page
 from admin import (
     VALID_METHODS,
     return Response(content=content, media_type="text/html; charset=utf-8")
+def serve_metrics_page() -> Response:
+    """Serve the static metrics explainer at ``/metrics``.
+    Same-origin as the report proxy (``/reports/<id>.html``), so a
+    hosted report's headline pills can deep-link to ``/metrics#<anchor>``
+    and land on the matching section. The "Metrics" Gradio tab embeds
+    this same route in an iframe.
+    """
+    return HTMLResponse(content=build_metrics_page())
+# Illustration assets the metrics page embeds (e.g. the interface-match
+# mating-group WebP). Vendored into the Space repo under `assets/metrics/`
+# and served here so the page renders self-contained, with no dependency
+# on the code repo's raw GitHub URLs staying reachable.
+METRICS_ASSETS_DIR = Path(__file__).parent / "assets" / "metrics"
+def serve_metrics_asset(name: str) -> Response:
+    """Serve a bundled metrics illustration from ``assets/metrics/``.
+    Flat namespace (no nested paths), traversal-guarded. Cached hard:
+    these are static, versioned-with-the-repo assets.
+    """
+    if "/" in name or ".." in name:
+        return Response(status_code=404)
+    path = METRICS_ASSETS_DIR / name
+    if not path.is_file():
+        return Response(status_code=404)
+    media_type = mimetypes.guess_type(name)[0] or "application/octet-stream"
+    return Response(
+        content=path.read_bytes(),
+        media_type=media_type,
+        headers={"Cache-Control": RENDER_CACHE_CONTROL},
+    )
 def _fetch_gt_render(fixture: str) -> bytes | None:
     """Pull a fixture's ground-truth GIF from the private GT dataset.
         tasks_refresh_btn = gr.Button("Refresh tasks", size="sm")
         tasks_refresh_btn.click(fn=_tasks_iframe_html, outputs=tasks_html)
+    with gr.Tab("Metrics"):
+        # Static explainer for the (new) scoring metrics. Served as a
+        # standalone `/metrics` route too, so the per-submission report's
+        # headline pills can deep-link to `/metrics#<anchor>`; the tab just
+        # embeds that same page in an iframe (single source of truth).
+        gr.HTML(
+            '<iframe src="/metrics" '
+            'style="width:100%; height:85vh; border:0; display:block;" '
+            'title="CADGenBench metrics"></iframe>'
+        )
     with gr.Tab("Submit"):
         gr.Markdown(
             f"""
     serve_report,
     methods=["GET"],
 )
+# Metrics explainer page. It lives on the same origin as the report
+# proxy, which lets report pills deep-link to `/metrics#<anchor>`, and
+# the Metrics tab embeds it too. Registered ahead of the Gradio mount so
+# the route isn't shadowed.
+app.add_api_route("/metrics", serve_metrics_page, methods=["GET"])
+# Bundled illustrations the metrics page references (files vendored
+# under assets/metrics/).
+app.add_api_route("/metrics-assets/{name}", serve_metrics_asset, methods=["GET"])
 # Cached render proxies the gallery's lazy-loaded turntables point at.
 # Registered before the Gradio mount so they're not shadowed by the
 # catch-all sub-app.

assets/metrics/mating_group.webp ADDED Viewed

Git LFS Details

SHA256: 6b8e39727c55a6618eb2cdba4da3c505e583283ccaa91bd7a2685b084be0eb98
Pointer size: 131 Bytes
Size of remote file: 350 kB

metrics_page.py ADDED Viewed

	@@ -0,0 +1,366 @@

+# Copyright 2026 Hugging Face
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Self-contained "Metrics" explainer page for the Space.
+Builds one static, dependency-free HTML document explaining how a
+candidate STEP is scored: the validity gate, the three orthogonal
+axes (shape / topology / interface), and the editing renormalization.
+It is curated (a Space-tailored summary, deliberately a little
+duplicated from the canonical ``docs/metrics*`` in the code repo)
+rather than rendered from those markdown files, because the docs use
+repo-relative links + local illustration images that don't resolve
+when hosted. The page links out to the GitHub deep-dives for the full
+derivations, so the canonical source of truth stays there.
+The page is served two ways from the same builder
+(:func:`build_metrics_page`):
+- as a standalone route ``/metrics`` (so the per-submission report's
+  headline metric pills can deep-link to ``/metrics#<anchor>``), and
+- embedded in the "Metrics" Gradio tab via an iframe.
+Formulas are plain monospace blocks (no MathJax / KaTeX), so the page
+renders identically online and offline with no network dependency. The
+anchor ids are a published contract the report links against; see
+:data:`METRIC_ANCHORS`.
+"""
+from __future__ import annotations
+# Section anchor ids. The per-submission report's headline pills link to
+# ``/metrics#<anchor>``; keep these stable (and in sync with the
+# report's pill links in cadgenbench's single_run.py).
+METRIC_ANCHORS = {
+    "cad_score": "cad-score",
+    "shape": "shape-similarity",
+    "interface": "interface-match",
+    "topology": "topology-match",
+    "validity": "validity",
+    "editing": "editing",
+}
+# Canonical deep-dive docs live in the code repo; linked from each
+# section so the Space page stays a summary and the full derivations
+# have one source of truth.
+_DOCS_BASE = "https://github.com/huggingface/cadgenbench/blob/main/docs"
+# Bundled illustration served by the Space (see app.py's /metrics-assets
+# route). Root-relative so it resolves same-origin whether the page is the
+# standalone /metrics route or the iframe in the Metrics tab.
+_MATING_GROUP_IMG = "/metrics-assets/mating_group.webp"
+_CSS = """\
+* { box-sizing: border-box; }
+body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
+       max-width: 960px; margin: 0 auto; padding: 24px 20px 80px;
+       background: #f8f9fa; color: #1f2430; line-height: 1.55; }
+a { color: #1565c0; }
+h1 { font-size: 1.7em; margin: 0 0 4px; }
+.lede { color: #5b6170; margin: 0 0 20px; }
+.card { background: #fff; border: 1px solid #e3e5ea; border-radius: 12px;
+        padding: 20px 24px; margin: 16px 0; box-shadow: 0 1px 3px rgba(0,0,0,0.05);
+        scroll-margin-top: 16px; }
+.card h2 { margin: 0 0 10px; font-size: 1.2em; display: flex; align-items: baseline;
+           gap: 10px; }
+.card h3 { font-size: 0.98em; margin: 16px 0 4px; color: #37474f; }
+.axis-tag { font-family: monospace; font-size: 0.62em; font-weight: 700;
+            text-transform: uppercase; letter-spacing: 0.04em; padding: 3px 8px;
+            border-radius: 6px; }
+.t-cad { border-left: 5px solid #37474f; }
+.t-cad .axis-tag { background: #eceff1; color: #37474f; }
+.t-shape { border-left: 5px solid #1565c0; }
+.t-shape .axis-tag { background: #e3f2fd; color: #1565c0; }
+.t-iface { border-left: 5px solid #4527a0; }
+.t-iface .axis-tag { background: #ede7f6; color: #4527a0; }
+.t-topo { border-left: 5px solid #006d77; }
+.t-topo .axis-tag { background: #d8f3f4; color: #006d77; }
+.t-gate { border-left: 5px solid #c62828; }
+.t-gate .axis-tag { background: #ffebee; color: #c62828; }
+.t-edit { border-left: 5px solid #9e7700; }
+.t-edit .axis-tag { background: #fff9c4; color: #9e7700; }
+pre.formula { background: #0f1525; color: #e7ecf5; border-radius: 8px;
+              padding: 14px 16px; overflow-x: auto; font-size: 0.86em;
+              line-height: 1.5; margin: 10px 0; }
+code { background: #eef0f4; padding: 1px 5px; border-radius: 4px;
+       font-size: 0.88em; }
+table { border-collapse: collapse; width: 100%; margin: 12px 0; font-size: 0.92em; }
+th, td { border: 1px solid #e3e5ea; padding: 7px 10px; text-align: left; }
+th { background: #f5f7fa; }
+.deep { font-size: 0.86em; color: #5b6170; margin-top: 12px; }
+.toc { background: #fff; border: 1px solid #e3e5ea; border-radius: 12px;
+       padding: 14px 20px; margin: 16px 0; }
+.toc ul { margin: 6px 0 0; padding-left: 18px; }
+.note { color: #5b6170; font-size: 0.92em; }
+figure.fig { margin: 14px 0; }
+figure.fig img { display: block; width: 100%; max-width: 520px; height: auto;
+                 border: 1px solid #e3e5ea; border-radius: 10px; background: #fff; }
+figure.fig figcaption { font-size: 0.84em; color: #5b6170; margin-top: 6px;
+                        max-width: 560px; }
+.weight-pill { font-family: monospace; font-size: 0.8em; padding: 1px 7px;
+               border-radius: 6px; background: #eceff1; color: #37474f; }
+"""
+def _section(
+    *, anchor: str, css_class: str, tag: str, title: str, body: str,
+    deep_dive: str | None = None,
+) -> str:
+    """Render one explainer card as a ``<section>`` HTML fragment.
+    ``anchor`` becomes the element id, ``css_class`` is appended to the
+    ``card`` class, ``tag`` fills the small axis label in front of
+    ``title``, and ``body`` is inserted as-is (it is expected to be
+    ready-made HTML; nothing here escapes it). When ``deep_dive`` is a
+    non-empty URL, a trailing "Full derivation" paragraph links to it,
+    using the URL itself as the link text.
+    """
+    pieces = [
+        f'<section class="card {css_class}" id="{anchor}">',
+        f'<h2><span class="axis-tag">{tag}</span>{title}</h2>',
+        body,
+    ]
+    if deep_dive:
+        # Optional footer pointing at the canonical write-up.
+        pieces.append(
+            '<p class="deep">Full derivation: '
+            f'<a href="{deep_dive}" target="_blank" rel="noopener">{deep_dive}</a></p>'
+        )
+    pieces.append("</section>")
+    return "".join(pieces)
+def build_metrics_page() -> str:
+    """Return the full self-contained Metrics explainer HTML document.
+    Builds six cards with :func:`_section` (overview, validity, shape,
+    topology, interface, editing), each using its id from
+    :data:`METRIC_ANCHORS`, plus a table of contents that links to those
+    same ids. The result is one HTML string with ``_CSS`` inlined in a
+    ``<style>`` element; the only external resource it references is the
+    mating-group illustration at ``_MATING_GROUP_IMG``. Takes no
+    arguments and reads only module-level constants.
+    """
+    # Short alias: the anchor ids are interpolated many times below.
+    a = METRIC_ANCHORS
+    # Overview card: validity gate, weighted composition, component table.
+    overview = _section(
+        anchor=a["cad_score"],
+        css_class="t-cad",
+        tag="CAD Score",
+        title="How one part is scored",
+        body=(
+            "<p>CADGenBench scores a generated part (a STEP file) against one "
+            "ground-truth STEP. First a hard <b>validity gate</b>; if it "
+            "passes, the <b>CAD Score</b> is a weighted mean of three "
+            "independent metrics, each in [0, 1].</p>"
+            '<pre class="formula">'
+            "cad_score = 0                                                if not valid\n"
+            "          = 0.4*shape + 0.4*interface + 0.2*topology          otherwise"
+            "</pre>"
+            "<p class='note'>(This is the <b>generation</b> composition. "
+            "<b>Editing</b> tasks renormalize the shape axis and reweight — "
+            f'see <a href="#{a["editing"]}">Editing tasks</a>.)</p>'
+            "<table><thead><tr><th>Component</th><th>Range</th>"
+            "<th>What it asks</th></tr></thead><tbody>"
+            f'<tr><td><a href="#{a["validity"]}">CAD Validity</a> (gate)</td>'
+            # Plain (non-f) literal on purpose: the braces in "{0, 1}" are page text.
+            "<td>{0, 1}</td><td>Is the geometry valid?</td></tr>"
+            f'<tr><td><a href="#{a["shape"]}">Shape Similarity</a></td>'
+            "<td>[0, 1]</td><td>Does the bulk geometry match?</td></tr>"
+            f'<tr><td><a href="#{a["topology"]}">Topology Match</a></td>'
+            "<td>[0, 1]</td><td>Same pieces / holes / voids?</td></tr>"
+            f'<tr><td><a href="#{a["interface"]}">Interface Match</a></td>'
+            "<td>[0, 1]</td><td>Does it bolt up to the same fixture?</td></tr>"
+            "</tbody></table>"
+            "<h3>Why three axes</h3>"
+            "<p>They are orthogonal by construction — each catches errors the "
+            "others are blind to:</p>"
+            "<ul>"
+            "<li><b>Shape</b> catches wrong bulk geometry; blind to topology.</li>"
+            "<li><b>Topology</b> catches wrong hole / piece / void counts; blind "
+            "to feature position.</li>"
+            "<li><b>Interface</b> catches a misplaced / mis-sized mating feature; "
+            "blind to overall shape.</li>"
+            "</ul>"
+            "<p class='note'>Outputs are rigidly aligned to the ground truth "
+            "(rotation + translation only, never scale) before scoring.</p>"
+        ),
+        deep_dive=f"{_DOCS_BASE}/metrics.md",
+    )
+    # Validity card: the three conditions a candidate must pass.
+    validity = _section(
+        anchor=a["validity"],
+        css_class="t-gate",
+        tag="Gate",
+        title="CAD Validity",
+        body=(
+            "<p>Runs before every other metric on the raw candidate STEP. Any "
+            "failure sets <code>is_valid = False</code> and forces "
+            "<code>cad_score = 0</code>, so an invalid solid never beats a worse "
+            "but valid one. Passing requires all of:</p>"
+            "<ol>"
+            "<li><b>Well-formed BREP</b> — no per-face / edge / vertex errors "
+            "(self-intersecting wires, edges off their surface, etc.).</li>"
+            "<li><b>Watertight</b> — every shell is closed; no naked or free "
+            "edges.</li>"
+            "<li><b>Meshable as a closed orientable manifold</b> — tessellates "
+            "to a manifold, closed (3F = 2E), orientation-consistent triangle "
+            "mesh.</li>"
+            "</ol>"
+        ),
+        deep_dive=f"{_DOCS_BASE}/metrics/cad_validity.md",
+    )
+    # Shape card: point-cloud F1 and volume IoU, averaged.
+    shape = _section(
+        anchor=a["shape"],
+        css_class="t-shape",
+        tag="Shape",
+        title="Shape Similarity",
+        body=(
+            "<p>Does the bulk geometry match? The mean of two complementary "
+            "sub-metrics, each in [0, 1]:</p>"
+            '<pre class="formula">'
+            "shape_similarity = 0.5 * (point_cloud_F1 + volume_IoU)"
+            "</pre>"
+            "<h3>Point-cloud F1</h3>"
+            "<p>Checks the candidate's surface sits where the GT's does and "
+            "faces the same way. Points are sampled across both surfaces with "
+            "their outward normals; a point matches when the nearest point on "
+            "the other surface is within 0.5% of the GT bounding-box diagonal "
+            "<b>and</b> the normals agree to within 20°. Precision and recall "
+            "combine into F1.</p>"
+            "<h3>Volume IoU</h3>"
+            "<p>Shared volume of the two solids over their combined volume "
+            "(intersection over union), via a Boolean kernel.</p>"
+            "<p class='note'>Both use a tolerance proportional to part size, so "
+            "small features can move without shifting the score — those are "
+            f'covered by <a href="#{a["interface"]}">interface match</a>.</p>'
+        ),
+        deep_dive=f"{_DOCS_BASE}/metrics/shape_similarity.md",
+    )
+    # Topology card: Betti-number comparison and its product formula.
+    topology = _section(
+        anchor=a["topology"],
+        css_class="t-topo",
+        tag="Topo",
+        title="Topology Match",
+        body=(
+            "<p>Does the candidate have the same number of pieces, "
+            "through-holes, and internal voids? It compares the three "
+            "<b>Betti numbers</b> of the solid:</p>"
+            "<ul>"
+            "<li><b>b&#8320;</b>: connected solid components (pieces).</li>"
+            "<li><b>b&#8321;</b>: independent through-handles (e.g. "
+            "through-holes).</li>"
+            "<li><b>b&#8322;</b>: enclosed internal voids (cavities).</li>"
+            "</ul>"
+            "<p>Each axis gets a fuzzy log-ratio against GT, sharpened by "
+            "&#945; = 2, and the three are <b>multiplied</b>:</p>"
+            '<pre class="formula">'
+            "s_i = ((min(cand,gt) + 1) / (max(cand,gt) + 1)) ^ 2\n"
+            "topology_match = s_0 * s_1 * s_2"
+            "</pre>"
+            "<p>The product (not the mean) means one wrong count collapses the "
+            "score: topology is discrete, so two of three right is not a partial "
+            "match. Example: GT (1,2,0) vs candidate (1,4,0) scores "
+            "(3/5)&#178; = 0.36. Blind features (blind pockets, fillets, "
+            "chamfers) are topologically trivial and covered by the other "
+            "axes.</p>"
+        ),
+        deep_dive=f"{_DOCS_BASE}/metrics/topo_match.md",
+    )
+    # Interface card: keep-in / keep-out regions, mating groups, scoring,
+    # and the one bundled illustration.
+    interface = _section(
+        anchor=a["interface"],
+        css_class="t-iface",
+        tag="Interface",
+        title="Interface Match",
+        body=(
+            "<p>Would it bolt up to the same fixture? Each mating feature is a "
+            "region of space the candidate must match in shape, size, and "
+            "position:</p>"
+            "<ul>"
+            "<li><b>Keep-out (KOR)</b> — must be empty (a bolt hole, a slot).</li>"
+            "<li><b>Keep-in (KIR)</b> — must be solid (a locating boss, a "
+            "pin).</li>"
+            "</ul>"
+            "<h3>Mating groups</h3>"
+            "<p>The features that must seat together against a single fixture "
+            "form one <b>mating group</b> — here, two bolt holes and a slot that "
+            "one jig drops into. A part can have several independent groups (say "
+            "a bolt pattern on one face and a boss on another), and each group "
+            "is scored on its own.</p>"
+            '<figure class="fig">'
+            f'<img src="{_MATING_GROUP_IMG}" loading="lazy" '
+            'alt="A jig with two pins and a slot key seating into a part\'s two '
+            'bolt holes and slot">'
+            "<figcaption>A mating group: a jig with two pins and a slot key "
+            "seats into the part's two bolt holes and slot. The candidate has "
+            "to fit the same fixture.</figcaption>"
+            "</figure>"
+            "<h3>Scoring</h3>"
+            "<p>Per group:</p>"
+            "<ol>"
+            "<li><b>Per-feature fit</b> — volumetric IoU against the region "
+            "(with a thin shell of opposite material, so both oversize and "
+            "undersize lose points).</li>"
+            "<li><b>Bounded pose search</b> — &#177;1&#176; and &#177;1% of part "
+            "size per axis, so a feature isn't penalized for the residual of "
+            "whole-part alignment.</li>"
+            "<li><b>Pass/fail ramp</b> — IoU &#8805; 0.95 &#8594; 1, &#8804; 0.80 "
+            "&#8594; 0, linear between; a sloppy fit scores 0.</li>"
+            "</ol>"
+            "<p>A group scores as its <b>worst</b> feature (the minimum); the "
+            "fixture scores as the <b>mean</b> over its groups, so nailing one "
+            "interface and missing another still earns partial credit.</p>"
+            "<p class='note'>In the report's overlay: <b>blue</b> where it fits, "
+            "<b>red</b> where the candidate has material it shouldn't (too much), "
+            "<b>amber</b> where it's missing material it should have (too "
+            "little).</p>"
+        ),
+        deep_dive=f"{_DOCS_BASE}/metrics/interface_match.md",
+    )
+    # Editing card: why the no-op baseline is subtracted from the shape axis.
+    editing = _section(
+        anchor=a["editing"],
+        css_class="t-edit",
+        tag="Editing",
+        title="Editing tasks: no-op renormalization",
+        body=(
+            "<p>Editing fixtures ship an <code>input.step</code> plus an edit "
+            "request; the GT is a small change to that input. Since all three "
+            "axes measure global similarity, submitting the input unchanged "
+            "(the <b>no-op</b>) already scores high, so the raw composition "
+            "would reward doing nothing.</p>"
+            "<p>The fix renormalizes the <b>shape</b> axis against the no-op "
+            "baseline <code>b = shape_similarity(input, GT)</code>:</p>"
+            '<pre class="formula">'
+            "s_renorm  = max(0, (shape_similarity - b) / (1 - b))\n"
+            "cad_score = 0.6*s_renorm + 0.3*interface + 0.1*topology   (0 if not valid)"
+            "</pre>"
+            "<p>This maps the no-op to 0 and a perfect candidate to 1. Topology "
+            "and interface stay raw (most edits leave them unchanged). A no-op "
+            "therefore caps at 0.3 + 0.1 = 0.4, and any real shape improvement "
+            "clears it.</p>"
+        ),
+        deep_dive=f"{_DOCS_BASE}/metrics.md#editing-tasks-no-op-renormalization",
+    )
+    # Table of contents; entries follow the order the cards are emitted in.
+    toc = (
+        '<nav class="toc"><b>On this page</b><ul>'
+        f'<li><a href="#{a["cad_score"]}">CAD Score &mdash; how one part is scored</a></li>'
+        f'<li><a href="#{a["validity"]}">CAD Validity (gate)</a></li>'
+        f'<li><a href="#{a["shape"]}">Shape Similarity</a></li>'
+        f'<li><a href="#{a["topology"]}">Topology Match</a></li>'
+        f'<li><a href="#{a["interface"]}">Interface Match</a></li>'
+        f'<li><a href="#{a["editing"]}">Editing tasks</a></li>'
+        "</ul></nav>"
+    )
+    # Document shell: head with inline CSS, then lede, TOC, and the cards.
+    return (
+        "<!DOCTYPE html><html lang='en'><head>"
+        "<meta charset='utf-8'>"
+        "<meta name='viewport' content='width=device-width, initial-scale=1'>"
+        "<title>CADGenBench &mdash; Metrics</title>"
+        f"<style>{_CSS}</style>"
+        "</head><body>"
+        "<h1>Metrics</h1>"
+        "<p class='lede'>How CADGenBench scores one generated CAD part against "
+        "the ground truth. These metrics are new, so this page explains each "
+        "one; the canonical reference lives in the "
+        f'<a href="{_DOCS_BASE}/metrics.md" target="_blank" rel="noopener">'
+        "code repo</a>.</p>"
+        f"{toc}{overview}{validity}{shape}{topology}{interface}{editing}"
+        "</body></html>"
+    )

submit.py CHANGED Viewed

@@ -144,6 +144,11 @@ REPORTS_DIR = "reports"
 # registered in app.py and the constants in the eval job's eval_job.py.
 GT_PROXY_BASE_URL = "/gt"
 INPUT_PROXY_BASE_URL = "/task-input"
 DATA_REV_SHORT_LEN = 12
 FAILURE_REASON_MAX_CHARS = 200
 SHA256_BLOCK_SIZE = 64 * 1024
@@ -970,6 +975,32 @@ def _download_results_jsonl() -> str:
     return Path(path).read_text(encoding="utf-8")
 def _resolve_data_revision() -> str:
     """Return a short sha for the cadgenbench-data dataset, cached per process.
@@ -1561,9 +1592,11 @@ def _merge_shards_and_publish(
         # the Space's token-holding proxy routes.
         html = generate_html(
             run_data,
             render_base_url=render_submission_base_url(submission_id),
             gt_base_url=GT_PROXY_BASE_URL,
             input_base_url=INPUT_PROXY_BASE_URL,
             download_url=_submission_zip_url(submission_id),
         )
         html_path = tmp / f"{submission_id}.html"

 # registered in app.py and the constants in the eval job's eval_job.py.
 GT_PROXY_BASE_URL = "/gt"
 INPUT_PROXY_BASE_URL = "/task-input"
+# Same-origin route the Space serves the metrics explainer at (see
+# app.py). Passed to the report generator so its headline metric pills
+# deep-link to `/metrics#<anchor>`; relative so it resolves against the
+# Space origin whether the report is opened locally or on huggingface.co.
+METRICS_PAGE_URL = "/metrics"
 DATA_REV_SHORT_LEN = 12
 FAILURE_REASON_MAX_CHARS = 200
 SHA256_BLOCK_SIZE = 64 * 1024
     return Path(path).read_text(encoding="utf-8")
+def _submission_name_for(submission_id: str) -> str | None:
+    """Human-readable submission name from the row, for the report heading.
+    Scans ``results.jsonl`` for the first row whose ``submission_id``
+    matches and returns its ``submission_name`` as a string, so the
+    merged report can title itself with the submission name rather than
+    the opaque id. Best effort: a failed download, a missing row, or an
+    empty name returns ``None`` and the report falls back to its
+    ``CADGenBench / <timestamp>`` heading. Lines that are not valid JSON
+    objects are skipped one by one, so a single corrupt row elsewhere in
+    the file cannot hide the name of the row being looked up.
+    """
+    try:
+        body = _download_results_jsonl()
+        for line in body.splitlines():
+            if not line.strip():
+                continue
+            try:
+                row = json.loads(line)
+            except json.JSONDecodeError:
+                # Damaged line: ignore it and keep looking for the target row.
+                continue
+            if not isinstance(row, dict):
+                # Valid JSON but not an object, so it cannot carry the fields.
+                continue
+            if row.get("submission_id") == submission_id:
+                name = row.get("submission_name")
+                return str(name) if name else None
+    except Exception as e:  # noqa: BLE001 - heading is cosmetic, never fail merge
+        logger.warning(
+            "Could not resolve submission_name for %s (%s: %s)",
+            submission_id, type(e).__name__, e,
+        )
+    return None
 def _resolve_data_revision() -> str:
     """Return a short sha for the cadgenbench-data dataset, cached per process.
         # the Space's token-holding proxy routes.
         html = generate_html(
             run_data,
+            submission_name=_submission_name_for(submission_id),
             render_base_url=render_submission_base_url(submission_id),
             gt_base_url=GT_PROXY_BASE_URL,
             input_base_url=INPUT_PROXY_BASE_URL,
+            metrics_base_url=METRICS_PAGE_URL,
             download_url=_submission_zip_url(submission_id),
         )
         html_path = tmp / f"{submission_id}.html"