Michael Rabinovich Cursor committed on
Commit ·
01d67e9
1
Parent(s): 3909559
add visual Gallery tab (top-10 verified, sticky GT, fixture picker)
Browse files
Visual-first leaderboard as a new default tab. Self-contained HTML
(gallery.py) inlined into an iframe srcdoc with base64 thumbnails, so
it works on the private Space where HF's edge 404s in-browser
custom-route fetches (same constraint the report viewer's srcdoc path
already handles). Top-10 verified only, teal sticky ground-truth row,
3-fixture picker (drop-oldest, global re-render), dashed cell for
invalid/missing fixtures, per-row generation/editing/validity expander,
and a GT-vs-output compare modal per thumbnail. Existing Leaderboard /
Submit / About / Admin tabs unchanged.
Co-authored-by: Cursor <cursoragent@cursor.com>
- app.py +122 -0
- gallery.py +562 -0
- leaderboard.py +4 -0
app.py
CHANGED
|
@@ -24,6 +24,7 @@ rather than render).
|
|
| 24 |
"""
|
| 25 |
from __future__ import annotations
|
| 26 |
|
|
|
|
| 27 |
import html
|
| 28 |
import logging
|
| 29 |
import os
|
|
@@ -41,6 +42,7 @@ from huggingface_hub import hf_hub_download
|
|
| 41 |
from leaderboard import (
|
| 42 |
ADMIN_COLUMNS,
|
| 43 |
ADMIN_SELECT_COL,
|
|
|
|
| 44 |
HF_DATA_REPO,
|
| 45 |
HF_SUBMISSIONS_REPO,
|
| 46 |
LEADERBOARD_COLS,
|
|
@@ -50,10 +52,12 @@ from leaderboard import (
|
|
| 50 |
VALIDATED_LEADERBOARD_DATATYPES,
|
| 51 |
LeaderboardDataError,
|
| 52 |
_fmt_timestamp,
|
|
|
|
| 53 |
build_combined_csv,
|
| 54 |
load_admin_table,
|
| 55 |
load_leaderboard_split,
|
| 56 |
)
|
|
|
|
| 57 |
from admin import (
|
| 58 |
VALID_METHODS,
|
| 59 |
delete_rows,
|
|
@@ -513,12 +517,130 @@ def serve_report(submission_id: str) -> Response:
|
|
| 513 |
return Response(content=content, media_type="text/html; charset=utf-8")
|
| 514 |
|
| 515 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as blocks:
|
| 517 |
gr.Markdown(
|
| 518 |
"# CADGenBench Leaderboard\n"
|
| 519 |
"_Benchmarking AI-driven CAD generation._"
|
| 520 |
)
|
| 521 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 522 |
with gr.Tab("Leaderboard"):
|
| 523 |
# Load both tiers once at boot. `_safe_load_split` keeps a Hub
|
| 524 |
# read failure from crashing the Space: on failure the frames
|
|
|
|
| 24 |
"""
|
| 25 |
from __future__ import annotations
|
| 26 |
|
| 27 |
+
import base64
|
| 28 |
import html
|
| 29 |
import logging
|
| 30 |
import os
|
|
|
|
| 42 |
from leaderboard import (
|
| 43 |
ADMIN_COLUMNS,
|
| 44 |
ADMIN_SELECT_COL,
|
| 45 |
+
HF_DATA_GT_REPO,
|
| 46 |
HF_DATA_REPO,
|
| 47 |
HF_SUBMISSIONS_REPO,
|
| 48 |
LEADERBOARD_COLS,
|
|
|
|
| 52 |
VALIDATED_LEADERBOARD_DATATYPES,
|
| 53 |
LeaderboardDataError,
|
| 54 |
_fmt_timestamp,
|
| 55 |
+
_load_rows_from_hub,
|
| 56 |
build_combined_csv,
|
| 57 |
load_admin_table,
|
| 58 |
load_leaderboard_split,
|
| 59 |
)
|
| 60 |
+
from gallery import render_gallery_page
|
| 61 |
from admin import (
|
| 62 |
VALID_METHODS,
|
| 63 |
delete_rows,
|
|
|
|
| 517 |
return Response(content=content, media_type="text/html; charset=utf-8")
|
| 518 |
|
| 519 |
|
| 520 |
+
# Single canonical view served as the gallery thumbnail. Matches the
|
| 521 |
+
# view uploaded by the eval job (eval_job.py GALLERY_THUMB_VIEW) and the
|
| 522 |
+
# GT render the gallery pairs it with, so columns stay comparable.
|
| 523 |
+
GALLERY_THUMB_VIEW = "iso"
|
| 524 |
+
|
| 525 |
+
|
| 526 |
+
@lru_cache(maxsize=512)
def _download_render_bytes(submission_id: str, fixture: str) -> bytes:
    """Download ``renders/<id>/<fixture>.png`` and return its raw bytes.

    Raises whatever the Hub download or the file read raises.
    ``lru_cache`` does not store calls that raised, so only successful
    downloads are memoised here.
    """
    local_path = hf_hub_download(
        repo_id=HF_SUBMISSIONS_REPO,
        filename=f"renders/{submission_id}/{fixture}.png",
        repo_type="dataset",
    )
    return Path(local_path).read_bytes()


def _fetch_render(submission_id: str, fixture: str) -> bytes | None:
    """Pull a submission's gallery thumbnail (``renders/<id>/<fixture>.png``).

    Successful downloads are cached in-process so repeated gallery builds
    don't re-hit the Hub. Failures are logged and reported as ``None``
    but are deliberately *not* cached: a miss is expected to be transient,
    so the next call (e.g. the "Refresh gallery" button) retries instead
    of replaying a remembered failure.

    Args:
        submission_id: Submission whose render should be fetched.
        fixture: Fixture name; selects ``<fixture>.png`` under the
            submission's ``renders/`` folder.

    Returns:
        The PNG bytes, or ``None`` if the download or read failed.
    """
    try:
        return _download_render_bytes(submission_id, fixture)
    except Exception as e:  # noqa: BLE001 - any Hub failure -> None
        logger.warning(
            "Failed to fetch render %s/%s (%s: %s)",
            submission_id, fixture, type(e).__name__, e,
        )
        return None


# Keep the lru_cache management surface on the public name so any caller
# that used ``_fetch_render.cache_clear()`` / ``.cache_info()`` still works.
_fetch_render.cache_clear = _download_render_bytes.cache_clear
_fetch_render.cache_info = _download_render_bytes.cache_info
|
| 548 |
+
|
| 549 |
+
|
| 550 |
+
@lru_cache(maxsize=256)
def _download_gt_render_bytes(fixture: str) -> bytes:
    """Download ``<fixture>/renders/<view>.png`` from the GT repo.

    Raises whatever the Hub download or the file read raises.
    ``lru_cache`` does not store calls that raised, so only successful
    downloads are memoised here.
    """
    local_path = hf_hub_download(
        repo_id=HF_DATA_GT_REPO,
        filename=f"{fixture}/renders/{GALLERY_THUMB_VIEW}.png",
        repo_type="dataset",
    )
    return Path(local_path).read_bytes()


def _fetch_gt_render(fixture: str) -> bytes | None:
    """Pull a fixture's ground-truth thumbnail from the GT dataset.

    The path inside the GT repo is ``<fixture>/renders/<view>.png`` with
    ``<view>`` taken from ``GALLERY_THUMB_VIEW``. Successful downloads
    are cached in-process; failures are logged and returned as ``None``
    without being cached, so a transient Hub error (or a token that is
    fixed later) is retried on the next gallery build rather than being
    remembered.

    Args:
        fixture: Fixture name; selects the folder inside the GT repo.

    Returns:
        The PNG bytes, or ``None`` if the download or read failed.
    """
    try:
        return _download_gt_render_bytes(fixture)
    except Exception as e:  # noqa: BLE001 - any Hub failure -> None
        logger.warning(
            "Failed to fetch GT render for %s (%s: %s)",
            fixture, type(e).__name__, e,
        )
        return None


# Keep the lru_cache management surface on the public name so any caller
# that used ``_fetch_gt_render.cache_clear()`` / ``.cache_info()`` still works.
_fetch_gt_render.cache_clear = _download_gt_render_bytes.cache_clear
_fetch_gt_render.cache_info = _download_gt_render_bytes.cache_info
|
| 573 |
+
|
| 574 |
+
|
| 575 |
+
def _data_uri(png_bytes: bytes | None) -> str | None:
    """Encode PNG bytes as a base64 ``data:`` URI; pass ``None`` through.

    Thumbnails are inlined into the gallery document instead of being
    referenced by URL, so the browser makes no follow-up request for
    them.

    Args:
        png_bytes: Raw PNG content, or ``None`` when no image is available.

    Returns:
        ``"data:image/png;base64,<payload>"`` for bytes input, ``None``
        for ``None`` input.
    """
    if png_bytes is not None:
        prefix = "data:image/png;base64,"
        encoded = base64.b64encode(png_bytes).decode("ascii")
        return prefix + encoded
    return None
|
| 589 |
+
|
| 590 |
+
|
| 591 |
+
def _render_data_uri(submission_id: str, fixture: str) -> str | None:
    """Resolve a submission/fixture pair to an inlinable thumbnail source.

    Fetches the render bytes via ``_fetch_render`` and wraps them with
    ``_data_uri``; the result is ``None`` when the fetch yields ``None``.
    """
    png_bytes = _fetch_render(submission_id, fixture)
    return _data_uri(png_bytes)
|
| 594 |
+
|
| 595 |
+
|
| 596 |
+
def _gt_data_uri(fixture: str) -> str | None:
    """Resolve a fixture to its inlinable ground-truth thumbnail source.

    Fetches the GT render bytes via ``_fetch_gt_render`` and wraps them
    with ``_data_uri``; the result is ``None`` when the fetch yields
    ``None``.
    """
    png_bytes = _fetch_gt_render(fixture)
    return _data_uri(png_bytes)
|
| 599 |
+
|
| 600 |
+
|
| 601 |
+
def _gallery_iframe_html() -> str:
    """Return the gallery page wrapped in a self-contained ``srcdoc`` iframe.

    Loads the current rows, renders the standalone gallery document with
    the data-URI resolvers, HTML-escapes it, and embeds it as the
    ``srcdoc`` attribute of a single ``<iframe>`` element. If loading
    the rows raises ``LeaderboardDataError``, the error is logged and the
    gallery is rendered from an empty row list instead.
    """
    rows = []
    try:
        rows = _load_rows_from_hub()
    except LeaderboardDataError:
        logger.exception("Gallery row load failed; rendering empty gallery")

    page = render_gallery_page(rows, _render_data_uri, _gt_data_uri)
    # quote=True so the document can sit safely inside a double-quoted attribute.
    escaped = html.escape(page, quote=True)
    pieces = [
        f'<iframe srcdoc="{escaped}" ',
        'style="width:100%; height:90vh; border:0; display:block;" ',
        'title="CADGenBench gallery"></iframe>',
    ]
    return "".join(pieces)
|
| 622 |
+
|
| 623 |
+
|
| 624 |
with gr.Blocks(title="CADGenBench Leaderboard", theme=gr.themes.Soft()) as blocks:
|
| 625 |
gr.Markdown(
|
| 626 |
"# CADGenBench Leaderboard\n"
|
| 627 |
"_Benchmarking AI-driven CAD generation._"
|
| 628 |
)
|
| 629 |
|
| 630 |
+
with gr.Tab("Gallery"):
|
| 631 |
+
# Visual-first leaderboard. The bespoke surface (sticky GT row,
|
| 632 |
+
# fixture picker, thumbnail grid, compare modal) is a
|
| 633 |
+
# self-contained HTML doc inlined into an iframe `srcdoc` with
|
| 634 |
+
# base64 thumbnails, so it keeps its own style context and makes
|
| 635 |
+
# no second HTTP request (works on the private Space, where
|
| 636 |
+
# HF's edge 404s in-browser custom-route fetches). Built at boot;
|
| 637 |
+
# the Refresh button rebuilds it after a promotion/new result.
|
| 638 |
+
gallery_html = gr.HTML(value=_gallery_iframe_html())
|
| 639 |
+
gallery_refresh_btn = gr.Button("Refresh gallery", size="sm")
|
| 640 |
+
gallery_refresh_btn.click(
|
| 641 |
+
fn=_gallery_iframe_html, outputs=gallery_html,
|
| 642 |
+
)
|
| 643 |
+
|
| 644 |
with gr.Tab("Leaderboard"):
|
| 645 |
# Load both tiers once at boot. `_safe_load_split` keeps a Hub
|
| 646 |
# read failure from crashing the Space: on failure the frames
|
gallery.py
ADDED
|
@@ -0,0 +1,562 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2026 Hugging Face
|
| 2 |
+
#
|
| 3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 4 |
+
# you may not use this file except in compliance with the License.
|
| 5 |
+
# You may obtain a copy of the License at
|
| 6 |
+
#
|
| 7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 8 |
+
#
|
| 9 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 12 |
+
# See the License for the specific language governing permissions and
|
| 13 |
+
# limitations under the License.
|
| 14 |
+
|
| 15 |
+
"""Visual gallery leaderboard page.
|
| 16 |
+
|
| 17 |
+
Builds a self-contained HTML document (its own CSS + JS) from the live
|
| 18 |
+
submission rows. The Space serves it at ``/gallery`` and embeds it in
|
| 19 |
+
the Gradio "Gallery" tab via an iframe, so the bespoke visual surface
|
| 20 |
+
(sticky ground-truth row, fixture picker, thumbnail grid, report
|
| 21 |
+
modal) lives in plain HTML/JS isolated from Gradio's styles rather
|
| 22 |
+
than being forced into Gradio components.
|
| 23 |
+
|
| 24 |
+
The page is data-driven: :func:`build_gallery_payload` shapes the
|
| 25 |
+
top-10 verified rows + the fixture universe into a small JSON blob,
|
| 26 |
+
which the page's JS renders. Render lookups are isolated behind the
|
| 27 |
+
``renderFor`` / ``gtRenderFor`` JS hooks (mirroring the design brief),
|
| 28 |
+
pointed at the Space's render-proxy routes:
|
| 29 |
+
|
| 30 |
+
- ``renderFor(sub, fixtureId)`` -> ``/render/<id>/<fixture>.png``
|
| 31 |
+
(or ``null`` when the per-fixture status is invalid/missing, which
|
| 32 |
+
draws the dashed "invalid generation" cell).
|
| 33 |
+
- ``gtRenderFor(fixtureId)`` -> ``/gt-render/<fixture>.png``.
|
| 34 |
+
|
| 35 |
+
Thumbnail clicks open the existing per-submission report (served by
|
| 36 |
+
the Space's ``/reports/<id>.html`` proxy) deep-linked to the clicked
|
| 37 |
+
fixture's card via ``#fixture=<name>``.
|
| 38 |
+
"""
|
| 39 |
+
from __future__ import annotations
|
| 40 |
+
|
| 41 |
+
import json
|
| 42 |
+
|
| 43 |
+
# Gallery shows the top-N verified submissions only (the visual shop
|
| 44 |
+
# window). The numeric long tail lives on the Full results / Leaderboard
|
| 45 |
+
# tab, not here.
|
| 46 |
+
GALLERY_TOP_N = 10
|
| 47 |
+
|
| 48 |
+
# Default number of fixture columns the picker opens with, capped at the
|
| 49 |
+
# size of the available fixture universe.
|
| 50 |
+
DEFAULT_FIXTURE_COLUMNS = 3
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def _verified_rows(rows: list[dict]) -> list[dict]:
    """Completed + validated rows, score-sorted desc, capped at the top N.

    A row qualifies when ``validation_status == 'validated'``,
    ``status == 'completed'`` and ``aggregate_score`` is a real, finite
    number. Booleans (an ``int`` subclass) and NaN / infinite floats are
    rejected: NaN compares false against everything, which would make
    the sort order, and therefore the top-N cut, undefined.

    Args:
        rows: Submission rows as dicts.

    Returns:
        At most ``GALLERY_TOP_N`` qualifying rows, highest score first.
        Rows with equal scores keep their input order.
    """
    # Function-scope import: the module's top-level imports do not include math.
    import math

    def _is_real_score(value: object) -> bool:
        if isinstance(value, bool):
            return False
        if isinstance(value, int):
            return True
        return isinstance(value, float) and math.isfinite(value)

    verified = [
        r
        for r in rows
        if r.get("validation_status") == "validated"
        and r.get("status") == "completed"
        and _is_real_score(r.get("aggregate_score"))
    ]
    # Every surviving row has a numeric score, so no fallback key is needed.
    verified.sort(key=lambda r: r["aggregate_score"], reverse=True)
    return verified[:GALLERY_TOP_N]
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _fixture_universe(rows: list[dict]) -> list[dict]:
    """Ordered fixture list discovered from the rows' ``per_fixture_scores``.

    The fixture set is the union of every ``per_fixture_scores`` key
    across ``rows``, sorted by fixture id for a stable column order.
    Each fixture carries a ``task`` tag taken from the first row that
    supplies a non-empty ``task_type`` for it; a fixture whose entries
    are all null or lack ``task_type`` gets ``""``.

    Args:
        rows: Row dicts; a missing or null ``per_fixture_scores`` is
            treated as empty.

    Returns:
        A list of ``{"id", "name", "task"}`` dicts, one per fixture.
    """
    task_by_fixture: dict[str, str] = {}
    for row in rows:
        per_fixture = row.get("per_fixture_scores") or {}
        for fixture_id, fx in per_fixture.items():
            # Only a non-empty tag is final. An earlier null/empty entry
            # must not block a later row that actually knows the task type.
            if task_by_fixture.get(fixture_id):
                continue
            task_by_fixture[fixture_id] = (fx or {}).get("task_type") or ""
    return [
        {"id": fid, "name": fid, "task": task_by_fixture[fid]}
        for fid in sorted(task_by_fixture)
    ]
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _sub_payload(row: dict, render_resolver) -> dict:
    """Project one row into the compact dict the page script consumes.

    Args:
        row: A submission row dict.
        render_resolver: Callable ``(submission_id, fixture_id)`` that
            returns an image source or ``None``. It is invoked only for
            fixtures whose status is ``"valid"``.

    Returns:
        A dict with the submission's identity and score fields plus
        ``cells``, which maps each fixture id to ``{"status", "cad",
        "img"}``. A fixture with no status is reported as ``"missing"``,
        and every non-valid fixture has ``img`` set to ``None``.
    """
    task_scores = row.get("score_by_task_type") or {}
    per_fixture = row.get("per_fixture_scores") or {}
    sid = row.get("submission_id") or ""

    def _cell(fixture_id: str, raw) -> dict:
        info = raw or {}
        status = info.get("status") or "missing"
        cad = info.get("cad_score")
        img = None
        if status == "valid":
            img = render_resolver(sid, fixture_id)
        return {"status": status, "cad": cad, "img": img}

    cells: dict[str, dict] = {
        fixture_id: _cell(fixture_id, raw)
        for fixture_id, raw in per_fixture.items()
    }

    payload = {
        "id": sid,
        "name": row.get("submission_name") or "(unnamed submission)",
        "who": row.get("submitter_name") or "",
        "score": row.get("aggregate_score"),
        "validity": row.get("validity_rate"),
        "gen": task_scores.get("generation"),
        "edit": task_scores.get("editing"),
        "date": row.get("submitted_at") or "",
        "version": row.get("cadgenbench_version") or "",
        "blobUrl": row.get("submission_blob_url") or "",
        "cells": cells,
    }
    return payload
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def build_gallery_payload(rows: list[dict], render_resolver, gt_resolver) -> dict:
    """Shape live rows into the JSON-ready dict the gallery page renders.

    Image sources come from two injected resolvers, so this module does
    not need to know how renders are served:

    - ``render_resolver(submission_id, fixture_id) -> str | None``
    - ``gt_resolver(fixture_id) -> str | None``

    Returns:
        ``{"fixtures", "subs", "selected", "gtImg"}``. ``fixtures`` is
        the fixture universe of the verified rows, ``subs`` holds one
        payload per verified row, ``selected`` is the first
        ``DEFAULT_FIXTURE_COLUMNS`` fixture ids, and ``gtImg`` maps every
        fixture id to its ground-truth image source.
    """
    top_rows = _verified_rows(rows)
    universe = _fixture_universe(top_rows)
    fixture_ids = [entry["id"] for entry in universe]

    default_columns = fixture_ids[:DEFAULT_FIXTURE_COLUMNS]
    # Ground-truth sources are resolved before the per-submission cells.
    gt_sources = {fid: gt_resolver(fid) for fid in fixture_ids}
    submissions = [_sub_payload(entry, render_resolver) for entry in top_rows]

    return {
        "fixtures": universe,
        "subs": submissions,
        "selected": default_columns,
        "gtImg": gt_sources,
    }
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def render_gallery_page(rows: list[dict], render_resolver, gt_resolver) -> str:
    """Build the full standalone gallery HTML document from live rows.

    The payload from :func:`build_gallery_payload` is serialised to JSON
    and assigned to ``window.GALLERY_DATA`` in an inline ``<script>``,
    followed by the page's own CSS, body markup and script.

    Args:
        rows: Submission row dicts.
        render_resolver: ``(submission_id, fixture_id) -> str | None``
            image-source resolver for submission thumbnails.
        gt_resolver: ``(fixture_id) -> str | None`` image-source resolver
            for ground-truth thumbnails.

    Returns:
        The complete HTML document as a string.
    """
    payload = build_gallery_payload(rows, render_resolver, gt_resolver)
    data_json = json.dumps(payload, ensure_ascii=False)
    # The payload carries submitter-supplied strings (submission and
    # submitter names). Inside <script> the HTML parser still reacts to
    # "</script>" and "<!--", so a raw "<" in a string could end the
    # element early and inject markup. In JSON these characters can only
    # occur inside string values, so swapping them for \uXXXX escapes
    # leaves the parsed data unchanged. U+2028/U+2029 are escaped as well
    # because older JavaScript engines treat them as line terminators.
    for raw, escaped in (
        ("<", "\\u003c"),
        (">", "\\u003e"),
        ("&", "\\u0026"),
        ("\u2028", "\\u2028"),
        ("\u2029", "\\u2029"),
    ):
        data_json = data_json.replace(raw, escaped)
    return (
        "<!DOCTYPE html><html lang='en'><head>"
        "<meta charset='UTF-8'>"
        "<meta name='viewport' content='width=device-width, initial-scale=1.0'>"
        "<title>CADGenBench Gallery</title>"
        f"<style>{_CSS}</style>"
        "</head><body>"
        f"{_BODY}"
        f"<script>window.GALLERY_DATA = {data_json};</script>"
        f"<script>{_JS}</script>"
        "</body></html>"
    )
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
# ---------------------------------------------------------------------------
|
| 179 |
+
# CSS (ported from the reference prototype, trimmed to the gallery surface)
|
| 180 |
+
# ---------------------------------------------------------------------------
|
| 181 |
+
|
| 182 |
+
_CSS = """
|
| 183 |
+
@import url('https://fonts.googleapis.com/css2?family=Archivo:wght@400;500;600;700;800&family=Space+Mono:wght@400;700&display=swap');
|
| 184 |
+
:root {
|
| 185 |
+
--bg: #f4f5f7; --panel: #ffffff; --ink: #14161c; --ink-soft: #5b6170;
|
| 186 |
+
--ink-faint: #9aa0ad; --line: #e3e5ea; --line-strong: #d2d5dd;
|
| 187 |
+
--accent: #4338ca; --accent-soft: #eef0ff; --good: #15803d;
|
| 188 |
+
--good-soft: #e9f7ee; --bad: #b42318; --bad-soft: #fdeceb;
|
| 189 |
+
--gt: #0f766e; --gt-soft: #e6f4f2; --thumb-bg: #eceef2;
|
| 190 |
+
--shadow: 0 1px 2px rgba(20,22,28,.04), 0 8px 24px rgba(20,22,28,.06);
|
| 191 |
+
--radius: 14px;
|
| 192 |
+
}
|
| 193 |
+
* { box-sizing: border-box; }
|
| 194 |
+
body {
|
| 195 |
+
margin: 0; background: var(--bg); color: var(--ink);
|
| 196 |
+
font-family: 'Archivo', sans-serif; -webkit-font-smoothing: antialiased;
|
| 197 |
+
padding: 18px 0 60px;
|
| 198 |
+
}
|
| 199 |
+
.wrap { max-width: 1180px; margin: 0 auto; padding: 0 24px; }
|
| 200 |
+
|
| 201 |
+
.controls {
|
| 202 |
+
background: var(--panel); border: 1px solid var(--line);
|
| 203 |
+
border-radius: var(--radius); padding: 18px 20px; box-shadow: var(--shadow);
|
| 204 |
+
}
|
| 205 |
+
.controls .label {
|
| 206 |
+
font-size: 12px; font-weight: 700; text-transform: uppercase;
|
| 207 |
+
letter-spacing: .06em; color: var(--ink-faint); margin-bottom: 12px;
|
| 208 |
+
}
|
| 209 |
+
.picker-help { font-weight: 500; text-transform: none; letter-spacing: 0; color: var(--ink-faint); font-size: 12px; }
|
| 210 |
+
.chips { display: flex; gap: 10px; flex-wrap: wrap; }
|
| 211 |
+
.chip {
|
| 212 |
+
font-family: inherit; font-size: 13.5px; cursor: pointer;
|
| 213 |
+
border: 1px solid var(--line-strong); background: #fafbfc; color: var(--ink-soft);
|
| 214 |
+
padding: 9px 14px; border-radius: 10px; display: flex; align-items: center;
|
| 215 |
+
gap: 8px; transition: all .14s ease; font-weight: 500;
|
| 216 |
+
}
|
| 217 |
+
.chip:hover { border-color: var(--accent); color: var(--ink); }
|
| 218 |
+
.chip.on { background: var(--accent-soft); border-color: var(--accent); color: var(--accent); font-weight: 600; }
|
| 219 |
+
.chip .tag { font-family: 'Space Mono', monospace; font-size: 10px; padding: 2px 6px; border-radius: 5px; background: rgba(0,0,0,.05); text-transform: uppercase; letter-spacing: .03em; }
|
| 220 |
+
.chip.on .tag { background: rgba(67,56,202,.12); }
|
| 221 |
+
|
| 222 |
+
.section-label {
|
| 223 |
+
display: flex; align-items: center; gap: 10px; margin: 28px 0 14px;
|
| 224 |
+
font-size: 14px; font-weight: 700; color: var(--accent);
|
| 225 |
+
text-transform: uppercase; letter-spacing: .05em;
|
| 226 |
+
}
|
| 227 |
+
.section-label .verified-pill {
|
| 228 |
+
font-family: 'Space Mono', monospace; font-size: 10px; color: var(--good);
|
| 229 |
+
background: var(--good-soft); padding: 3px 8px; border-radius: 999px;
|
| 230 |
+
letter-spacing: .02em; display: inline-flex; align-items: center; gap: 5px;
|
| 231 |
+
}
|
| 232 |
+
.dot { width: 6px; height: 6px; border-radius: 50%; background: currentColor; }
|
| 233 |
+
|
| 234 |
+
.gallery { background: var(--panel); border: 1px solid var(--line); border-radius: var(--radius); box-shadow: var(--shadow); position: relative; }
|
| 235 |
+
.grid-head, .grow {
|
| 236 |
+
display: grid;
|
| 237 |
+
grid-template-columns: 52px minmax(220px, 1.4fr) 116px repeat(var(--ncol, 3), minmax(150px, 1fr));
|
| 238 |
+
align-items: stretch;
|
| 239 |
+
}
|
| 240 |
+
.grid-head {
|
| 241 |
+
background: #fbfbfd; border-bottom: 1px solid var(--line); font-size: 11px;
|
| 242 |
+
text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint);
|
| 243 |
+
font-weight: 700; position: sticky; top: 0; z-index: 20;
|
| 244 |
+
border-radius: var(--radius) var(--radius) 0 0;
|
| 245 |
+
}
|
| 246 |
+
.grid-head > div { padding: 13px 14px; display: flex; align-items: center; }
|
| 247 |
+
.grid-head .fix-h { flex-direction: column; align-items: flex-start; gap: 2px; }
|
| 248 |
+
.grid-head .fix-h .fname { color: var(--ink-soft); text-transform: none; letter-spacing: 0; font-family: 'Space Mono', monospace; font-size: 11px; font-weight: 700; }
|
| 249 |
+
.grid-head .fix-h .ftask { font-size: 9.5px; color: var(--ink-faint); text-transform: uppercase; letter-spacing: .04em; }
|
| 250 |
+
|
| 251 |
+
.grow.gt-row {
|
| 252 |
+
background: var(--gt-soft); border-bottom: 2px solid var(--gt);
|
| 253 |
+
position: sticky; top: var(--head-h, 46px); z-index: 15;
|
| 254 |
+
box-shadow: 0 6px 14px -8px rgba(15,118,110,.45);
|
| 255 |
+
}
|
| 256 |
+
.grow.gt-row .rank, .grow.gt-row .ident { display: flex; align-items: center; }
|
| 257 |
+
.grow.gt-row .ident { font-weight: 700; color: var(--gt); flex-direction: column; align-items: flex-start; justify-content: center; gap: 2px; }
|
| 258 |
+
.grow.gt-row .ident .gt-sub { font-weight: 500; font-size: 11.5px; color: var(--gt); opacity: .8; }
|
| 259 |
+
.grow.gt-row .score-cell { color: var(--gt); }
|
| 260 |
+
|
| 261 |
+
.grow.sub-row { border-bottom: 1px solid var(--line); transition: background .12s ease; }
|
| 262 |
+
.grow.sub-row:last-child { border-bottom: none; }
|
| 263 |
+
.grow.sub-row:hover { background: #fafbff; }
|
| 264 |
+
|
| 265 |
+
.rank {
|
| 266 |
+
padding: 16px 14px; font-family: 'Space Mono', monospace; font-weight: 700;
|
| 267 |
+
font-size: 15px; color: var(--ink-faint); display: flex; align-items: center;
|
| 268 |
+
justify-content: center;
|
| 269 |
+
}
|
| 270 |
+
.rank.medal-1 { color: #b8860b; } .rank.medal-2 { color: #6b7280; } .rank.medal-3 { color: #a0522d; }
|
| 271 |
+
|
| 272 |
+
.ident { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 3px; min-width: 0; }
|
| 273 |
+
.ident .sub-name { font-weight: 600; font-size: 14.5px; line-height: 1.25; }
|
| 274 |
+
.ident .submitter { font-size: 12px; color: var(--ink-faint); font-family: 'Space Mono', monospace; }
|
| 275 |
+
|
| 276 |
+
.score-cell { padding: 14px; display: flex; flex-direction: column; justify-content: center; gap: 4px; }
|
| 277 |
+
.score-cell .agg { font-size: 22px; font-weight: 800; letter-spacing: -.01em; }
|
| 278 |
+
.score-cell .validity { font-size: 11.5px; font-family: 'Space Mono', monospace; color: var(--good); font-weight: 700; display: flex; align-items: baseline; gap: 5px; }
|
| 279 |
+
.score-cell .validity .vlabel { font-weight: 400; color: var(--ink-faint); text-transform: uppercase; letter-spacing: .04em; font-size: 10px; }
|
| 280 |
+
.score-cell .validity.imperfect { color: #b45309; }
|
| 281 |
+
.score-cell .validity.imperfect .vlabel { color: #c98a3a; }
|
| 282 |
+
|
| 283 |
+
.thumb-cell { padding: 8px; display: flex; align-items: center; justify-content: center; position: relative; }
|
| 284 |
+
.thumb {
|
| 285 |
+
width: 100%; aspect-ratio: 16/10; border-radius: 8px; background: var(--thumb-bg);
|
| 286 |
+
border: 1px solid var(--line); overflow: hidden; cursor: pointer; position: relative;
|
| 287 |
+
transition: transform .14s ease, box-shadow .14s ease, border-color .14s ease;
|
| 288 |
+
}
|
| 289 |
+
.thumb:hover { transform: translateY(-2px); box-shadow: 0 6px 18px rgba(20,22,28,.14); border-color: var(--accent); }
|
| 290 |
+
.thumb img { width: 100%; height: 100%; display: block; object-fit: contain; }
|
| 291 |
+
.thumb .open-hint {
|
| 292 |
+
position: absolute; inset: 0; display: flex; align-items: center; justify-content: center;
|
| 293 |
+
opacity: 0; background: rgba(67,56,202,.08); transition: opacity .14s ease;
|
| 294 |
+
font-size: 11px; font-weight: 700; color: var(--accent); text-transform: uppercase; letter-spacing: .04em;
|
| 295 |
+
}
|
| 296 |
+
.thumb:hover .open-hint { opacity: 1; }
|
| 297 |
+
|
| 298 |
+
.thumb.failed { cursor: default; background: var(--bad-soft); border: 1px dashed #e9b3ae; display: flex; align-items: center; justify-content: center; }
|
| 299 |
+
.thumb.failed:hover { transform: none; box-shadow: none; border-color: #e9b3ae; }
|
| 300 |
+
.thumb.failed .ftag { font-family: 'Space Mono', monospace; font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; line-height: 1.4; }
|
| 301 |
+
|
| 302 |
+
.sub-row.open { background: #fafbff; }
|
| 303 |
+
.detail {
|
| 304 |
+
grid-column: 1 / -1; background: #f8f9fc; border-top: 1px dashed var(--line-strong);
|
| 305 |
+
padding: 0 14px; max-height: 0; overflow: hidden; transition: max-height .28s ease, padding .28s ease;
|
| 306 |
+
}
|
| 307 |
+
.detail.show { max-height: 260px; padding: 18px 14px 22px; }
|
| 308 |
+
.metric-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); gap: 12px; }
|
| 309 |
+
.metric { background: var(--panel); border: 1px solid var(--line); border-radius: 10px; padding: 12px 14px; }
|
| 310 |
+
.metric .m-label { font-size: 10px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; }
|
| 311 |
+
.metric .m-val { font-size: 20px; font-weight: 800; margin-top: 4px; letter-spacing: -.01em; }
|
| 312 |
+
.detail-foot { margin-top: 14px; font-size: 12.5px; color: var(--ink-soft); display: flex; gap: 18px; flex-wrap: wrap; align-items: center; }
|
| 313 |
+
.detail-foot a { color: var(--accent); text-decoration: none; font-weight: 600; }
|
| 314 |
+
.detail-foot a:hover { text-decoration: underline; }
|
| 315 |
+
.row-toggle {
|
| 316 |
+
grid-column: 1 / -1; display: flex; align-items: center; justify-content: center; gap: 6px;
|
| 317 |
+
padding: 7px; cursor: pointer; font-size: 11px; font-weight: 700; color: var(--ink-faint);
|
| 318 |
+
text-transform: uppercase; letter-spacing: .05em; border-top: 1px solid var(--line);
|
| 319 |
+
background: #fcfcfe; user-select: none;
|
| 320 |
+
}
|
| 321 |
+
.row-toggle:hover { color: var(--accent); background: var(--accent-soft); }
|
| 322 |
+
.row-toggle .chev { transition: transform .2s ease; }
|
| 323 |
+
.sub-row.open .row-toggle .chev { transform: rotate(180deg); }
|
| 324 |
+
|
| 325 |
+
.empty-note { background: var(--panel); border: 1px dashed var(--line-strong); border-radius: var(--radius); padding: 48px 24px; text-align: center; color: var(--ink-faint); font-size: 14px; }
|
| 326 |
+
|
| 327 |
+
/* compare modal (GT vs output) */
|
| 328 |
+
.modal-back { position: fixed; inset: 0; background: rgba(20,22,28,.5); backdrop-filter: blur(3px); display: none; align-items: center; justify-content: center; z-index: 50; padding: 24px; }
|
| 329 |
+
.modal-back.show { display: flex; }
|
| 330 |
+
.modal { background: var(--panel); border-radius: 16px; width: 100%; max-width: 620px; padding: 26px; box-shadow: 0 24px 60px rgba(0,0,0,.3); }
|
| 331 |
+
.modal h4 { margin: 0 0 4px; font-size: 18px; }
|
| 332 |
+
.modal .msub { color: var(--ink-faint); font-size: 13px; font-family: 'Space Mono', monospace; margin-bottom: 18px; }
|
| 333 |
+
.modal-compare { display: grid; grid-template-columns: 1fr 1fr; gap: 14px; }
|
| 334 |
+
.modal-compare figure { margin: 0; }
|
| 335 |
+
.modal-compare figcaption { font-size: 11px; text-transform: uppercase; letter-spacing: .05em; color: var(--ink-faint); font-weight: 700; margin-bottom: 6px; }
|
| 336 |
+
.modal-compare .mthumb { width: 100%; aspect-ratio: 16/10; border-radius: 8px; background: var(--thumb-bg); border: 1px solid var(--line); overflow: hidden; }
|
| 337 |
+
.modal-compare .mthumb img { width: 100%; height: 100%; object-fit: contain; display: block; }
|
| 338 |
+
.modal-compare .mthumb.failed { background: var(--bad-soft); border: 1px dashed #e9b3ae; display: flex; align-items: center; justify-content: center; }
|
| 339 |
+
.modal-compare .mthumb.failed span { font-family: 'Space Mono', monospace; font-size: 10px; font-weight: 700; color: var(--bad); text-transform: uppercase; letter-spacing: .04em; text-align: center; }
|
| 340 |
+
.modal-note { margin-top: 18px; font-size: 12.5px; color: var(--ink-soft); background: var(--accent-soft); padding: 12px 14px; border-radius: 10px; }
|
| 341 |
+
.modal-close { margin-top: 20px; width: 100%; padding: 11px; border: 1px solid var(--line-strong); background: #fafbfc; border-radius: 10px; font-family: inherit; font-weight: 600; cursor: pointer; font-size: 14px; }
|
| 342 |
+
.modal-close:hover { background: var(--accent-soft); border-color: var(--accent); color: var(--accent); }
|
| 343 |
+
"""
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
# ---------------------------------------------------------------------------
|
| 347 |
+
# Body
|
| 348 |
+
# ---------------------------------------------------------------------------
|
| 349 |
+
|
| 350 |
+
_BODY = """
|
| 351 |
+
<div class="wrap">
|
| 352 |
+
<div class="controls">
|
| 353 |
+
<div class="label">Fixtures shown <span class="picker-help">- pick 3 to compare across all models (changes columns globally)</span></div>
|
| 354 |
+
<div class="chips" id="chips"></div>
|
| 355 |
+
</div>
|
| 356 |
+
<div class="section-label">
|
| 357 |
+
Validated leaderboard - Top 10
|
| 358 |
+
<span class="verified-pill"><span class="dot"></span>verified only</span>
|
| 359 |
+
</div>
|
| 360 |
+
<div class="gallery" id="gallery">
|
| 361 |
+
<div class="grid-head" id="gridHead"></div>
|
| 362 |
+
</div>
|
| 363 |
+
</div>
|
| 364 |
+
<div class="modal-back" id="modalBack">
|
| 365 |
+
<div class="modal">
|
| 366 |
+
<h4 id="modalTitle"></h4>
|
| 367 |
+
<div class="msub" id="modalSub"></div>
|
| 368 |
+
<div class="modal-compare">
|
| 369 |
+
<figure><figcaption>Ground truth</figcaption><div class="mthumb" id="modalGt"></div></figure>
|
| 370 |
+
<figure><figcaption id="modalOutCap">Output (aligned)</figcaption><div class="mthumb" id="modalOut"></div></figure>
|
| 371 |
+
</div>
|
| 372 |
+
<div class="modal-note" id="modalNote"></div>
|
| 373 |
+
<button class="modal-close" id="modalClose">Close</button>
|
| 374 |
+
</div>
|
| 375 |
+
</div>
|
| 376 |
+
"""
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
# ---------------------------------------------------------------------------
|
| 380 |
+
# JS (data-driven render of the gallery; render lookups isolated behind
|
| 381 |
+
# renderFor / gtRenderFor as in the design brief)
|
| 382 |
+
# ---------------------------------------------------------------------------
|
| 383 |
+
|
| 384 |
+
_JS = """
|
| 385 |
+
// Gallery payload, read from window.GALLERY_DATA. When it is absent an empty
// payload is used so the script still runs and shows the empty state.
const DATA = window.GALLERY_DATA || {fixtures: [], subs: [], selected: []};
// Fixture metadata list. Guarded like `selected` below so a payload without a
// `fixtures` key does not crash buildChips, which maps over this array.
const FIXTURES = DATA.fixtures || [];
// Ids of the fixtures currently shown as columns. A copy is taken so chip
// clicks never mutate the payload's own array.
let selected = (DATA.selected || []).slice();
|
| 388 |
+
|
| 389 |
+
// --- Render hooks. ---------------------------------------------------------
|
| 390 |
+
// The image sources are injected by the server (base64 data URIs while the
|
| 391 |
+
// Space is private; proxy URLs once public), so these just read the payload.
|
| 392 |
+
// renderFor returns null for an invalid/missing fixture -> dashed cell.
|
| 393 |
+
/**
 * Image source for one submission's render of one fixture.
 *
 * @param {object} sub   Submission entry; its `cells` map is keyed by fixture id.
 * @param {string} fxId  Fixture id.
 * @returns {?string} The cell's `img` value, or null when the submission has
 *   no cells map, no cell for this fixture, or a cell without an image.
 *   Callers render a dashed "invalid generation" cell for null.
 */
function renderFor(sub, fxId) {
  const cells = sub && sub.cells;
  const cell = cells ? cells[fxId] : null;
  // Normalise every "nothing to show" case to null so callers need one check.
  return (cell && cell.img) || null;
}
|
| 397 |
+
/**
 * Ground-truth image source for a fixture.
 *
 * @param {string} fxId  Fixture id, used as the key into DATA.gtImg.
 * @returns {?string} The stored source, or null when DATA.gtImg is missing or
 *   holds no truthy entry for this fixture.
 */
function gtRenderFor(fxId) {
  const gtImages = DATA.gtImg || {};
  const src = gtImages[fxId];
  return src ? src : null;
}
|
| 400 |
+
/**
 * Per-fixture cell of a submission, never null.
 *
 * @param {object} sub   Submission entry; its `cells` map is keyed by fixture id.
 * @param {string} fxId  Fixture id.
 * @returns {object} The cell, or an empty object when the submission, its
 *   cells map, or the cell itself is missing, so callers can read fields
 *   without guarding.
 */
function cellOf(sub, fxId) {
  const cells = (sub && sub.cells) || {};
  return cells[fxId] || {};
}
|
| 401 |
+
|
| 402 |
+
/**
 * Format a number with a fixed count of decimals for display.
 *
 * @param {*} x       Value to format.
 * @param {number} d  Number of decimal places.
 * @returns {string} The fixed-point text, or '-' when x is null, undefined, or
 *   does not convert to a finite number (so NaN never reaches the page).
 */
function fmt(x, d) {
  if (x === null || x === undefined) return '-';
  const v = Number(x);
  return Number.isFinite(v) ? v.toFixed(d) : '-';
}
|
| 403 |
+
/**
 * Format a fraction as a whole-number percentage.
 *
 * @param {*} x  Fraction to format (1 renders as "100%").
 * @returns {string} The rounded percentage with a trailing '%', or '-' when x
 *   is null, undefined, or does not convert to a finite number.
 */
function pct(x) {
  if (x === null || x === undefined) return '-';
  const v = Number(x);
  return Number.isFinite(v) ? Math.round(v * 100) + '%' : '-';
}
|
| 404 |
+
/**
 * Escape text for safe interpolation into HTML content or a quoted attribute.
 *
 * @param {*} s  Value to escape; null and undefined become the empty string,
 *   anything else is converted with String().
 * @returns {string} The text with & < > " ' replaced by HTML entities.
 */
function esc(s) {
  // Each special character must map to its entity, not to itself; an identity
  // table would leave every string unescaped.
  const entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
  return String(s == null ? '' : s).replace(/[&<>"']/g, c => entities[c]);
}
|
| 405 |
+
/**
 * Look up the metadata entry for a fixture id.
 *
 * @param {string} id  Fixture id to find.
 * @returns {object|undefined} The first FIXTURES entry whose `id` strictly
 *   equals the argument, or undefined when there is none.
 */
function fixtureMeta(id) {
  for (const fx of FIXTURES) {
    if (fx.id === id) return fx;
  }
  return undefined;
}
|
| 406 |
+
|
| 407 |
+
/**
 * Render the fixture picker chips into #chips and wire their click handlers.
 *
 * Clicking a selected chip deselects it unless it is the last one selected;
 * clicking an unselected chip selects it, first dropping the oldest selection
 * when three are already active. Every accepted click re-renders both the
 * chips and the gallery.
 */
function buildChips() {
  const host = document.getElementById('chips');

  let markup = '';
  for (const fx of FIXTURES) {
    const active = selected.includes(fx.id) ? 'on' : '';
    let tagHtml = '';
    if (fx.task) tagHtml = '<span class="tag">' + esc(fx.task) + '</span>';
    markup += '<button class="chip ' + active + '" data-id="' + esc(fx.id) + '">' + tagHtml + esc(fx.name) + '</button>';
  }
  host.innerHTML = markup;

  // Applies one click to `selected`; reports whether anything changed.
  const toggle = (id) => {
    if (selected.includes(id)) {
      if (selected.length <= 1) return false; // keep at least 1
      selected = selected.filter(x => x !== id);
      return true;
    }
    if (selected.length >= 3) selected.shift(); // cap at 3, drop oldest
    selected.push(id);
    return true;
  };

  host.querySelectorAll('.chip').forEach(chip => {
    chip.onclick = () => {
      if (!toggle(chip.dataset.id)) return;
      buildChips();
      buildGallery();
    };
  });
}
|
| 428 |
+
|
| 429 |
+
/**
 * Render the column header into #gridHead: the three fixed columns followed
 * by one column per selected fixture. A fixture id with no metadata entry is
 * shown by its raw id and without a task label.
 */
function buildHead() {
  const head = document.getElementById('gridHead');
  const parts = ['<div>#</div><div>Submission</div><div>Score</div>'];
  for (const id of selected) {
    const meta = fixtureMeta(id);
    const label = meta ? meta.name : id;
    let taskHtml = '';
    if (meta && meta.task) taskHtml = '<span class="ftask">' + esc(meta.task) + '</span>';
    parts.push('<div class="fix-h"><span class="fname">' + esc(label) + '</span>' + taskHtml + '</div>');
  }
  head.innerHTML = parts.join('');
}
|
| 439 |
+
|
| 440 |
+
/**
 * HTML for one thumbnail cell.
 *
 * @param {?string} url       Image source; a falsy value yields the dashed
 *   "invalid generation" placeholder.
 * @param {string} attrs      Pre-built attribute text placed on the wrapper
 *   div; the caller is responsible for escaping the values inside it.
 * @param {boolean} clickable Whether to include the "open" hint overlay.
 * @returns {string} Markup for the thumbnail.
 */
function thumbHTML(url, attrs, clickable) {
  if (!url) {
    return '<div class="thumb failed"><span class="ftag">invalid<br>generation</span></div>';
  }
  const hint = clickable ? '<span class="open-hint">open</span>' : '';
  // The source is payload data placed inside a quoted attribute, so escape it:
  // a quote or ampersand in a URL must not be able to break out of src.
  return '<div class="thumb" ' + attrs + '><img loading="lazy" src="' + esc(url) + '" alt="">' + hint + '</div>';
}
|
| 447 |
+
|
| 448 |
+
// Inner HTML of the pinned ground-truth row: star rank, label, a fixed 1.000
// score, then one non-clickable thumbnail per selected fixture.
function gtRowHTML() {
  let cells = '<div class="rank">★</div>'
    + '<div class="ident">Ground truth<span class="gt-sub">reference geometry</span></div>'
    + '<div class="score-cell"><span class="agg">1.000</span></div>';
  selected.forEach(id => {
    cells += '<div class="thumb-cell">' + thumbHTML(gtRenderFor(id), 'data-gt="' + esc(id) + '"', false) + '</div>';
  });
  return cells;
}

// Inner HTML of one submission row: rank, identity, score, one clickable
// thumbnail per selected fixture, the expander toggle and the detail panel.
// `i` is the zero-based position; the first three rows get a medal class.
function subRowHTML(s, i) {
  const medal = i < 3 ? 'medal-' + (i + 1) : '';
  const imperfect = (s.validity !== null && s.validity < 1) ? 'imperfect' : '';
  let cells = '<div class="rank ' + medal + '">' + (i + 1) + '</div>'
    + '<div class="ident"><span class="sub-name">' + esc(s.name) + '</span><span class="submitter">' + esc(s.who) + '</span></div>'
    + '<div class="score-cell"><span class="agg">' + fmt(s.score, 3) + '</span>'
    + '<span class="validity ' + imperfect + '">' + pct(s.validity) + ' <span class="vlabel">valid</span></span></div>';
  selected.forEach(id => {
    cells += '<div class="thumb-cell">' + thumbHTML(renderFor(s, id), 'data-sub="' + esc(s.id) + '" data-fix="' + esc(id) + '"', true) + '</div>';
  });
  cells += '<div class="row-toggle" data-toggle="' + esc(s.id) + '">more numbers <span class="chev">▾</span></div>';
  cells += '<div class="detail" id="detail-' + esc(s.id) + '">'
    + '<div class="metric-grid">'
    + '<div class="metric"><div class="m-label">Generation</div><div class="m-val">' + fmt(s.gen, 3) + '</div></div>'
    + '<div class="metric"><div class="m-label">Editing</div><div class="m-val">' + fmt(s.edit, 3) + '</div></div>'
    + '<div class="metric"><div class="m-label">Validity</div><div class="m-val">' + pct(s.validity) + '</div></div>'
    + '</div>'
    + '<div class="detail-foot"><span>Submitted ' + esc(s.date) + (s.version ? ' - cadgenbench v' + esc(s.version) : '') + '</span>'
    + (s.blobUrl ? '<a href="' + esc(s.blobUrl) + '" target="_blank" rel="noopener">Download ZIP</a>' : '')
    + '</div></div>';
  return cells;
}

/**
 * Re-render the whole gallery for the current `selected` fixtures.
 *
 * Sets the --ncol custom property to the number of selected fixtures,
 * rebuilds the header, removes every existing row, then appends the
 * ground-truth row and one row per submission in payload order. With no
 * submissions it shows a single empty-state note instead of any rows.
 */
function buildGallery() {
  const g = document.getElementById('gallery');
  g.style.setProperty('--ncol', selected.length);
  buildHead();
  g.querySelectorAll('.grow').forEach(n => n.remove());

  // A payload without a `subs` key is treated the same as an empty list.
  const subs = DATA.subs || [];
  if (!subs.length) {
    // The note is not a .grow row, so it survives re-renders; add it once.
    if (!g.querySelector('.empty-note')) {
      const note = document.createElement('div');
      note.className = 'empty-note';
      note.textContent = 'No verified submissions yet. Once a submission is promoted to the validated tier it appears here.';
      g.appendChild(note);
    }
    return;
  }

  // Ground-truth pinned row.
  const gt = document.createElement('div');
  gt.className = 'grow gt-row';
  gt.innerHTML = gtRowHTML();
  g.appendChild(gt);

  subs.forEach((s, i) => {
    const row = document.createElement('div');
    row.className = 'grow sub-row';
    row.innerHTML = subRowHTML(s, i);
    g.appendChild(row);
  });

  // Handlers are attached after the rows exist; the header height is then
  // re-measured because the header was just rebuilt.
  wireGallery();
  syncHeadHeight();
}
|
| 506 |
+
|
| 507 |
+
/**
 * Attach click handlers to the freshly rendered rows: each row toggle
 * expands or collapses its detail panel, and each submission thumbnail opens
 * the compare modal for its fixture.
 */
function wireGallery() {
  document.querySelectorAll('[data-toggle]').forEach(t => {
    t.onclick = () => {
      const detail = document.getElementById('detail-' + t.dataset.toggle);
      const row = t.closest('.sub-row');
      // Guard both lookups so a missing node cannot abort the click handler.
      if (detail) detail.classList.toggle('show');
      if (row) row.classList.toggle('open');
    };
  });
  document.querySelectorAll('.thumb[data-sub]').forEach(th => {
    th.onclick = () => {
      // dataset values are always strings, so compare against the stringified
      // id; a numeric id in the payload would never match with ===.
      const sub = (DATA.subs || []).find(x => String(x.id) === th.dataset.sub);
      if (sub) openModal(th.dataset.fix, sub);
    };
  });
}
|
| 522 |
+
|
| 523 |
+
/**
 * Fill and show the compare modal (ground truth vs. one submission's output).
 *
 * @param {string} fxId  Fixture id; shown as the modal title.
 * @param {object} sub   Submission entry; its name is the subtitle. The call
 *   is a no-op when this is missing.
 */
function openModal(fxId, sub) {
  if (!sub) return;
  document.getElementById('modalTitle').textContent = fxId;
  document.getElementById('modalSub').textContent = sub.name;
  const gt = gtRenderFor(fxId);
  const out = renderFor(sub, fxId);
  const cell = cellOf(sub, fxId);
  // Image sources are payload data going into innerHTML, so escape them
  // before placing them in the quoted src attribute.
  document.getElementById('modalGt').innerHTML = gt
    ? '<img src="' + esc(gt) + '" alt="ground truth">' : '<span>no GT render</span>';
  const outEl = document.getElementById('modalOut');
  if (out) {
    outEl.className = 'mthumb';
    outEl.innerHTML = '<img src="' + esc(out) + '" alt="output">';
  } else {
    outEl.className = 'mthumb failed';
    outEl.innerHTML = '<span>invalid<br>generation</span>';
  }
  // Reuse the shared formatter: '-' for a missing score, three decimals otherwise.
  const cad = fmt(cell.cad, 3);
  document.getElementById('modalNote').innerHTML =
    'CAD score for this fixture: <b>' + cad + '</b>. The full per-fixture report '
    + '(shape similarity, interface, topology + 3D view) opens from the report viewer.';
  document.getElementById('modalBack').classList.add('show');
}
|
| 545 |
+
/** Hide the compare modal by removing the `show` class from its backdrop. */
function closeModal() {
  const backdrop = document.getElementById('modalBack');
  backdrop.classList.remove('show');
}
|
| 548 |
+
// Three ways to dismiss the compare modal: the Close button, a click on the
// backdrop, and the Escape key.
document.getElementById('modalClose').onclick = closeModal;
document.getElementById('modalBack').onclick = (evt) => {
  // Only a click whose target is the backdrop element itself dismisses.
  if (evt.target.id !== 'modalBack') return;
  closeModal();
};
document.addEventListener('keydown', (evt) => {
  if (evt.key === 'Escape') closeModal();
});
|
| 551 |
+
|
| 552 |
+
// Pin the GT row exactly beneath the sticky column header.
|
| 553 |
+
/**
 * Publish the rendered height of #gridHead as the --head-h custom property on
 * the document root. Does nothing when the header element is absent.
 */
function syncHeadHeight() {
  const head = document.getElementById('gridHead');
  if (!head) return;
  const px = head.offsetHeight + 'px';
  document.documentElement.style.setProperty('--head-h', px);
}
|
| 557 |
+
|
| 558 |
+
// Initial render: picker chips first, then the gallery grid.
buildChips();
buildGallery();
// Re-measure the header on resize and again once fonts are ready, where the
// browser exposes document.fonts.ready.
window.addEventListener('resize', syncHeadHeight);
if (document.fonts) {
  if (document.fonts.ready) document.fonts.ready.then(syncHeadHeight);
}
|
| 562 |
+
"""
|
leaderboard.py
CHANGED
|
@@ -44,6 +44,10 @@ HF_SUBMISSIONS_REPO = os.getenv(
|
|
| 44 |
"HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions"
|
| 45 |
)
|
| 46 |
HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
RESULTS_FILENAME = "results.jsonl"
|
| 49 |
HUB_FETCH_TIMEOUT_SECONDS = 30
|
|
|
|
| 44 |
"HF_SUBMISSIONS_REPO", f"{HF_ORG}/cadgenbench-submissions"
|
| 45 |
)
|
| 46 |
HF_DATA_REPO = os.getenv("HF_DATA_REPO", f"{HF_ORG}/cadgenbench-data")
|
| 47 |
+
# Private ground-truth dataset. The gallery's GT render proxy reads
|
| 48 |
+
# `<fixture>/renders/iso.png` from here (needs the Space HF_TOKEN's
|
| 49 |
+
# read scope, same token the eval already uses for GT).
|
| 50 |
+
HF_DATA_GT_REPO = os.getenv("HF_DATA_GT_REPO", f"{HF_ORG}/cadgenbench-data-gt")
|
| 51 |
|
| 52 |
RESULTS_FILENAME = "results.jsonl"
|
| 53 |
HUB_FETCH_TIMEOUT_SECONDS = 30
|