| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| """CADGenBench Leaderboard Space - Gradio UI + report-proxy mount. |
| |
| Read path lives in :mod:`leaderboard`. Submit-tab validation lives in |
| :mod:`submit`. Both are wired into the Gradio Blocks below. The |
| Gradio app is mounted under a FastAPI parent so the custom |
| ``/reports/{submission_id}.html`` route can re-serve dataset HTML |
| with ``Content-Type: text/html`` (HF Hub's ``/resolve/`` serves it |
| as ``text/plain`` by policy, which makes the browser show source |
| rather than render). |
| """ |
| from __future__ import annotations |
|
|
| import base64 |
| import html |
| import logging |
| import mimetypes |
| import os |
| from functools import lru_cache |
| from pathlib import Path |
|
|
| import gradio as gr |
| import pandas as pd |
| import uvicorn |
| from fastapi import FastAPI |
| from fastapi.responses import HTMLResponse, Response |
| from gradio_leaderboard import Leaderboard |
| from huggingface_hub import hf_hub_download, snapshot_download |
|
|
| from leaderboard import ( |
| ADMIN_COLUMNS, |
| ADMIN_SELECT_COL, |
| HF_DATA_GT_REPO, |
| HF_DATA_REPO, |
| HF_SUBMISSIONS_REPO, |
| LEADERBOARD_COLS, |
| LEADERBOARD_DATATYPES, |
| LEADERBOARD_HIDE_COLUMNS, |
| VALIDATED_LEADERBOARD_COLS, |
| VALIDATED_LEADERBOARD_DATATYPES, |
| LeaderboardDataError, |
| _fmt_timestamp, |
| _load_rows_from_hub, |
| build_combined_csv, |
| load_admin_table, |
| load_leaderboard_split, |
| render_public_url, |
| ) |
| from gallery import render_gallery_page |
| from metrics_page import build_metrics_page |
| from tasks import load_tasks_from_dir, render_tasks_page |
| from admin import ( |
| VALID_METHODS, |
| delete_rows, |
| demote_rows, |
| is_admin, |
| promote_rows, |
| rescore_all, |
| rescore_rows, |
| stop_and_delete_rows, |
| ) |
| from submit import handle_submit |
|
|
| logger = logging.getLogger(__name__) |
|
|
| |
| |
| |
| |
# Configure the root logger once at import time: INFO level, with each
# record prefixed by timestamp, level, and the emitting logger's name.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)
|
|
|
|
| |
| |
| |
| |
# Link to the validation policy document; interpolated into ABOUT_MD and
# VALIDATION_GUIDELINES_MD below.
VALIDATION_DOC_URL = (
    "https://github.com/huggingface/cadgenbench/blob/main/docs/benchmark/validation.md"
)
| |
| |
| |
| |
# Link to the submission guide in the benchmark's GitHub repository.
SUBMISSION_DOC_URL = (
    "https://github.com/huggingface/cadgenbench/blob/main/docs/benchmark/submission.md"
)
|
|
# "About" markdown. Formatted once at import time with the dataset repo ids
# (HF_DATA_REPO / HF_SUBMISSIONS_REPO) and the validation-policy link.
ABOUT_MD = f"""## About

**CADGenBench** evaluates AI-driven CAD generation: how well a model can
turn a description of a mechanical part into a valid, geometrically
correct 3D model.

- **Reference baseline**: an iterative AI agent that writes build123d Python.
- **Submission flow**: upload a zip of per-fixture STEP files; the Space
runs the eval and appends a row to the submissions dataset.
- **Datasets**: fixture inputs in
[`{HF_DATA_REPO}`](https://huggingface.co/datasets/{HF_DATA_REPO});
submissions and computed results in
[`{HF_SUBMISSIONS_REPO}`](https://huggingface.co/datasets/{HF_SUBMISSIONS_REPO}).
- **Code**: [`huggingface/cadgenbench`](https://github.com/huggingface/cadgenbench).
- **Validation policy**: [`docs/benchmark/validation.md`]({VALIDATION_DOC_URL}).
- **Data**: CAD geometry from [Mecado](https://www.mecado.com).
"""
|
|
| |
| |
| |
| |
| |
# BibTeX entry for citing the benchmark. Raw string so the ``\url`` macro's
# backslash is kept literally rather than parsed as an escape sequence.
CITATION_BIBTEX = r"""@misc{cadgenbench2026,
author = {Rabinovich, Michael and {Hugging Face}},
title = {{CADGenBench}: a benchmark for {AI}-driven {CAD} generation},
year = {2026},
publisher = {Hugging Face},
howpublished = {\url{https://huggingface.co/spaces/HuggingAI4Engineering/CADGenBench}},
}"""
|
|
# Short markdown summary of the two-tier (Unvalidated -> Validated) flow,
# ending with a link to the full policy at VALIDATION_DOC_URL.
VALIDATION_GUIDELINES_MD = f"""Submissions appear on the **Unvalidated** table the moment evaluation completes. Maintainers promote rows to **Validated** after methodology review, accepting one of four evidence types (`code`, `traces`, `api`, `manual`).

Full policy: [`docs/benchmark/validation.md`]({VALIDATION_DOC_URL})."""
|
|
# Markdown placeholder (italic) telling the visitor how to start a
# submission and where progress/errors will be reported.
SUBMIT_STATUS_IDLE = (
    "_Log in, attach a zip, and click **Submit**. Progress and any "
    "errors appear here._"
)
|
|
|
|
def _data_error_banner_md(message: str | None) -> str:
    """Build the markdown for the top-of-tab "data unavailable" banner.

    Args:
        message: The read-failure description, or ``None`` / ``""`` when
            the live results loaded fine.

    Returns:
        ``""`` when there is no error; otherwise a markdown blockquote that
        states the tables are empty because the live read failed (never
        because stale or bundled data was substituted), followed by the
        failure details in an inline-code span.

    The details are flattened to a single line and stripped of backticks
    before interpolation: exception text is frequently multi-line, and a
    raw newline or backtick would terminate the inline-code span / the
    blockquote and garble the banner. Plain single-line messages render
    exactly as before.
    """
    if not message:
        return ""
    # Collapse every whitespace run (including newlines/tabs) to one space,
    # then swap backticks for apostrophes so the code span cannot be closed
    # early by the message itself.
    details = " ".join(str(message).split()).replace("`", "'")
    return (
        "> ⚠️ **Leaderboard data unavailable.** The live results could not "
        "be read from the Hub, so the tables below are empty. No stale or "
        "cached data is ever shown in its place.\n>\n"
        f"> Details: `{details}`"
    )
|
|
|
|
def _safe_load_split() -> tuple[pd.DataFrame, pd.DataFrame, str | None]:
    """Load the validated and unvalidated tiers without ever raising on a Hub failure.

    :func:`load_leaderboard_split` raises :class:`LeaderboardDataError`
    when the live data can't be read. This wrapper logs that failure and
    swaps it for two empty frames carrying the expected column headers,
    plus the error text for the caller to show.

    Returns:
        ``(validated, unvalidated, error)``; ``error`` is ``None`` when the
        load succeeded and the exception's string form otherwise.
    """
    try:
        validated, unvalidated = load_leaderboard_split()
    except LeaderboardDataError as exc:
        logger.exception("Leaderboard data load failed")
        empty_validated = pd.DataFrame(columns=VALIDATED_LEADERBOARD_COLS)
        empty_unvalidated = pd.DataFrame(columns=LEADERBOARD_COLS)
        return empty_validated, empty_unvalidated, str(exc)
    return validated, unvalidated, None
|
|
|
|
def _safe_load_admin() -> tuple[pd.DataFrame, str | None]:
    """Load the admin table, converting a Hub read failure into data.

    Mirrors :func:`_safe_load_split`: a :class:`LeaderboardDataError` is
    logged and replaced by an empty frame with the admin column headers
    together with the error text, rather than being propagated.

    Returns:
        ``(admin_df, error)`` with ``error`` set to ``None`` on success.
    """
    try:
        table = load_admin_table()
    except LeaderboardDataError as exc:
        logger.exception("Admin table load failed")
        return pd.DataFrame(columns=ADMIN_COLUMNS), str(exc)
    return table, None
|
|
|
|
def _refresh_leaderboard_with_toast():
    """Handle the manual Refresh button: reload both tables and toast the result.

    A successful reload raises a ``gr.Info`` toast, a failed one a
    ``gr.Warning``. The third returned component is the data-unavailable
    banner, made visible with the error text on failure and hidden
    otherwise.

    Returns:
        ``(validated_df, unvalidated_df, banner_markdown)``.
    """
    validated, unvalidated, error = _safe_load_split()
    if not error:
        gr.Info("Leaderboard refreshed.")
    else:
        gr.Warning(f"Leaderboard data unavailable: {error}")
    banner = gr.Markdown(
        value=_data_error_banner_md(error),
        visible=error is not None,
    )
    return validated, unvalidated, banner
|
|
|
|
def _auto_refresh_leaderboard():
    """Handle a timer tick: reload both tables, staying quiet on success.

    Same outputs as :func:`_refresh_leaderboard_with_toast`, except that no
    success toast is emitted. A failed read still raises a ``gr.Warning``
    and shows the banner.

    Returns:
        ``(validated_df, unvalidated_df, banner_markdown)``.
    """
    validated, unvalidated, error = _safe_load_split()
    if error:
        gr.Warning(f"Leaderboard data unavailable: {error}")
    banner = gr.Markdown(
        value=_data_error_banner_md(error),
        visible=error is not None,
    )
    return validated, unvalidated, banner
|
|
|
|
def _enable_submit_when_logged_in(
    profile: gr.OAuthProfile | None,
) -> gr.Button:
    """Return a Submit-button update that is interactive only when logged in.

    Args:
        profile: The visitor's OAuth profile, or ``None`` when there is no
            logged-in user.

    Returns:
        A ``gr.Button`` update whose ``interactive`` flag is ``True`` exactly
        when ``profile`` is not ``None``.
    """
    logged_in = profile is not None
    return gr.Button(interactive=logged_in)
|
|
|
|
def _selected_ids(table_df: pd.DataFrame | None) -> list[str]:
    """Return the submission ids of rows whose ``select`` checkbox is ticked.

    Args:
        table_df: The admin table as sent back by the UI, or ``None``.

    Returns:
        The ``submission_id`` of every ticked row, as strings, in table
        order. Empty when the frame is ``None``, has no rows, or lacks
        either the select column or the ``submission_id`` column.

    Missing values are handled conservatively because this selection feeds
    destructive actions (delete, rescore): a missing checkbox value
    (``None`` / ``NaN`` / ``pd.NA``) counts as *not* ticked -- plain
    ``bool(NaN)`` is ``True`` and ``bool(pd.NA)`` raises -- and a row with a
    missing or empty id is dropped instead of yielding the string ``"nan"``.
    """
    if (
        table_df is None
        or len(table_df) == 0
        or ADMIN_SELECT_COL not in table_df.columns
        or "submission_id" not in table_df.columns
    ):
        return []

    def _is_ticked(value: object) -> bool:
        # Check for missing first: truth-testing NaN/pd.NA is wrong or raises.
        return False if pd.isna(value) else bool(value)

    ticked = table_df[ADMIN_SELECT_COL].map(_is_ticked).astype(bool)
    candidates = table_df.loc[ticked, "submission_id"].tolist()
    return [str(s) for s in candidates if not pd.isna(s) and s]
|
|
|
|
def _admin_selection_status(table_df: pd.DataFrame | None) -> str:
    """Return the markdown count line shown beneath the admin table.

    Args:
        table_df: Current admin table, or ``None``.

    Returns:
        ``"**N** row(s) selected."`` when at least one row is ticked,
        otherwise the italic ``"_No rows selected._"`` placeholder.
    """
    count = len(_selected_ids(table_df))
    if count:
        return f"**{count}** row(s) selected."
    return "_No rows selected._"
|
|
|
|
def _gate_admin_controls(
    profile: gr.OAuthProfile | None,
) -> tuple[
    gr.Column, gr.Dataframe, gr.Radio, gr.Button, gr.Button, gr.Checkbox,
    gr.Button, gr.Button, gr.Checkbox, gr.Button, gr.Textbox, gr.Button, str,
]:
    """Show and enable the admin panel only for a visitor in the admin set.

    For an admin, the panel column is made visible, the table is filled
    from :func:`_safe_load_admin` (with a ``gr.Warning`` if that read
    failed) and the plain controls are made interactive. For anyone else
    the column stays hidden, the table receives an empty frame, and the
    controls are non-interactive.

    Four buttons are returned with ``interactive=False`` regardless of who
    is logged in: they are the confirm-gated ones, and both confirm
    checkboxes plus the phrase textbox are reset to their empty state so
    nothing loads pre-armed.

    Args:
        profile: The visitor's OAuth profile, or ``None`` if logged out.

    Returns:
        Updates for the panel column, table, and controls, in the order the
        return annotation lists them, followed by the status line text.
    """
    admin = is_admin(profile)

    if not admin:
        # Non-admins never get real rows, even into the hidden table.
        admin_df = _empty_admin_table()
    else:
        admin_df, load_error = _safe_load_admin()
        if load_error:
            gr.Warning(f"Admin table unavailable: {load_error}")

    if profile is None:
        status = "Log in with an admin account to access the controls."
    elif not admin:
        status = (
            f"Signed in as `{profile.username}`, which is not in the admin "
            "set. You can log out with the button above."
        )
    else:
        status = f"Signed in as `{profile.username}`. Admin controls enabled."

    updates = (
        gr.Column(visible=admin),
        gr.Dataframe(value=admin_df, interactive=admin),
        gr.Radio(interactive=admin),
        gr.Button(interactive=admin),
        gr.Button(interactive=admin),
        gr.Checkbox(interactive=admin, value=False),
        gr.Button(interactive=False),  # confirm-gated: always loads disarmed
        gr.Button(interactive=False),  # confirm-gated: always loads disarmed
        gr.Checkbox(interactive=admin, value=False),
        gr.Button(interactive=False),  # confirm-gated: always loads disarmed
        gr.Textbox(interactive=admin, value=""),
        gr.Button(interactive=False),  # confirm-gated: always loads disarmed
    )
    return (*updates, status)
|
|
|
|
def _arm_delete(
    confirm: bool, profile: gr.OAuthProfile | None,
) -> tuple[gr.Button, gr.Button]:
    """Enable the two delete buttons when an admin has ticked the confirm box.

    Both buttons are driven by the same checkbox, so they arm and disarm
    together. The admin check is only consulted when the box is ticked.

    Returns:
        Two ``gr.Button`` updates sharing the same ``interactive`` value.
    """
    armed = is_admin(profile) if confirm else False
    delete_update = gr.Button(interactive=armed)
    stop_delete_update = gr.Button(interactive=armed)
    return delete_update, stop_delete_update
|
|
|
|
def _empty_admin_table() -> pd.DataFrame:
    """Return a zero-row admin frame that still carries the admin headers.

    This is what the table handlers hand to non-admins, so no submission
    rows are sent to a visitor who should not see them.
    """
    headers = [*ADMIN_COLUMNS]
    return pd.DataFrame(columns=headers)
|
|
|
|
def _refresh_admin_table(profile: gr.OAuthProfile | None) -> pd.DataFrame:
    """Handle the admin Refresh button by reloading the admin table.

    Non-admins get an empty frame. For admins the table is reloaded via
    :func:`_safe_load_admin`; a failed read raises a ``gr.Warning`` toast
    and the (empty) fallback frame is returned.

    Args:
        profile: The visitor's OAuth profile, or ``None`` if logged out.

    Returns:
        The frame to place in the admin table.
    """
    if is_admin(profile):
        table, load_error = _safe_load_admin()
        if load_error:
            gr.Warning(f"Admin table unavailable: {load_error}")
        return table
    return _empty_admin_table()
|
|
|
|
def _reapply_selection(
    fresh: pd.DataFrame, selected: set[str],
) -> pd.DataFrame:
    """Carry previously ticked rows over onto a freshly loaded admin frame.

    Args:
        fresh: Newly loaded admin frame. Modified in place.
        selected: Submission ids (as strings) that were ticked before.

    Returns:
        ``fresh`` itself. When ``selected`` is non-empty and both the select
        and ``submission_id`` columns exist, the select column is overwritten
        with ``True`` for ids in ``selected`` and ``False`` for the rest; ids
        no longer present simply match nothing. Otherwise the frame is
        returned untouched.
    """
    if not selected:
        return fresh
    columns = fresh.columns
    if ADMIN_SELECT_COL in columns and "submission_id" in columns:
        ids_as_text = fresh["submission_id"].astype(str)
        fresh[ADMIN_SELECT_COL] = ids_as_text.isin(selected)
    return fresh
|
|
|
|
def _auto_refresh_admin_table(
    current_df: pd.DataFrame | None,
    profile: gr.OAuthProfile | None,
) -> pd.DataFrame:
    """Handle a timer tick for the admin table, keeping ticked rows ticked.

    No toast is emitted on either outcome. Non-admins get an empty frame.
    When the reload fails, the table currently on screen is returned as-is
    (or the empty fallback if there is none), so the selection survives.
    When it succeeds, the prior ticks are re-applied by ``submission_id``.

    Args:
        current_df: The admin table as currently shown, or ``None``.
        profile: The visitor's OAuth profile, or ``None`` if logged out.

    Returns:
        The frame to place in the admin table.
    """
    if not is_admin(profile):
        return _empty_admin_table()
    reloaded, load_error = _safe_load_admin()
    if not load_error:
        ticked = set(_selected_ids(current_df))
        return _reapply_selection(reloaded, ticked)
    if current_df is None:
        return reloaded
    return current_df
|
|
|
|
def _admin_promote(
    table_df: pd.DataFrame | None,
    method: str | None,
    profile: gr.OAuthProfile | None,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, str]:
    """Promote the ticked rows and return refreshed views.

    The admin check is repeated here on the server, independent of whatever
    state the button had in the browser.

    Args:
        table_df: Admin table carrying the tick state.
        method: The chosen validation method; required.
        profile: The caller's OAuth profile, or ``None``.

    Returns:
        ``(admin_df, validated_df, unvalidated_df, gallery_html)``.

    Raises:
        gr.Error: Caller is not an admin, nothing is ticked, no method was
            picked, or :func:`promote_rows` raised ``LookupError`` /
            ``ValueError``.
    """
    if not is_admin(profile):
        raise gr.Error("You are not in the admin set.")
    chosen = _selected_ids(table_df)
    if not chosen:
        raise gr.Error("Tick at least one row first.")
    if not method:
        raise gr.Error("Pick a validation_method first.")
    try:
        promote_rows(chosen, method)
    except (LookupError, ValueError) as exc:
        raise gr.Error(str(exc))
    else:
        gr.Info(f"Promoted {len(chosen)} row(s) to validated ({method}).")
    # Reload errors are intentionally discarded here, as in the original.
    validated, unvalidated, _ = _safe_load_split()
    admin_df, _ = _safe_load_admin()
    gallery = _gallery_iframe_html()
    return admin_df, validated, unvalidated, gallery
|
|
|
|
def _admin_demote(
    table_df: pd.DataFrame | None,
    profile: gr.OAuthProfile | None,
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, str]:
    """Demote the ticked rows and return refreshed views.

    Args:
        table_df: Admin table carrying the tick state.
        profile: The caller's OAuth profile, or ``None``.

    Returns:
        ``(admin_df, validated_df, unvalidated_df, gallery_html)``.

    Raises:
        gr.Error: Caller is not an admin, nothing is ticked, or
            :func:`demote_rows` raised ``LookupError`` / ``ValueError``.
    """
    if not is_admin(profile):
        raise gr.Error("You are not in the admin set.")
    chosen = _selected_ids(table_df)
    if not chosen:
        raise gr.Error("Tick at least one row first.")
    try:
        demote_rows(chosen)
    except (LookupError, ValueError) as exc:
        raise gr.Error(str(exc))
    else:
        gr.Info(f"Demoted {len(chosen)} row(s) to unvalidated.")
    validated, unvalidated, _ = _safe_load_split()
    admin_df, _ = _safe_load_admin()
    gallery = _gallery_iframe_html()
    return admin_df, validated, unvalidated, gallery
|
|
|
|
def _admin_delete(
    table_df: pd.DataFrame | None,
    confirm: bool,
    profile: gr.OAuthProfile | None,
) -> tuple[
    pd.DataFrame, pd.DataFrame, pd.DataFrame, str, gr.Checkbox, gr.Button,
    gr.Button,
]:
    """Delete the ticked rows, return refreshed views, and disarm the controls.

    On success the confirm checkbox is cleared and both delete buttons are
    returned non-interactive, so another deletion needs a new confirm tick.

    Args:
        table_df: Admin table carrying the tick state.
        confirm: State of the confirmation checkbox; must be truthy.
        profile: The caller's OAuth profile, or ``None``.

    Returns:
        ``(admin_df, validated_df, unvalidated_df, gallery_html,
        confirm_checkbox, delete_button, stop_delete_button)``.

    Raises:
        gr.Error: Caller is not an admin, confirm is not ticked, nothing is
            selected, or :func:`delete_rows` raised ``ValueError``.
    """
    if not is_admin(profile):
        raise gr.Error("You are not in the admin set.")
    if not confirm:
        raise gr.Error("Tick the confirmation box to enable delete.")
    chosen = _selected_ids(table_df)
    if not chosen:
        raise gr.Error("Tick at least one row first.")
    try:
        delete_rows(chosen)
    except ValueError as exc:
        raise gr.Error(str(exc))
    else:
        gr.Info(f"Deleted {len(chosen)} submission(s).")
    validated, unvalidated, _ = _safe_load_split()
    admin_df, _ = _safe_load_admin()
    gallery = _gallery_iframe_html()
    cleared_confirm = gr.Checkbox(value=False)
    disarmed_delete = gr.Button(interactive=False)
    disarmed_stop_delete = gr.Button(interactive=False)
    return (
        admin_df,
        validated,
        unvalidated,
        gallery,
        cleared_confirm,
        disarmed_delete,
        disarmed_stop_delete,
    )
|
|
|
|
def _admin_stop_delete(
    table_df: pd.DataFrame | None,
    confirm: bool,
    profile: gr.OAuthProfile | None,
) -> tuple[
    pd.DataFrame, pd.DataFrame, pd.DataFrame, str, gr.Checkbox, gr.Button,
    gr.Button,
]:
    """Stop-and-delete the ticked rows, return refreshed views, and disarm.

    Identical checks and outputs to :func:`_admin_delete`; the one
    difference is that the work is delegated to
    :func:`admin.stop_and_delete_rows` instead of :func:`admin.delete_rows`.

    Args:
        table_df: Admin table carrying the tick state.
        confirm: State of the confirmation checkbox; must be truthy.
        profile: The caller's OAuth profile, or ``None``.

    Returns:
        ``(admin_df, validated_df, unvalidated_df, gallery_html,
        confirm_checkbox, delete_button, stop_delete_button)``.

    Raises:
        gr.Error: Caller is not an admin, confirm is not ticked, nothing is
            selected, or :func:`stop_and_delete_rows` raised ``ValueError``.
    """
    if not is_admin(profile):
        raise gr.Error("You are not in the admin set.")
    if not confirm:
        raise gr.Error("Tick the confirmation box to enable delete.")
    chosen = _selected_ids(table_df)
    if not chosen:
        raise gr.Error("Tick at least one row first.")
    try:
        stop_and_delete_rows(chosen)
    except ValueError as exc:
        raise gr.Error(str(exc))
    else:
        gr.Info(f"Stopped + deleted {len(chosen)} submission(s).")
    validated, unvalidated, _ = _safe_load_split()
    admin_df, _ = _safe_load_admin()
    gallery = _gallery_iframe_html()
    cleared_confirm = gr.Checkbox(value=False)
    disarmed_delete = gr.Button(interactive=False)
    disarmed_stop_delete = gr.Button(interactive=False)
    return (
        admin_df,
        validated,
        unvalidated,
        gallery,
        cleared_confirm,
        disarmed_delete,
        disarmed_stop_delete,
    )
|
|
|
|
| |
| |
| |
| |
| |
# Exact text an admin must type (compared after stripping surrounding
# whitespace) to arm and to fire the rescore-all action.
RESCORE_ALL_PHRASE = "RESCORE ALL"
|
|
|
|
def _arm_rescore_selected(
    confirm: bool, profile: gr.OAuthProfile | None,
) -> gr.Button:
    """Enable the rescore-selected button when an admin has ticked its confirm box.

    The admin check is only consulted when the box is ticked.
    """
    armed = is_admin(profile) if confirm else False
    return gr.Button(interactive=armed)
|
|
|
|
def _arm_rescore_all(
    phrase: str | None, profile: gr.OAuthProfile | None,
) -> gr.Button:
    """Enable the rescore-all button only when an admin typed the exact phrase.

    The typed text is compared to ``RESCORE_ALL_PHRASE`` after stripping
    surrounding whitespace; ``None`` is treated as empty. The admin check is
    only consulted when the phrase matches.
    """
    typed = "" if not phrase else phrase
    if typed.strip() != RESCORE_ALL_PHRASE:
        return gr.Button(interactive=False)
    return gr.Button(interactive=is_admin(profile))
|
|
|
|
def _rescore_result_message(dispatched: int, skipped: list[str]) -> str:
    """Compose the toast text that summarises a rescore dispatch.

    Args:
        dispatched: Number of submissions sent for re-evaluation.
        skipped: Ids of rows that could not be rescored.

    Returns:
        One sentence block reporting the dispatched count, with a trailing
        "Skipped N row(s)" sentence appended only when ``skipped`` is
        non-empty.
    """
    parts = [
        f"Rescoring {dispatched} submission(s): rows flipped to pending and ",
        "re-evaluating in the background. The leaderboard repopulates as ",
        "each finishes.",
    ]
    if skipped:
        parts.append(
            f" Skipped {len(skipped)} row(s) with no stored zip (legacy seed "
        )
        parts.append("rows can't be rescored).")
    return "".join(parts)
|
|
|
|
def _admin_rescore_selected(
    table_df: pd.DataFrame | None,
    confirm: bool,
    profile: gr.OAuthProfile | None,
) -> tuple[
    pd.DataFrame, pd.DataFrame, pd.DataFrame, str, gr.Checkbox, gr.Button,
]:
    """Rescore the ticked rows, return refreshed views, and disarm the control.

    On success the confirm checkbox is cleared and the button is returned
    non-interactive, so another rescore needs a new confirm tick.

    Args:
        table_df: Admin table carrying the tick state.
        confirm: State of the rescore confirmation checkbox; must be truthy.
        profile: The caller's OAuth profile, or ``None``.

    Returns:
        ``(admin_df, validated_df, unvalidated_df, gallery_html,
        confirm_checkbox, rescore_button)``.

    Raises:
        gr.Error: Caller is not an admin, confirm is not ticked, nothing is
            selected, or :func:`rescore_rows` raised ``LookupError`` /
            ``ValueError``.
    """
    if not is_admin(profile):
        raise gr.Error("You are not in the admin set.")
    if not confirm:
        raise gr.Error("Tick the confirmation box to enable rescore.")
    chosen = _selected_ids(table_df)
    if not chosen:
        raise gr.Error("Tick at least one row first.")
    try:
        dispatched, skipped = rescore_rows(chosen)
    except (LookupError, ValueError) as exc:
        raise gr.Error(str(exc))
    summary = _rescore_result_message(dispatched, skipped)
    gr.Info(summary)
    validated, unvalidated, _ = _safe_load_split()
    admin_df, _ = _safe_load_admin()
    gallery = _gallery_iframe_html()
    cleared_confirm = gr.Checkbox(value=False)
    disarmed_button = gr.Button(interactive=False)
    return (
        admin_df,
        validated,
        unvalidated,
        gallery,
        cleared_confirm,
        disarmed_button,
    )
|
|
|
|
def _admin_rescore_all(
    phrase: str | None,
    profile: gr.OAuthProfile | None,
) -> tuple[
    pd.DataFrame, pd.DataFrame, pd.DataFrame, str, gr.Textbox, gr.Button,
]:
    """Rescore every rescoreable row, return refreshed views, and disarm.

    Both the admin check and the confirmation phrase are verified here on
    the server. On success the phrase box is cleared and the button is
    returned non-interactive.

    Args:
        phrase: Text typed into the confirmation box; must equal
            ``RESCORE_ALL_PHRASE`` after stripping surrounding whitespace.
        profile: The caller's OAuth profile, or ``None``.

    Returns:
        ``(admin_df, validated_df, unvalidated_df, gallery_html,
        phrase_textbox, rescore_all_button)``.

    Raises:
        gr.Error: Caller is not an admin, the phrase does not match, or
            :func:`rescore_all` raised ``ValueError``.
    """
    if not is_admin(profile):
        raise gr.Error("You are not in the admin set.")
    typed = "" if not phrase else phrase
    if typed.strip() != RESCORE_ALL_PHRASE:
        raise gr.Error(
            f"Type '{RESCORE_ALL_PHRASE}' exactly to confirm a full rescore."
        )
    try:
        dispatched, skipped = rescore_all()
    except ValueError as exc:
        raise gr.Error(str(exc))
    summary = _rescore_result_message(dispatched, skipped)
    gr.Info(summary)
    validated, unvalidated, _ = _safe_load_split()
    admin_df, _ = _safe_load_admin()
    gallery = _gallery_iframe_html()
    cleared_phrase = gr.Textbox(value="")
    disarmed_button = gr.Button(interactive=False)
    return (
        admin_df,
        validated,
        unvalidated,
        gallery,
        cleared_phrase,
        disarmed_button,
    )
|
|
|
|
@lru_cache(maxsize=128)
def _download_report_html(submission_id: str) -> bytes:
    """Download ``reports/<id>.html`` from the submissions dataset.

    Raises whatever ``hf_hub_download`` / the file read raises. Because
    ``lru_cache`` stores only *returned* values, a raised failure is never
    memoized -- only successful downloads are cached.
    """
    local_path = hf_hub_download(
        repo_id=HF_SUBMISSIONS_REPO,
        filename=f"reports/{submission_id}.html",
        repo_type="dataset",
    )
    return Path(local_path).read_bytes()


def _fetch_report_html(submission_id: str) -> bytes | None:
    """Pull ``reports/<id>.html`` off the submissions dataset.

    Successful fetches are cached in-process (up to 128 reports) so repeat
    clicks on the same row don't hit the Hub. Failures are *not* cached:
    previously the ``None`` result itself was memoized, so one transient
    Hub error -- or a request made before the report was uploaded -- kept
    returning "not found" for that id until eviction or restart.

    Args:
        submission_id: Id of the submission whose report to fetch.

    Returns:
        The report bytes, or ``None`` on any failure (logged as a warning)
        so the caller can serve a clean 404 instead of a stack trace.
    """
    try:
        return _download_report_html(submission_id)
    except Exception as e:
        logger.warning(
            "Failed to fetch report for %s (%s: %s)",
            submission_id, type(e).__name__, e,
        )
        return None


# Keep the cache-management hooks reachable under the public name, as they
# were when this function itself was the ``lru_cache``-wrapped object.
_fetch_report_html.cache_clear = _download_report_html.cache_clear  # type: ignore[attr-defined]
_fetch_report_html.cache_info = _download_report_html.cache_info  # type: ignore[attr-defined]
|
|
|
|
def serve_report(submission_id: str) -> Response:
    """Serve a submission's HTML report from the Space with an HTML content type.

    The report bytes come from :func:`_fetch_report_html` and are returned
    unchanged as ``text/html; charset=utf-8``.

    Args:
        submission_id: Id of the submission whose report is requested.

    Returns:
        The report response, or a small HTML 404 page when the report could
        not be fetched.
    """
    report = _fetch_report_html(submission_id)
    if report is not None:
        return Response(content=report, media_type="text/html; charset=utf-8")
    return HTMLResponse(
        content="<h1>Report not found</h1>",
        status_code=404,
    )
|
|
|
|
def serve_metrics_page() -> Response:
    """Serve the metrics explainer page as HTML.

    The page body is produced by :func:`build_metrics_page` on each call.
    """
    page = build_metrics_page()
    return HTMLResponse(content=page)
|
|
|
|
| |
| |
| |
| |
# Directory of bundled metrics illustrations: ``assets/metrics`` next to
# this module.
METRICS_ASSETS_DIR = Path(__file__).parent.joinpath("assets", "metrics")
|
|
|
|
def serve_metrics_asset(name: str) -> Response:
    """Serve one bundled file from ``METRICS_ASSETS_DIR``.

    Names containing ``/`` or ``..`` are rejected, so only files directly
    inside the directory can be served. The content type is guessed from
    the file name, falling back to ``application/octet-stream``.

    Args:
        name: Bare file name of the asset.

    Returns:
        The file with the shared ``RENDER_CACHE_CONTROL`` header, or an
        empty 404 when the name is rejected or no such file exists.
    """
    if any(token in name for token in ("/", "..")):
        return Response(status_code=404)
    asset = METRICS_ASSETS_DIR / name
    if not asset.is_file():
        return Response(status_code=404)
    guessed, _encoding = mimetypes.guess_type(name)
    media_type = guessed or "application/octet-stream"
    payload = asset.read_bytes()
    return Response(
        content=payload,
        media_type=media_type,
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
|
|
|
|
def _fetch_gt_render(fixture: str) -> bytes | None:
    """Fetch a fixture's ground-truth turntable WebP from the GT dataset.

    Downloads ``<fixture>/renders/rotating.webp`` from ``HF_DATA_GT_REPO``
    via ``hf_hub_download`` and reads it into memory. No in-process
    memoization is applied here.

    Args:
        fixture: Fixture directory name inside the GT dataset.

    Returns:
        The file's bytes, or ``None`` on any failure (logged as a warning).
    """
    try:
        cached_file = hf_hub_download(
            repo_id=HF_DATA_GT_REPO,
            filename=f"{fixture}/renders/rotating.webp",
            repo_type="dataset",
        )
        payload = Path(cached_file).read_bytes()
    except Exception as exc:
        logger.warning(
            "Failed to fetch GT render for %s (%s: %s)",
            fixture, type(exc).__name__, exc,
        )
        return None
    return payload
|
|
|
|
| |
| |
| |
| |
| |
# ``Cache-Control`` value shared by the asset/render proxies in this module:
# publicly cacheable for 31536000 s (365 days) and marked immutable.
RENDER_CACHE_CONTROL = "public, max-age=31536000, immutable"
|
|
|
|
def _render_proxy_url(submission_id: str, fixture: str) -> str | None:
    """Resolve the URL of a submission's plain turntable for one fixture.

    Delegates to :func:`render_public_url` for the ``rotating.webp`` file.

    Args:
        submission_id: Submission the render belongs to.
        fixture: Fixture name.

    Returns:
        Whatever :func:`render_public_url` returns for that file.
    """
    filename = "rotating.webp"
    return render_public_url(submission_id, fixture, filename)
|
|
|
|
def _render_diff_proxy_url(submission_id: str, fixture: str) -> str | None:
    """Resolve the URL of a submission's edit-diff turntable for one fixture.

    Delegates to :func:`render_public_url` for the ``edit_diff.webp`` file;
    there is no fallback to the plain turntable.

    Args:
        submission_id: Submission the render belongs to.
        fixture: Fixture name.

    Returns:
        Whatever :func:`render_public_url` returns for that file.
    """
    filename = "edit_diff.webp"
    return render_public_url(submission_id, fixture, filename)
|
|
|
|
def _gt_proxy_url(fixture: str) -> str | None:
    """Return the Space-relative proxy path for a fixture's ground-truth WebP.

    Args:
        fixture: Fixture name.

    Returns:
        ``/gt-render/<fixture>.webp``. Only the path is built; nothing is
        fetched.
    """
    return "/gt-render/{}.webp".format(fixture)
|
|
|
|
def _gt_diff_proxy_url(fixture: str) -> str | None:
    """Return the Space-relative proxy path for a fixture's GT edit-diff WebP.

    The path sits under the generic ``/gt/<fixture>/<relpath>`` layout with
    ``renders/edit_diff_gt.webp`` as the relative path.

    Args:
        fixture: Fixture name.

    Returns:
        ``/gt/<fixture>/renders/edit_diff_gt.webp``. Only the path is built;
        nothing is fetched.
    """
    return "/gt/{}/renders/edit_diff_gt.webp".format(fixture)
|
|
|
|
def serve_gt_render(fixture: str) -> Response:
    """Stream a fixture's ground-truth render WebP with long-lived caching.

    Rejects any ``fixture`` containing ``..`` before touching the Hub, the
    same traversal guard the other private-repo proxies in this module
    (``serve_gt_file``, ``serve_task_input``) apply, so every route that
    builds a repo path from request input is guarded the same way.

    Args:
        fixture: Fixture name taken from the request path.

    Returns:
        The WebP bytes as ``image/webp`` with the shared
        ``RENDER_CACHE_CONTROL`` header, or an empty 404 when the name is
        rejected or the render could not be fetched.
    """
    if ".." in fixture:
        return Response(status_code=404)
    webp = _fetch_gt_render(fixture)
    if webp is None:
        return Response(status_code=404)
    return Response(
        content=webp,
        media_type="image/webp",
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
|
|
|
|
def _fetch_gt_file(fixture: str, relpath: str) -> bytes | None:
    """Fetch ``<fixture>/<relpath>`` from the ground-truth dataset.

    Downloads the file from ``HF_DATA_GT_REPO`` via ``hf_hub_download`` and
    reads it into memory.

    Args:
        fixture: Fixture directory name inside the GT dataset.
        relpath: Path of the asset relative to the fixture directory.

    Returns:
        The file's bytes, or ``None`` on any failure (logged as a warning).
    """
    try:
        cached_file = hf_hub_download(
            repo_id=HF_DATA_GT_REPO,
            filename=f"{fixture}/{relpath}",
            repo_type="dataset",
        )
        payload = Path(cached_file).read_bytes()
    except Exception as exc:
        logger.warning(
            "Failed to fetch GT file %s/%s (%s: %s)",
            fixture, relpath, type(exc).__name__, exc,
        )
        return None
    return payload
|
|
|
|
def serve_gt_file(fixture: str, relpath: str) -> Response:
    """Stream a ground-truth asset with the shared long-lived cache header.

    Requests whose ``fixture`` or ``relpath`` contains ``..`` are rejected
    before any fetch. The content type is guessed from ``relpath``, falling
    back to ``application/octet-stream``.

    Args:
        fixture: Fixture name taken from the request path.
        relpath: Asset path relative to the fixture directory.

    Returns:
        The asset with the ``RENDER_CACHE_CONTROL`` header, or an empty 404
        when the request is rejected or the asset could not be fetched.
    """
    if any(".." in part for part in (fixture, relpath)):
        return Response(status_code=404)
    payload = _fetch_gt_file(fixture, relpath)
    if payload is None:
        return Response(status_code=404)
    guessed, _encoding = mimetypes.guess_type(relpath)
    return Response(
        content=payload,
        media_type=guessed or "application/octet-stream",
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
|
|
|
|
def _gallery_iframe_html() -> str:
    """Render the gallery page and wrap it in a ``srcdoc`` iframe.

    Loads the live rows, renders the gallery document with the four URL
    resolvers defined in this module (submission turntable, GT turntable,
    submission edit-diff, GT edit-diff), HTML-escapes the whole document,
    and inlines it as the iframe's ``srcdoc``. If the rows cannot be read
    (:class:`LeaderboardDataError`), the failure is logged and an empty
    gallery is rendered instead.

    Returns:
        The ``<iframe>`` markup as a string.
    """
    try:
        rows = _load_rows_from_hub()
    except LeaderboardDataError:
        logger.exception("Gallery row load failed; rendering empty gallery")
        rows = []
    page = render_gallery_page(
        rows,
        _render_proxy_url,
        _gt_proxy_url,
        _render_diff_proxy_url,
        _gt_diff_proxy_url,
    )
    # quote=True so the document can sit safely inside the attribute value.
    srcdoc = html.escape(page, quote=True)
    attributes = (
        f'srcdoc="{srcdoc}"',
        'style="width:100%; height:80vh; border:0; display:block;"',
        'title="CADGenBench gallery"',
    )
    return "<iframe " + " ".join(attributes) + "></iframe>"
|
|
|
|
def _fetch_task_input(fixture: str, relpath: str) -> bytes | None:
    """Fetch ``<fixture>/<relpath>`` from the fixture-inputs dataset.

    Downloads the file from ``HF_DATA_REPO`` via ``hf_hub_download`` and
    reads it into memory. No in-process memoization is applied here.

    Args:
        fixture: Fixture directory name inside the inputs dataset.
        relpath: Path of the asset relative to the fixture directory.

    Returns:
        The file's bytes, or ``None`` on any failure (logged as a warning).
    """
    try:
        cached_file = hf_hub_download(
            repo_id=HF_DATA_REPO,
            filename=f"{fixture}/{relpath}",
            repo_type="dataset",
        )
        payload = Path(cached_file).read_bytes()
    except Exception as exc:
        logger.warning(
            "Failed to fetch task input %s/%s (%s: %s)",
            fixture, relpath, type(exc).__name__, exc,
        )
        return None
    return payload
|
|
|
|
def _task_input_url(fixture: str, relpath: str) -> str:
    """Return the Space-relative proxy path for a task input asset.

    Args:
        fixture: Fixture name.
        relpath: Asset path relative to the fixture directory.

    Returns:
        ``/task-input/<fixture>/<relpath>``. Only the path is built; no
        bytes are fetched.
    """
    return "/task-input/{}/{}".format(fixture, relpath)
|
|
|
|
def serve_task_input(fixture: str, relpath: str) -> Response:
    """Serve one fixture input asset for the ``/task-input`` route.

    The task browser references ``/task-input/<fixture>/<relpath>`` and
    the browser fetches it lazily; this handler re-streams the dataset
    bytes (the Space holds the read token) using the same
    ``RENDER_CACHE_CONTROL`` header as the render proxies.

    Responds 404 when either path component contains ``..`` (path
    traversal guard) or when the asset cannot be fetched. The content
    type is guessed from the file name, with
    ``application/octet-stream`` as the fallback.
    """
    # Reject traversal attempts before touching the Hub.
    if any(".." in component for component in (fixture, relpath)):
        return Response(status_code=404)

    payload = _fetch_task_input(fixture, relpath)
    if payload is None:
        return Response(status_code=404)

    guessed_type, _encoding = mimetypes.guess_type(relpath)
    return Response(
        content=payload,
        media_type=guessed_type or "application/octet-stream",
        headers={"Cache-Control": RENDER_CACHE_CONTROL},
    )
|
|
|
|
def _tasks_iframe_html() -> str:
    """Render the Task browser as a self-contained ``srcdoc`` iframe.

    Only the ``<fixture>/description.yaml`` files are snapshotted from
    the inputs dataset (lightweight: drawings and renders load lazily
    through the ``/task-input`` proxy). They are turned into task cards,
    rendered to a full HTML document, and that document is inlined into
    an iframe so it keeps its own style context (no Gradio CSS
    collision).

    Any failure while loading tasks is logged and degrades to an empty
    browser instead of crashing the tab.
    """
    try:
        snapshot_dir = snapshot_download(
            repo_id=HF_DATA_REPO,
            repo_type="dataset",
            allow_patterns=["*/description.yaml"],
        )
        task_cards = load_tasks_from_dir(Path(snapshot_dir))
    except Exception:
        logger.exception("Task load failed; rendering empty task browser")
        task_cards = []

    page = render_tasks_page(task_cards, _task_input_url)
    # The whole document becomes an attribute value, so quotes must be
    # escaped as well.
    srcdoc = html.escape(page, quote=True)
    return (
        '<iframe srcdoc="' + srcdoc + '" '
        + 'style="width:100%; height:90vh; border:0; display:block;" '
        + 'title="CADGenBench tasks"></iframe>'
    )
|
|
|
|
| @lru_cache(maxsize=1) |
| def _logo_data_uri() -> str: |
| """Return the header logo as a base64 ``data:`` URI. |
| |
| Inlined rather than served as a static file so the ``<img>`` renders |
| with no dependency on Gradio/FastAPI static-path allowlisting — it |
| works identically when the Space runs locally on a random port and |
| on huggingface.co. The PNG itself lives in the repo at |
| ``assets/logo.png`` (reviewable as a real binary) and is read |
| relative to this module so the Docker image's working dir doesn't |
| matter. Cached because the bytes never change within a process. |
| """ |
| logo_path = Path(__file__).parent / "assets" / "logo.png" |
| data = base64.b64encode(logo_path.read_bytes()).decode("ascii") |
| return f"data:image/png;base64,{data}" |
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| _APP_CSS = ( |
| "footer{display:none !important;}" |
| ".gradio-container{padding-top:4px !important; padding-bottom:0 !important;}" |
| |
| |
| |
| |
| "#cgb-title{margin:0 !important;padding:0 !important;min-width:0 !important;}" |
| "#cgb-title .cgb-logo{height:46px;width:auto;display:block;margin:0;}" |
| ".gradio-container .tabs{margin-top:-6px !important;}" |
| ".dark #cgb-title .cgb-logo{filter:invert(1);}" |
| "@media (prefers-color-scheme: dark){" |
| "#cgb-title .cgb-logo{filter:invert(1);}}" |
| ) |
|
|
# Gradio UI. Components appear on the page in creation order, so the order of
# the statements below is also the layout order.
# NOTE(review): the indentation of this block was rebuilt from a listing that
# had lost it. Layout nesting follows the ``with`` statements; the depth of
# the event wiring and of the two timers is a best guess — confirm.
with gr.Blocks(
    title="CADGenBench Leaderboard", theme=gr.themes.Soft(), css=_APP_CSS,
) as blocks:
    # Header logo, embedded as a data URI. elem_id="cgb-title" is the hook
    # targeted by the ``#cgb-title`` rules in _APP_CSS.
    gr.HTML(
        f'<img class="cgb-logo" src="{_logo_data_uri()}" '
        'alt="CADGenBench Leaderboard">',
        elem_id="cgb-title",
    )

    with gr.Tab("Leaderboard"):
        # Gallery iframe. The initial value is rendered once, while this
        # block is being built; the blocks.load hook further down re-renders
        # it on page load and the button re-renders it on demand.
        gallery_html = gr.HTML(value=_gallery_iframe_html())
        gallery_refresh_btn = gr.Button("Refresh gallery", size="sm")
        gallery_refresh_btn.click(
            fn=_gallery_iframe_html, outputs=gallery_html,
        )

    with gr.Tab("Detailed View"):
        # One read at build time: the two tier tables plus an error value
        # (None means there is nothing to show in the banner below).
        initial_validated, initial_unvalidated, initial_error = _safe_load_split()

        # Error banner: hidden unless the initial load reported an error.
        # It is also an output of the refresh handlers, which can update it.
        data_error_banner = gr.Markdown(
            value=_data_error_banner_md(initial_error),
            visible=initial_error is not None,
        )

        # Reference material, collapsed by default.
        with gr.Accordion("Validation guidelines", open=False):
            gr.Markdown(VALIDATION_GUIDELINES_MD)
        with gr.Accordion("Citation", open=False):
            # language=None and no line numbers: the BibTeX is shown as
            # plain text.
            gr.Code(
                value=CITATION_BIBTEX,
                language=None,
                show_line_numbers=False,
            )

        # Two read-only tables, one per tier. They share the search and
        # hidden columns and differ in data, datatype list and label.
        validated_view = Leaderboard(
            value=initial_validated,
            datatype=VALIDATED_LEADERBOARD_DATATYPES,
            search_columns=["submission_name", "submitter_name"],
            hide_columns=LEADERBOARD_HIDE_COLUMNS,
            label="Validated Leaderboard",
            interactive=False,
        )
        unvalidated_view = Leaderboard(
            value=initial_unvalidated,
            datatype=LEADERBOARD_DATATYPES,
            search_columns=["submission_name", "submitter_name"],
            hide_columns=LEADERBOARD_HIDE_COLUMNS,
            label="Unvalidated Leaderboard",
            interactive=False,
        )
        with gr.Row():
            refresh_btn = gr.Button("Refresh", size="sm")
            # The click handler's return value is routed back into this
            # button itself (outputs=download_btn below).
            download_btn = gr.DownloadButton(
                label="Download CSV", size="sm",
            )
        # Manual refresh updates both tables and the error banner.
        refresh_btn.click(
            fn=_refresh_leaderboard_with_toast,
            outputs=[validated_view, unvalidated_view, data_error_banner],
        )
        download_btn.click(fn=build_combined_csv, outputs=download_btn)

    with gr.Tab("Tasks"):
        # Same pattern as the gallery: rendered once at build time,
        # re-rendered by a blocks.load hook and by the button.
        tasks_html = gr.HTML(value=_tasks_iframe_html())
        tasks_refresh_btn = gr.Button("Refresh tasks", size="sm")
        tasks_refresh_btn.click(fn=_tasks_iframe_html, outputs=tasks_html)

    with gr.Tab("Metrics"):
        # Unlike the gallery and tasks tabs this iframe uses ``src``: the
        # page is fetched from the "/metrics" route of the FastAPI app.
        gr.HTML(
            '<iframe src="/metrics" '
            'style="width:100%; height:85vh; border:0; display:block;" '
            'title="CADGenBench metrics"></iframe>'
        )

    with gr.Tab("Submit"):
        # f-string: {HF_DATA_REPO} and {SUBMISSION_DOC_URL} are interpolated;
        # the doubled braces render as literal braces in the JSON sample.
        gr.Markdown(
            f"""
**Submission format.** A single zip with:

- one folder per sample in `{HF_DATA_REPO}`; include `output.step` for
samples where your system produced a candidate. Missing `output.step`
scores zero for that sample;
- a top-level `meta.json`:

```json
{{
"submitter_name": "your name or team",
"submission_name": "MyAgent v2.3 (or whatever describes your system)",
"agent_url": "https://github.com/... (optional)",
"notes": "free text, optional, max 500 chars, single line, plain text",
"agree_to_publish": true
}}
```

**Submission name.** Free text describing the system being benchmarked,
however you choose to describe it. The benchmark is system-agnostic: your
submission may use no LLM, one, or many. If you want to disclose your
stack, put it here or in `notes`.

**Notes field.** Plain text only (no markdown / HTML). Capped at 500 chars
and stripped to a single line. Shown in the per-submission detail view,
not in the main leaderboard table.

**Consent.** `"agree_to_publish": true` in `meta.json` is your consent
to publish the resulting row on the public leaderboard.

For the full submission contract (output format, validity gate, canonical
pose, and a local self-check), see
[`docs/benchmark/submission.md`]({SUBMISSION_DOC_URL}).
"""
        )
        login_btn = gr.LoginButton()
        # Upload widget restricted to .zip files.
        zip_in = gr.File(label="Submission ZIP", file_types=[".zip"])
        # Starts disabled; the _enable_submit_when_logged_in load hook at
        # the end of this block has this button as its output.
        submit_btn = gr.Button("Submit", variant="primary", interactive=False)
        # Status line that receives whatever handle_submit returns.
        submit_status = gr.Markdown(value=SUBMIT_STATUS_IDLE)
        submit_btn.click(
            fn=handle_submit,
            inputs=[zip_in],
            outputs=[submit_status],
        )

    with gr.Tab("About"):
        gr.Markdown(ABOUT_MD)

    with gr.Tab("Admin"):
        # The login button and status line are always present. Everything
        # inside admin_panel starts hidden and non-interactive; the
        # _gate_admin_controls load hook at the end of this block has the
        # panel, its controls and the status line as outputs.
        admin_login_btn = gr.LoginButton()
        admin_status = gr.Markdown(
            "Log in with an admin account to access the controls."
        )
        with gr.Column(visible=False) as admin_panel:
            gr.Markdown(
                "## Admin\n"
                "Tick rows in the **select** column, then promote them into "
                "the **Validated** tier (recording an evidence type), demote "
                "them back to **Unvalidated**, delete them, or rescore them "
                "against the current ground truth. Actions apply to every "
                "ticked row at once."
            )
            # Column 0 is the "bool" tick box; static_columns lists every
            # other column index (1 .. len(ADMIN_COLUMNS) - 1).
            # NOTE(review): nine datatypes are listed — presumably
            # len(ADMIN_COLUMNS) == 9; confirm.
            admin_table = gr.Dataframe(
                value=_empty_admin_table(),
                datatype=[
                    "bool", "str", "str", "str", "str", "str", "str",
                    "number", "str",
                ],
                static_columns=list(range(1, len(ADMIN_COLUMNS))),
                interactive=False,
                label="Submissions (tick select to choose rows)",
                wrap=True,
            )
            admin_selection_md = gr.Markdown("_No rows selected._")
            admin_method_radio = gr.Radio(
                choices=list(VALID_METHODS),
                value="manual",
                label="validation_method (applied to all rows on promote)",
                interactive=False,
            )
            with gr.Row():
                promote_btn = gr.Button(
                    "Mark validated", variant="primary", interactive=False,
                )
                demote_btn = gr.Button("Mark unvalidated", interactive=False)
            with gr.Accordion("Danger zone: delete", open=False):
                gr.Markdown(
                    "Permanently deletes the ticked rows **and** their "
                    "uploaded zip + report files from the submissions "
                    "dataset. This cannot be undone (only a manual revert of "
                    "the dataset commit).\n\n"
                    "**Stop & delete** additionally cancels any still-running "
                    "evaluation job(s) for the ticked rows before deleting — "
                    "use it for pending submissions whose GPU eval is in "
                    "flight."
                )
                # Confirmation tick box; its change event feeds _arm_delete,
                # whose outputs are the two delete buttons.
                delete_confirm = gr.Checkbox(
                    label=(
                        "I understand this permanently deletes the selected "
                        "submissions and their files."
                    ),
                    value=False,
                    interactive=False,
                )
                with gr.Row():
                    delete_btn = gr.Button(
                        "Delete selected", variant="stop", interactive=False,
                    )
                    stop_delete_btn = gr.Button(
                        "Stop & delete selected", variant="stop",
                        interactive=False,
                    )
            with gr.Accordion("Danger zone: rescore", open=False):
                gr.Markdown(
                    "Re-evaluates submissions against the **current** "
                    "ground truth + data: each row flips back to pending, the "
                    "gallery renders and the per-submission report HTML are "
                    "regenerated, and the score is recomputed. Use after a "
                    "ground-truth swap or a metric change that invalidates "
                    "the existing scores.\n\n"
                    "Rescoring is **re-runnable**: if a row's eval fails, "
                    "mark it and rescore again (or rescore all) — each run is "
                    "independent and converges.\n\n"
                    "- **Rescore selected** re-evaluates the ticked rows.\n"
                    f"- **Rescore all** re-evaluates every submission that "
                    f"has a stored zip and isn't already pending — type "
                    f"`{RESCORE_ALL_PHRASE}` to arm it."
                )
                # Two separate arming controls: a tick box for "Rescore
                # selected" and a typed phrase for "Rescore ALL".
                rescore_confirm = gr.Checkbox(
                    label=(
                        "I understand this flips the selected rows to pending "
                        "and recomputes their scores."
                    ),
                    value=False,
                    interactive=False,
                )
                rescore_selected_btn = gr.Button(
                    "Rescore selected", variant="stop", interactive=False,
                )
                rescore_all_phrase = gr.Textbox(
                    label=(
                        f"Type '{RESCORE_ALL_PHRASE}' to arm the board-wide "
                        f"rescore"
                    ),
                    placeholder=RESCORE_ALL_PHRASE,
                    interactive=False,
                )
                rescore_all_btn = gr.Button(
                    "Rescore ALL submissions", variant="stop",
                    interactive=False,
                )
            admin_refresh_btn = gr.Button("Refresh", size="sm")

        # --- Admin event wiring -------------------------------------------
        # Every table change re-computes the selection summary line.
        admin_table.change(
            fn=_admin_selection_status,
            inputs=admin_table,
            outputs=admin_selection_md,
        )
        # Promote / demote: each handler's outputs are the admin table,
        # both public tables and the gallery.
        promote_btn.click(
            fn=_admin_promote,
            inputs=[admin_table, admin_method_radio],
            outputs=[admin_table, validated_view, unvalidated_view, gallery_html],
        )
        demote_btn.click(
            fn=_admin_demote,
            inputs=[admin_table],
            outputs=[admin_table, validated_view, unvalidated_view, gallery_html],
        )
        # Delete flow: the tick box drives both delete buttons; the delete
        # handlers also receive the tick-box value and additionally output
        # the tick box and both buttons.
        delete_confirm.change(
            fn=_arm_delete,
            inputs=[delete_confirm],
            outputs=[delete_btn, stop_delete_btn],
        )
        delete_btn.click(
            fn=_admin_delete,
            inputs=[admin_table, delete_confirm],
            outputs=[
                admin_table, validated_view, unvalidated_view, gallery_html,
                delete_confirm, delete_btn, stop_delete_btn,
            ],
        )
        stop_delete_btn.click(
            fn=_admin_stop_delete,
            inputs=[admin_table, delete_confirm],
            outputs=[
                admin_table, validated_view, unvalidated_view, gallery_html,
                delete_confirm, delete_btn, stop_delete_btn,
            ],
        )
        # Rescore flow, same shape: an arming control per button, and the
        # action handlers also output their own arming control and button.
        rescore_confirm.change(
            fn=_arm_rescore_selected,
            inputs=[rescore_confirm],
            outputs=[rescore_selected_btn],
        )
        rescore_selected_btn.click(
            fn=_admin_rescore_selected,
            inputs=[admin_table, rescore_confirm],
            outputs=[
                admin_table, validated_view, unvalidated_view, gallery_html,
                rescore_confirm, rescore_selected_btn,
            ],
        )
        rescore_all_phrase.change(
            fn=_arm_rescore_all,
            inputs=[rescore_all_phrase],
            outputs=[rescore_all_btn],
        )
        rescore_all_btn.click(
            fn=_admin_rescore_all,
            inputs=[rescore_all_phrase],
            outputs=[
                admin_table, validated_view, unvalidated_view, gallery_html,
                rescore_all_phrase, rescore_all_btn,
            ],
        )
        admin_refresh_btn.click(fn=_refresh_admin_table, outputs=admin_table)

        # Periodic admin-table refresh (timer value 10). The current table
        # is passed in as input and written back as output.
        # NOTE(review): presumably the input lets the handler keep the
        # admin's ticked rows across refreshes — confirm.
        admin_auto_refresh_timer = gr.Timer(10)
        admin_auto_refresh_timer.tick(
            fn=_auto_refresh_admin_table,
            inputs=admin_table,
            outputs=admin_table,
        )

    # Periodic refresh of the public tables and the error banner (timer
    # value 10); same outputs as the manual Refresh button.
    auto_refresh_timer = gr.Timer(10)
    auto_refresh_timer.tick(
        fn=_auto_refresh_leaderboard,
        outputs=[validated_view, unvalidated_view, data_error_banner],
    )

    # Load hooks: update the Submit button and re-render the gallery and
    # task browser, replacing the values computed at build time.
    blocks.load(fn=_enable_submit_when_logged_in, outputs=submit_btn)
    blocks.load(fn=_gallery_iframe_html, outputs=gallery_html)
    blocks.load(fn=_tasks_iframe_html, outputs=tasks_html)

    # Admin gate: one handler whose outputs are the admin panel, every
    # control inside it that was created with interactive=False, and the
    # status line. The order of this list must match the order of the
    # values _gate_admin_controls returns.
    blocks.load(
        fn=_gate_admin_controls,
        outputs=[
            admin_panel,
            admin_table,
            admin_method_radio,
            promote_btn,
            demote_btn,
            delete_confirm,
            delete_btn,
            stop_delete_btn,
            rescore_confirm,
            rescore_selected_btn,
            rescore_all_phrase,
            rescore_all_btn,
            admin_status,
        ],
    )
|
|
|
|
| |
| |
| |
| |
# FastAPI parent application. The custom GET routes are registered first and
# the Gradio UI is mounted at "/" last, so these paths are matched before the
# catch-all mount.
app = FastAPI()

# NOTE(review): the gallery docstring mentions a "/render" proxy URL, but no
# "/render/..." route is registered here — confirm it is served elsewhere.
for _route_path, _route_handler in (
    # Per-submission report HTML.
    ("/reports/{submission_id}.html", serve_report),
    # Metrics page (embedded by the Metrics tab iframe) and its assets.
    ("/metrics", serve_metrics_page),
    ("/metrics-assets/{name}", serve_metrics_asset),
    # Ground-truth render and ground-truth file proxies.
    ("/gt-render/{fixture}.webp", serve_gt_render),
    ("/gt/{fixture}/{relpath:path}", serve_gt_file),
    # Task-browser input assets.
    ("/task-input/{fixture}/{relpath:path}", serve_task_input),
):
    app.add_api_route(_route_path, _route_handler, methods=["GET"])
del _route_path, _route_handler

# When SPACE_ID is set but SYSTEM is not already "spaces", force it before
# the Gradio app is mounted.
# NOTE(review): presumably Gradio reads SYSTEM to detect that it runs on a
# Hugging Face Space — confirm against the Gradio version in use.
if os.getenv("SPACE_ID") and os.getenv("SYSTEM") != "spaces":
    os.environ["SYSTEM"] = "spaces"
app = gr.mount_gradio_app(app, blocks, path="/")
|
|
|
|
# Script entry point: serve the combined FastAPI + Gradio app with uvicorn.
# Host and port come from GRADIO_SERVER_NAME / GRADIO_SERVER_PORT, falling
# back to 0.0.0.0 and 7860.
if __name__ == "__main__":
    uvicorn.run(
        app,
        host=os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"),
        port=int(os.environ.get("GRADIO_SERVER_PORT", "7860")),
    )
|
|