cadgenbench-leaderboard / tests /test_proxy.py
Michael Rabinovich
leaderboard: serve renders from the public bucket, not the dataset proxy
d2161b1
"""Unit tests for the report-proxy route.

The Space exposes the per-submission HTML report at
``/reports/{submission_id}.html`` (FastAPI route): it re-serves the
file with ``Content-Type: text/html``. Now that the Space is public,
HF's edge serves this route to in-browser users, so the leaderboard's
submission_name cell links straight to it (opening in a new tab)
rather than inlining the report into the page.

Besides the report route, this module also covers the render URL
resolvers, the GT render proxy route, and the boot-resilience wrappers
around the leaderboard loaders.

Tests stub the Hub fetch via monkeypatch so the suite has zero
network I/O.
"""
from __future__ import annotations
import types
import pandas as pd
import app
import leaderboard
def test_serve_report_returns_html_when_file_exists(monkeypatch):
    """When the fetch yields bytes, the route answers 200 ``text/html`` with them."""
    html_bytes = b"<!DOCTYPE html><html><body>ok</body></html>"

    def fake_fetch(sid):
        # Stand-in for the Hub fetch: always hand back the canned document.
        return html_bytes

    monkeypatch.setattr(app, "_fetch_report_html", fake_fetch)

    response = app.serve_report("sub-test")

    assert response.status_code == 200
    assert response.media_type.startswith("text/html")
    # The handler must pass the fetched bytes through untouched.
    assert response.body == html_bytes
def test_serve_report_returns_404_when_file_missing(monkeypatch):
    """A ``None`` from ``_fetch_report_html`` is answered with a 404 and a short message."""

    def fetch_nothing(sid):
        # Simulates a submission whose report file is absent.
        return None

    monkeypatch.setattr(app, "_fetch_report_html", fetch_nothing)

    response = app.serve_report("sub-missing")

    assert response.status_code == 404
    # The 404 still carries an HTML body so a browser shows the message.
    body_text = response.body.decode("utf-8")
    assert "Report not found" in body_text
def test_render_resolvers_return_public_bucket_urls():
    """Render resolvers point at the public bucket instead of a Space proxy.

    Both the plain turntable resolver and the edit-diff resolver are
    expected to produce a stable ``/buckets/<id>/resolve/renders/...``
    URL (anonymous, browser-followable 302) ending in the per-fixture
    WebP filename.
    """

    def check_bucket_url(url, expected_suffix):
        # Shared shape check: right file at the end, bucket resolve path inside.
        assert url.endswith(expected_suffix)
        assert "/buckets/" in url
        assert "/resolve/" in url

    check_bucket_url(
        app._render_proxy_url("sub-test", "101"),
        "/renders/sub-test/101/rotating.webp",
    )
    check_bucket_url(
        app._render_diff_proxy_url("sub-test", "207"),
        "/renders/sub-test/207/edit_diff.webp",
    )
def test_serve_gt_render_returns_webp_when_file_exists(monkeypatch):
    """The GT render route returns the fetched bytes as ``image/webp`` with a 200."""
    webp_bytes = b"RIFFwebp"

    def fake_fetch(fixture):
        # Stand-in for the GT fetch: every fixture resolves to the same bytes.
        return webp_bytes

    monkeypatch.setattr(app, "_fetch_gt_render", fake_fetch)

    response = app.serve_gt_render("101")

    assert response.status_code == 200
    assert response.media_type == "image/webp"
    assert response.body == webp_bytes
def test_fetch_report_html_returns_none_on_hub_failure(monkeypatch):
    """An exception raised by the Hub download is swallowed and reported as ``None``.

    The serve handler depends on this so that a transient Hub blip does
    not leak a stack trace into the Space's HTTP response.
    """

    def failing_download(*args, **kwargs):
        raise RuntimeError("simulated Hub failure")

    monkeypatch.setattr(app, "hf_hub_download", failing_download)

    # ``_fetch_report_html`` sits behind an lru_cache keyed on its argument,
    # so a one-off id keeps earlier runs from short-circuiting this call.
    fetched = app._fetch_report_html("sub-failure-probe-unique-1")
    assert fetched is None
def test_fetch_gt_render_uses_gt_rotating_webp_path(monkeypatch, tmp_path):
    """The GT fetch asks the Hub for the fixture's ``renders/rotating.webp`` file."""
    webp_bytes = b"RIFFwebp"
    local_file = tmp_path / "rotating.webp"
    local_file.write_bytes(webp_bytes)

    seen_kwargs: dict = {}

    def recording_download(**kwargs):
        # Record what the code under test requested, then serve the temp file.
        seen_kwargs.update(kwargs)
        return str(local_file)

    monkeypatch.setattr(app, "hf_hub_download", recording_download)

    assert app._fetch_gt_render("101") == webp_bytes
    assert seen_kwargs["filename"] == "101/renders/rotating.webp"
def test_proxy_route_is_registered():
    """The mounted FastAPI app lists the report and GT-render routes.

    Guards against the regression where the ``add_api_route`` call ends
    up below ``mount_gradio_app``: the route would still be registered,
    but the breakage would stay silent until someone tried to hit it.
    """
    registered_paths = []
    for route in app.app.routes:
        # Not every entry is guaranteed to expose ``path``; fall back to None.
        registered_paths.append(getattr(route, "path", None))

    assert "/reports/{submission_id}.html" in registered_paths
    # Candidate renders now come from the public bucket, so their proxy
    # route must be gone; only the private GT render keeps a
    # token-holding Space proxy route.
    assert "/render/{submission_id}/{fixture}.webp" not in registered_paths
    assert "/gt-render/{fixture}.webp" in registered_paths
# --- Boot resilience: no silent fallback, but no crash either -------
#
# leaderboard.load_leaderboard_split / load_admin_table *raise*
# LeaderboardDataError on any Hub read failure (no fallback to stale
# or bundled data). app.py must turn that into empty tables + a loud
# banner / toast rather than crash at construction time (which would
# take the whole Space down on an under-scoped HF_TOKEN).
def test_safe_load_split_returns_empty_and_error_on_hub_failure(monkeypatch):
    """If the split loader raises, the wrapper returns empty frames and a message."""

    def failing_loader():
        raise leaderboard.LeaderboardDataError("simulated hub failure")

    monkeypatch.setattr(app, "load_leaderboard_split", failing_loader)

    result = app._safe_load_split()
    validated_df, unvalidated_df, message = result

    # The failure is reported, and the report names the underlying cause.
    assert message is not None
    assert "simulated hub failure" in message

    assert len(validated_df) == 0
    assert len(unvalidated_df) == 0

    # Even when empty, the frames keep their declared columns so the
    # widgets stay consistent with their datatypes.
    assert list(validated_df.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS
    assert list(unvalidated_df.columns) == leaderboard.LEADERBOARD_COLS
def test_safe_load_split_passes_through_on_success(monkeypatch):
    """A successful load hands back the very same frames and no error."""
    validated_in = pd.DataFrame(columns=leaderboard.VALIDATED_LEADERBOARD_COLS)
    unvalidated_in = pd.DataFrame(columns=leaderboard.LEADERBOARD_COLS)

    def working_loader():
        return (validated_in, unvalidated_in)

    monkeypatch.setattr(app, "load_leaderboard_split", working_loader)

    validated_out, unvalidated_out, message = app._safe_load_split()

    assert message is None
    # Identity, not equality: the wrapper must not copy or rebuild the frames.
    assert validated_out is validated_in
    assert unvalidated_out is unvalidated_in
def test_safe_load_admin_returns_empty_and_error_on_hub_failure(monkeypatch):
    """Admin loader failure yields an empty admin frame and a message, not a raise."""

    def failing_loader():
        raise leaderboard.LeaderboardDataError("simulated admin hub failure")

    monkeypatch.setattr(app, "load_admin_table", failing_loader)

    result = app._safe_load_admin()
    frame, message = result

    assert message is not None
    assert len(frame) == 0
    # The empty frame still carries the admin column layout.
    assert list(frame.columns) == leaderboard.ADMIN_COLUMNS
def test_gate_admin_controls_refreshes_live_table(monkeypatch):
    """Gating on page/auth load also reloads the admin table contents.

    The first returned update must carry the freshly loaded rows, not
    only the interactivity flag.
    """
    row = {
        "select": False,
        "validation_status": "unvalidated",
        "validation_method": None,
        "submission_name": "UC3 e2e test 20260602-205316",
        "submitter_name": "michaelr27",
        "submitted_at": "2026-06-02 19:00 UTC",
        "status": "completed",
        "aggregate_score": 0.5853,
        "submission_id": "uc3-e2e",
    }
    live_frame = pd.DataFrame([row], columns=leaderboard.ADMIN_COLUMNS)

    def fake_load_admin():
        return (live_frame, None)

    def always_admin(profile):
        return True

    monkeypatch.setattr(app, "_safe_load_admin", fake_load_admin)
    monkeypatch.setattr(app, "is_admin", always_admin)

    profile = types.SimpleNamespace(username="michaelr27")
    updates = app._gate_admin_controls(profile)
    table_update = updates[0]

    assert table_update.value["headers"] == leaderboard.ADMIN_COLUMNS
    # Index 3 is expected to be the submission_name column -- this relies on
    # the order of ``leaderboard.ADMIN_COLUMNS``, which is defined elsewhere.
    assert table_update.value["data"][0][3] == "UC3 e2e test 20260602-205316"
    assert table_update.interactive is True
def test_admin_delete_refreshes_gallery(monkeypatch):
    """A delete returns fresh tables and a regenerated Gallery iframe markup."""
    selected_rows = pd.DataFrame(
        [{"select": True, "submission_id": "validated-old-row"}]
    )
    admin_after = pd.DataFrame(columns=leaderboard.ADMIN_COLUMNS)
    validated_after = pd.DataFrame(columns=leaderboard.VALIDATED_LEADERBOARD_COLS)
    unvalidated_after = pd.DataFrame(columns=leaderboard.LEADERBOARD_COLS)

    delete_calls: list[list[str]] = []

    def record_delete(ids):
        # Capture each batch of ids passed to the delete call.
        delete_calls.append(ids)

    def fake_load_split():
        return (validated_after, unvalidated_after, None)

    def fake_load_admin():
        return (admin_after, None)

    def fake_gallery():
        return "<iframe>empty</iframe>"

    def silent_toast(*args, **kwargs):
        return None

    monkeypatch.setattr(app, "is_admin", lambda profile: True)
    monkeypatch.setattr(app, "delete_rows", record_delete)
    monkeypatch.setattr(app, "_safe_load_split", fake_load_split)
    monkeypatch.setattr(app, "_safe_load_admin", fake_load_admin)
    monkeypatch.setattr(app, "_gallery_iframe_html", fake_gallery)
    monkeypatch.setattr(app.gr, "Info", silent_toast)

    profile = types.SimpleNamespace(username="michaelr27")
    outputs = app._admin_delete(selected_rows, True, profile)
    # Unpacking pins the handler to exactly seven outputs; the last three
    # (confirm control and the two buttons) are not inspected here.
    (
        admin_out,
        validated_out,
        unvalidated_out,
        gallery_out,
        _confirm,
        _delete_btn,
        _stop_btn,
    ) = outputs

    assert delete_calls == [["validated-old-row"]]
    assert admin_out is admin_after
    assert validated_out is validated_after
    assert unvalidated_out is unvalidated_after
    assert gallery_out == "<iframe>empty</iframe>"
def test_data_error_banner_md_present_on_error_empty_otherwise():
    """No error means an empty banner; a real error yields markdown naming it."""
    # Both "no error" spellings must produce an empty string.
    for no_error in (None, ""):
        assert app._data_error_banner_md(no_error) == ""

    cause = "boom: 403 Forbidden"
    rendered = app._data_error_banner_md(cause)
    assert cause in rendered
    assert "unavailable" in rendered.lower()
def test_refresh_handler_shows_banner_and_warns_on_error(monkeypatch):
    """A failing manual refresh raises a warning toast and shows the banner.

    ``gr.Warning`` and ``gr.Info`` are replaced with counters so the
    test can run outside a Gradio request context. On the failure path
    exactly one warning (and no info) must fire, and the banner update
    must be flipped visible.
    """

    def failing_loader():
        raise leaderboard.LeaderboardDataError("simulated hub failure")

    monkeypatch.setattr(app, "load_leaderboard_split", failing_loader)

    toast_counts = {"warning": 0, "info": 0}

    def make_counter(kind):
        # Build a stub that only tallies how often that toast kind fired.
        def _count(*args, **kwargs):
            toast_counts[kind] += 1

        return _count

    monkeypatch.setattr(app.gr, "Warning", make_counter("warning"))
    monkeypatch.setattr(app.gr, "Info", make_counter("info"))

    validated_df, unvalidated_df, banner_update = app._refresh_leaderboard_with_toast()

    assert toast_counts["warning"] == 1
    assert toast_counts["info"] == 0
    assert len(validated_df) == 0
    assert len(unvalidated_df) == 0
    # The banner output is a gr.Markdown update flipped visible.
    assert getattr(banner_update, "visible", None) is True