File size: 10,473 Bytes
5fb3ebc
0e3b21f
5fb3ebc
 
 
 
 
 
0e3b21f
 
 
77edebf
 
 
0e3b21f
 
 
 
77edebf
a662bfa
77edebf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2161b1
 
c1cb5e4
d2161b1
 
 
 
 
 
 
c1cb5e4
d2161b1
 
 
c1cb5e4
 
 
 
 
 
 
 
 
 
 
 
 
77edebf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c1cb5e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77edebf
 
 
 
 
 
 
 
 
d2161b1
 
 
c1cb5e4
0e3b21f
 
a662bfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
02751ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
daae24c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a9408a
daae24c
 
 
 
 
 
 
 
 
 
a662bfa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
"""Unit tests for the report-proxy route and related Space helpers.

The Space exposes the per-submission HTML report at
``/reports/{submission_id}.html`` (FastAPI route): it re-serves the
file with ``Content-Type: text/html``. Now that the Space is public,
HF's edge serves this route to in-browser users, so the leaderboard's
submission_name cell links straight to it (opening in a new tab)
rather than inlining the report into the page.

Tests stub the Hub fetch via monkeypatch so the suite has zero
network I/O.
"""
from __future__ import annotations

import types

import pandas as pd

import app
import leaderboard


def test_serve_report_returns_html_when_file_exists(monkeypatch):
    """A fetch that yields bytes produces a 200 ``text/html`` response echoing them."""
    payload = b"<!DOCTYPE html><html><body>ok</body></html>"

    def fake_fetch(sid):
        # Stand-in for the Hub fetch: always hands back the canned document.
        return payload

    monkeypatch.setattr(app, "_fetch_report_html", fake_fetch)

    response = app.serve_report("sub-test")

    assert response.status_code == 200
    assert response.media_type.startswith("text/html")
    assert response.body == payload


def test_serve_report_returns_404_when_file_missing(monkeypatch):
    """When ``_fetch_report_html`` yields None the handler answers 404 with a message body."""

    def fetch_nothing(sid):
        # Simulates a submission whose report is not available.
        return None

    monkeypatch.setattr(app, "_fetch_report_html", fetch_nothing)

    response = app.serve_report("sub-missing")

    assert response.status_code == 404
    # The 404 payload must carry a human-readable message for the browser.
    body_text = response.body.decode("utf-8")
    assert "Report not found" in body_text


def test_render_resolvers_return_public_bucket_urls():
    """Both render resolvers point at the bucket's ``resolve`` endpoint.

    Checks that the plain-turntable resolver and the edit-diff resolver
    each return a URL containing ``/buckets/`` and ``/resolve/`` and
    ending in the expected ``/renders/<submission>/<fixture>/<file>.webp``
    path.
    """
    turntable_url = app._render_proxy_url("sub-test", "101")
    assert turntable_url.endswith("/renders/sub-test/101/rotating.webp")
    assert "/buckets/" in turntable_url
    assert "/resolve/" in turntable_url

    diff_url = app._render_diff_proxy_url("sub-test", "207")
    assert diff_url.endswith("/renders/sub-test/207/edit_diff.webp")
    assert "/buckets/" in diff_url
    assert "/resolve/" in diff_url


def test_serve_gt_render_returns_webp_when_file_exists(monkeypatch):
    """The GT render handler returns the fetched bytes as a 200 ``image/webp`` response."""
    webp_bytes = b"RIFFwebp"

    def fake_fetch(fixture):
        # Stand-in for the GT render fetch: always returns the canned bytes.
        return webp_bytes

    monkeypatch.setattr(app, "_fetch_gt_render", fake_fetch)

    response = app.serve_gt_render("101")

    assert response.status_code == 200
    assert response.media_type == "image/webp"
    assert response.body == webp_bytes


def test_fetch_report_html_returns_none_on_hub_failure(monkeypatch):
    """An exception raised by the Hub download is swallowed and reported as None.

    The serve handler depends on this so that a transient Hub failure
    does not leak a stack trace into the Space's HTTP response.
    """

    def raise_hub_error(*args, **kwargs):
        raise RuntimeError("simulated Hub failure")

    monkeypatch.setattr(app, "hf_hub_download", raise_hub_error)

    # _fetch_report_html is lru_cache'd on its argument, so a submission id
    # used nowhere else guarantees the stubbed download is actually reached.
    result = app._fetch_report_html("sub-failure-probe-unique-1")
    assert result is None


def test_fetch_gt_render_uses_gt_rotating_webp_path(monkeypatch, tmp_path):
    """``_fetch_gt_render`` asks the Hub for ``<fixture>/renders/rotating.webp``.

    The download is stubbed to record its keyword arguments and to
    return the path of a local temp file whose bytes the function is
    expected to hand back.
    """
    local_webp = tmp_path / "rotating.webp"
    local_webp.write_bytes(b"RIFFwebp")
    seen_kwargs: dict = {}

    def recording_download(**kwargs):
        # Remember how the download was requested, then point at the temp file.
        seen_kwargs.update(kwargs)
        return str(local_webp)

    monkeypatch.setattr(app, "hf_hub_download", recording_download)

    fetched = app._fetch_gt_render("101")

    assert fetched == b"RIFFwebp"
    assert seen_kwargs["filename"] == "101/renders/rotating.webp"


def test_proxy_route_is_registered():
    """The FastAPI app's route table contains the expected proxy paths.

    Guards against route-registration regressions (the original note
    cites the ``add_api_route`` call drifting below
    ``mount_gradio_app``) by inspecting ``app.app.routes`` directly
    instead of waiting for someone to hit the URL.
    """
    registered_paths = [
        getattr(route, "path", None) for route in app.app.routes
    ]

    assert "/reports/{submission_id}.html" in registered_paths
    # Candidate renders are served from the public bucket now, so their proxy
    # route must be gone; only the private GT render keeps a Space-side
    # (token-holding) proxy route.
    assert "/render/{submission_id}/{fixture}.webp" not in registered_paths
    assert "/gt-render/{fixture}.webp" in registered_paths


# --- Boot resilience: no silent fallback, but no crash either -------
#
# leaderboard.load_leaderboard_split / load_admin_table *raise*
# LeaderboardDataError on any Hub read failure (no fallback to stale
# or bundled data). app.py must turn that into empty tables + a loud
# banner / toast rather than crash at construction time (which would
# take the whole Space down on an under-scoped HF_TOKEN).


def test_safe_load_split_returns_empty_and_error_on_hub_failure(monkeypatch):
    """A ``LeaderboardDataError`` from the loader becomes empty frames plus an error message."""

    def failing_loader():
        raise leaderboard.LeaderboardDataError("simulated hub failure")

    monkeypatch.setattr(app, "load_leaderboard_split", failing_loader)

    validated, unvalidated, error = app._safe_load_split()

    # The failure is reported through the message, not raised.
    assert error is not None
    assert "simulated hub failure" in error

    # Both frames come back with zero rows...
    assert len(validated) == 0
    assert len(unvalidated) == 0

    # ...but still carry the declared columns, so the widgets stay
    # consistent with their datatypes.
    assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS
    assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS


def test_safe_load_split_passes_through_on_success(monkeypatch):
    """When the loader succeeds, the very same frame objects are returned and error is None."""
    validated_frame = pd.DataFrame(columns=leaderboard.VALIDATED_LEADERBOARD_COLS)
    unvalidated_frame = pd.DataFrame(columns=leaderboard.LEADERBOARD_COLS)

    def working_loader():
        return (validated_frame, unvalidated_frame)

    monkeypatch.setattr(app, "load_leaderboard_split", working_loader)

    validated, unvalidated, error = app._safe_load_split()

    assert error is None
    # Identity, not equality: the wrapper must not copy or rebuild the frames.
    assert validated is validated_frame
    assert unvalidated is unvalidated_frame


def test_safe_load_admin_returns_empty_and_error_on_hub_failure(monkeypatch):
    """A failing admin loader yields an empty admin-shaped frame and a non-None error."""

    def failing_admin_loader():
        raise leaderboard.LeaderboardDataError("simulated admin hub failure")

    monkeypatch.setattr(app, "load_admin_table", failing_admin_loader)

    admin_frame, error = app._safe_load_admin()

    assert error is not None
    assert len(admin_frame) == 0
    assert list(admin_frame.columns) == leaderboard.ADMIN_COLUMNS


def test_gate_admin_controls_refreshes_live_table(monkeypatch):
    """The first output of ``_gate_admin_controls`` carries the freshly loaded admin rows.

    With the admin loader stubbed to return one row and ``is_admin``
    forced to True, the table update must expose the admin headers, the
    row's submission name, and be interactive.
    """
    row = {
        "select": False,
        "validation_status": "unvalidated",
        "validation_method": None,
        "submission_name": "UC3 e2e test 20260602-205316",
        "submitter_name": "michaelr27",
        "submitted_at": "2026-06-02 19:00 UTC",
        "status": "completed",
        "aggregate_score": 0.5853,
        "submission_id": "uc3-e2e",
    }
    live_df = pd.DataFrame([row], columns=leaderboard.ADMIN_COLUMNS)

    def fake_safe_load_admin():
        return (live_df, None)

    def always_admin(profile):
        return True

    monkeypatch.setattr(app, "_safe_load_admin", fake_safe_load_admin)
    monkeypatch.setattr(app, "is_admin", always_admin)

    profile = types.SimpleNamespace(username="michaelr27")
    outputs = app._gate_admin_controls(profile)
    table_update = outputs[0]

    assert table_update.value["headers"] == leaderboard.ADMIN_COLUMNS
    # Index 3 of the first data row is checked against the submission name.
    assert table_update.value["data"][0][3] == "UC3 e2e test 20260602-205316"
    assert table_update.interactive is True


def test_admin_delete_refreshes_gallery(monkeypatch):
    """``_admin_delete`` deletes the selected ids and returns reloaded tables plus gallery HTML.

    All collaborators are stubbed: the delete call is recorded, the
    loaders return sentinel empty frames, and the gallery builder
    returns a fixed string that must come back as the fourth output.
    """
    selected_rows = pd.DataFrame(
        [{"select": True, "submission_id": "validated-old-row"}]
    )
    empty_admin = pd.DataFrame(columns=leaderboard.ADMIN_COLUMNS)
    empty_validated = pd.DataFrame(columns=leaderboard.VALIDATED_LEADERBOARD_COLS)
    empty_unvalidated = pd.DataFrame(columns=leaderboard.LEADERBOARD_COLS)
    deleted: list[list[str]] = []

    def always_admin(profile):
        return True

    def record_delete(ids):
        deleted.append(ids)

    def fake_load_split():
        return (empty_validated, empty_unvalidated, None)

    def fake_load_admin():
        return (empty_admin, None)

    def fake_gallery_html():
        return "<iframe>empty</iframe>"

    def silent_info(*a, **k):
        # Stubbed so the handler can run without a Gradio request context.
        return None

    monkeypatch.setattr(app, "is_admin", always_admin)
    monkeypatch.setattr(app, "delete_rows", record_delete)
    monkeypatch.setattr(app, "_safe_load_split", fake_load_split)
    monkeypatch.setattr(app, "_safe_load_admin", fake_load_admin)
    monkeypatch.setattr(app, "_gallery_iframe_html", fake_gallery_html)
    monkeypatch.setattr(app.gr, "Info", silent_info)

    profile = types.SimpleNamespace(username="michaelr27")
    # Unpacking also pins the handler's output arity at seven values.
    (
        admin_df,
        validated,
        unvalidated,
        gallery_html,
        _confirm,
        _delete_btn,
        _stop_btn,
    ) = app._admin_delete(selected_rows, True, profile)

    assert deleted == [["validated-old-row"]]
    assert admin_df is empty_admin
    assert validated is empty_validated
    assert unvalidated is empty_unvalidated
    assert gallery_html == "<iframe>empty</iframe>"


def test_data_error_banner_md_present_on_error_empty_otherwise():
    """No error (None or empty string) gives an empty banner; a real error is quoted in it."""
    # Both "no error" spellings must collapse to an empty banner.
    for no_error in (None, ""):
        assert app._data_error_banner_md(no_error) == ""

    message = "boom: 403 Forbidden"
    banner_md = app._data_error_banner_md(message)

    assert message in banner_md
    assert "unavailable" in banner_md.lower()


def test_refresh_handler_shows_banner_and_warns_on_error(monkeypatch):
    """A failed manual refresh fires one warning toast, no info toast, and shows the banner.

    ``gr.Warning`` and ``gr.Info`` are replaced by counters so the test
    can run outside a Gradio request context.
    """

    def failing_loader():
        raise leaderboard.LeaderboardDataError("simulated hub failure")

    toasts = {"warning": 0, "info": 0}

    def count_warning(*a, **k):
        toasts["warning"] = toasts["warning"] + 1

    def count_info(*a, **k):
        toasts["info"] = toasts["info"] + 1

    monkeypatch.setattr(app, "load_leaderboard_split", failing_loader)
    monkeypatch.setattr(app.gr, "Warning", count_warning)
    monkeypatch.setattr(app.gr, "Info", count_info)

    validated, unvalidated, banner = app._refresh_leaderboard_with_toast()

    assert toasts["warning"] == 1
    assert toasts["info"] == 0
    assert len(validated) == 0
    assert len(unvalidated) == 0
    # The banner output is expected to be an update object flipped visible.
    assert getattr(banner, "visible", None) is True