| """Unit tests for the two-tier leaderboard reader. |
| |
| C2 contract: :func:`leaderboard.load_leaderboard_split` returns a |
| ``(validated_df, unvalidated_df)`` tuple, split on ``validation_status``, |
| with legacy rows defaulting to ``"unvalidated"``. |
| |
| Tests stub the Hub fetcher via ``monkeypatch`` so no network I/O runs. |
| """ |
| from __future__ import annotations |
|
|
| import pytest |
|
|
| import leaderboard |
|
|
|
|
def _stub_rows():
    """Build the three-row fixture the tests feed in place of the Hub read.

    Rows, in order:

    1. ``sub-a`` -- ``validation_status`` is ``"validated"``.
    2. ``sub-b`` -- ``validation_status`` is explicitly ``"unvalidated"``
       and ``agent_url`` is ``None``.
    3. ``sub-c-legacy`` -- a legacy-shaped row that omits ``status``,
       ``validation_status``, ``validation_method``, ``hf_username`` and
       ``submission_sha256`` altogether.

    A new list of new dicts is built on every call, so a caller that
    mutates the result cannot leak state into another test.
    """
    validated_row = {
        "submission_id": "sub-a",
        "status": "completed",
        "validation_status": "validated",
        "validation_method": "code",
        "submitter_name": "team-alpha",
        "submission_name": "Alpha Agent v1",
        "aggregate_score": 0.91,
        "validity_rate": 0.95,
        "submitted_at": "2026-05-01T10:00:00Z",
        "cadgenbench_version": "0.1.0",
        "hf_username": "alpha",
        "agent_url": "https://github.com/example/alpha-agent",
        "submission_blob_url": "https://huggingface.co/datasets/test/sub-a.zip",
        "submission_sha256": "a" * 64,
    }

    explicit_unvalidated_row = {
        "submission_id": "sub-b",
        "status": "completed",
        "validation_status": "unvalidated",
        "validation_method": None,
        "submitter_name": "team-beta",
        "submission_name": "Beta Agent v2",
        "aggregate_score": 0.82,
        "validity_rate": 0.88,
        "submitted_at": "2026-05-02T10:00:00Z",
        "cadgenbench_version": "0.1.0",
        "hf_username": "beta",
        "agent_url": None,
        "submission_blob_url": "https://huggingface.co/datasets/test/sub-b.zip",
        "submission_sha256": "b" * 64,
    }

    # Deliberately sparse: the status / validation / hash keys are absent
    # (not None), which is what makes this row "legacy" for the tests.
    legacy_row = {
        "submission_id": "sub-c-legacy",
        "submitter_name": "team-gamma",
        "submission_name": "Gamma baseline",
        "aggregate_score": 0.50,
        "validity_rate": 0.60,
        "submitted_at": "2026-01-01T10:00:00Z",
        "cadgenbench_version": "0.0.5",
        "agent_url": "https://github.com/example/gamma-baseline",
        "submission_blob_url": (
            "https://huggingface.co/datasets/test/sub-c-legacy.zip"
        ),
    }

    return [validated_row, explicit_unvalidated_row, legacy_row]
|
|
|
|
def test_split_shape(monkeypatch):
    """(a) The three stub rows split into one validated and two unvalidated."""
    # Swap the Hub fetcher for the in-memory fixture so no network I/O runs.
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _stub_rows)

    validated_df, unvalidated_df = leaderboard.load_leaderboard_split()

    tier_sizes = (len(validated_df), len(unvalidated_df))
    assert tier_sizes == (1, 2)
|
|
|
|
def test_legacy_row_defaults_to_unvalidated(monkeypatch):
    """(b) A row lacking ``validation_status`` is placed in the unvalidated table.

    The legacy stub row is located by its ``submission_name``; this test
    expects that name to appear verbatim (plain text) in the output, so a
    simple membership check is enough.
    """
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _stub_rows)

    validated_df, unvalidated_df = leaderboard.load_leaderboard_split()

    legacy_name = "Gamma baseline"
    validated_names = validated_df["submission_name"].tolist()
    unvalidated_names = unvalidated_df["submission_name"].tolist()

    assert legacy_name in unvalidated_names
    assert legacy_name not in validated_names
|
|
|
|
def test_field_passthrough(monkeypatch):
    """(c) Both tables expose the declared columns and carry row metadata.

    Checks, in order:

    - the unvalidated table's columns equal ``LEADERBOARD_COLS``;
    - the ``team-beta`` row reports ``submitted_at`` in the display form
      ``YYYY-MM-DD HH:MM UTC`` (the stub supplies ISO ``...T10:00:00Z``)
      and ``cadgenbench_version`` unchanged;
    - the validated table's columns equal ``VALIDATED_LEADERBOARD_COLS``;
    - the ``team-alpha`` row keeps its ``validation_method`` of ``"code"``.

    Rows are looked up by ``submitter_name``, which this test therefore
    relies on being emitted unchanged.
    """
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _stub_rows)
    validated_df, unvalidated_df = leaderboard.load_leaderboard_split()

    # --- unvalidated table -------------------------------------------------
    assert list(unvalidated_df.columns) == leaderboard.LEADERBOARD_COLS

    is_beta = unvalidated_df["submitter_name"] == "team-beta"
    beta_row = unvalidated_df[is_beta].iloc[0]
    assert beta_row["submitted_at"] == "2026-05-02 10:00 UTC"
    assert beta_row["cadgenbench_version"] == "0.1.0"

    # --- validated table ---------------------------------------------------
    assert list(validated_df.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS

    is_alpha = validated_df["submitter_name"] == "team-alpha"
    alpha_row = validated_df[is_alpha].iloc[0]
    assert alpha_row["validation_method"] == "code"
|
|
|
|
def test_empty_input_returns_two_empty_frames(monkeypatch):
    """No source rows -> two empty DataFrames that still carry their columns."""
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", list)

    validated_df, unvalidated_df = leaderboard.load_leaderboard_split()

    # Both tiers are empty ...
    assert validated_df.empty
    assert unvalidated_df.empty

    # ... but each keeps the column layout declared for its table.
    validated_cols = list(validated_df.columns)
    unvalidated_cols = list(unvalidated_df.columns)
    assert validated_cols == leaderboard.VALIDATED_LEADERBOARD_COLS
    assert unvalidated_cols == leaderboard.LEADERBOARD_COLS
|
|
|
|
def test_hub_read_failure_raises_no_silent_fallback(monkeypatch):
    """A failing Hub read propagates as ``LeaderboardDataError``.

    The fetcher is replaced with one that always raises; the split loader
    must let that error reach the caller rather than returning data.
    """
    def _failing_fetch():
        raise leaderboard.LeaderboardDataError("simulated hub failure")

    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _failing_fetch)

    expected_error = leaderboard.LeaderboardDataError
    with pytest.raises(expected_error):
        leaderboard.load_leaderboard_split()
|
|
|
|
def test_submission_name_links_to_report_in_new_tab(monkeypatch):
    """``submission_name`` becomes a new-tab anchor when a report URL exists.

    For the two modern stub rows the name cell is expected to be an
    ``<a>`` element with ``target="_blank" rel="noopener"`` whose href is
    ``/reports/<submission_id>.html``. The legacy stub row (which has no
    ``submission_sha256``) is expected to have an empty ``report_url``
    and a plain-text name.
    """
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _stub_rows)
    validated_df, unvalidated_df = leaderboard.load_leaderboard_split()

    alpha_anchor = (
        '<a href="/reports/sub-a.html" target="_blank" rel="noopener">'
        "Alpha Agent v1</a>"
    )
    beta_anchor = (
        '<a href="/reports/sub-b.html" target="_blank" rel="noopener">'
        "Beta Agent v2</a>"
    )

    # Validated tier: first row of the validated table.
    alpha_row = validated_df.iloc[0]
    assert alpha_row["report_url"] == "/reports/sub-a.html"
    assert alpha_row["submission_name"] == alpha_anchor

    # Unvalidated tier, modern row: linked just like the validated one.
    by_submitter = unvalidated_df["submitter_name"]
    beta_row = unvalidated_df[by_submitter == "team-beta"].iloc[0]
    assert beta_row["submission_name"] == beta_anchor

    # Unvalidated tier, legacy row: no report, so no link.
    gamma_row = unvalidated_df[by_submitter == "team-gamma"].iloc[0]
    assert gamma_row["report_url"] == ""
    assert gamma_row["submission_name"] == "Gamma baseline"
|
|
|
|
def test_model_details_column_renders(monkeypatch):
    """``model details (optional)`` holds a markdown link or ``_None_``.

    When the stub row has an ``agent_url`` the cell is expected to be a
    markdown link whose text is the URL without its scheme; when
    ``agent_url`` is ``None`` the cell is expected to be the italic
    placeholder ``_None_``.
    """
    details_col = "model details (optional)"

    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _stub_rows)
    validated_df, unvalidated_df = leaderboard.load_leaderboard_split()

    # Row with an agent URL -> shortened-URL link text.
    alpha_row = validated_df.iloc[0]
    expected_link = (
        "[github.com/example/alpha-agent](https://github.com/example/alpha-agent)"
    )
    assert alpha_row[details_col] == expected_link

    # Row without an agent URL. The name cell may be wrapped in markup,
    # so locate the row by a literal substring match on the display name.
    name_has_beta = unvalidated_df["submission_name"].str.contains(
        "Beta Agent v2", regex=False
    )
    beta_row = unvalidated_df[name_has_beta].iloc[0]
    assert beta_row[details_col] == "_None_"
|
|
|
|
def test_build_combined_csv_has_discriminator_and_both_tiers(monkeypatch, tmp_path):
    """C8: the CSV combines both tables with a `validation_status` column.

    Reads back the file at the path returned by
    ``leaderboard.build_combined_csv()`` with pandas and asserts:

    - the discriminator column is present;
    - both "validated" and "unvalidated" rows show up;
    - identity + score fields survive the export;
    - the legacy row is exported with ``status == "completed"`` and
      ``validation_status == "unvalidated"`` even though the stub row
      carries neither key.

    NOTE(review): the ``tmp_path`` fixture is requested but never used in
    this test; it is kept so the signature stays unchanged.
    """
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
    path = leaderboard.build_combined_csv()
    import pandas as pd
    df = pd.read_csv(path)

    assert "validation_status" in df.columns
    statuses = set(df["validation_status"].tolist())
    assert "validated" in statuses
    assert "unvalidated" in statuses

    # Identity and score of the validated row survive the round trip.
    alpha = df[df["submission_id"] == "sub-a"].iloc[0]
    assert alpha["submitter_name"] == "team-alpha"
    # The score went float -> text -> float; compare with a tolerance so
    # the test does not depend on the CSV writer/parser being bit-exact.
    assert float(alpha["aggregate_score"]) == pytest.approx(0.91)

    # The legacy stub row has no `status` / `validation_status` keys, so
    # these values can only come from defaults applied before export.
    legacy = df[df["submission_id"] == "sub-c-legacy"].iloc[0]
    assert legacy["status"] == "completed"
    assert legacy["validation_status"] == "unvalidated"
|
|
|
|
def test_build_combined_csv_handles_empty_input(monkeypatch):
    """No source rows -> a header-only CSV with exactly ``CSV_COLUMNS``."""
    import pandas as pd

    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", list)

    csv_path = leaderboard.build_combined_csv()
    exported = pd.read_csv(csv_path)

    # Header row only: zero data rows, but every declared column present
    # and in the declared order.
    assert len(exported) == 0
    header = list(exported.columns)
    assert header == leaderboard.CSV_COLUMNS
|
|
|
|
def test_build_combined_csv_orders_validated_first(monkeypatch):
    """Sort: validated tier on top, then unvalidated.

    Mirrors the on-screen layout so a reader diffing the CSV against
    the UI sees the same ordering.

    The check is two-sided: every row above the first ``"unvalidated"``
    row must be ``"validated"``, and no ``"validated"`` row may appear
    at or below it. (Checking only the rows above is vacuous -- by
    construction nothing before the *first* unvalidated row can be
    unvalidated -- so an interleaved or unvalidated-first ordering would
    slip through.)
    """
    monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows())
    path = leaderboard.build_combined_csv()
    import pandas as pd
    df = pd.read_csv(path)
    statuses_in_order = df["validation_status"].tolist()

    # Both tiers must be present, otherwise the ordering check below is
    # meaningless (and `.index` would raise ValueError instead of failing
    # with a readable assertion).
    assert "validated" in statuses_in_order
    assert "unvalidated" in statuses_in_order

    first_unvalidated = statuses_in_order.index("unvalidated")
    top_tier = statuses_in_order[:first_unvalidated]
    bottom_tier = statuses_in_order[first_unvalidated:]

    for s in top_tier:
        assert s == "validated", f"unexpected status before unvalidated tier: {s!r}"

    # The actual ordering guarantee: once the unvalidated tier starts, no
    # validated row may follow.
    assert "validated" not in bottom_tier, (
        f"validated row found after the unvalidated tier began: {statuses_in_order!r}"
    )
|
|
|
|
def test_fmt_timestamp_formats_iso_and_passes_through_garbage():
    """``_fmt_timestamp``: ISO in, display form out; blanks empty; junk kept.

    - ``YYYY-MM-DDTHH:MM:SSZ`` is rendered as ``YYYY-MM-DD HH:MM UTC``.
    - ``None``, the empty string, a whitespace-only string and NaN all
      render as the empty string.
    - A string that is not a timestamp is returned unchanged.
    """
    fmt = leaderboard._fmt_timestamp

    # Well-formed ISO timestamp: seconds dropped, "T"/"Z" replaced.
    assert fmt("2026-05-28T07:13:16Z") == "2026-05-28 07:13 UTC"

    # Every flavour of "missing" collapses to a blank cell.
    for missing in (None, "", " ", float("nan")):
        assert fmt(missing) == ""

    # Unparseable input is passed through as-is.
    junk = "not-a-timestamp"
    assert fmt(junk) == junk
|
|
|
|
def test_datatypes_align_with_columns():
    """Datatype lists stay in lock-step with their column lists.

    Two guards:

    1. For both the plain and the validated table, the datatype list has
       exactly as many entries as the column list.
    2. For the plain table, ``submission_name`` and
       ``model details (optional)`` are typed ``"markdown"`` and every
       other column is typed ``"str"``.

    This is a cheap regression check against extending one list and
    forgetting the other.
    """
    cols = leaderboard.LEADERBOARD_COLS
    dtypes = leaderboard.LEADERBOARD_DATATYPES
    validated_cols = leaderboard.VALIDATED_LEADERBOARD_COLS
    validated_dtypes = leaderboard.VALIDATED_LEADERBOARD_DATATYPES

    # Guard 1: equal lengths.
    assert len(dtypes) == len(cols)
    assert len(validated_dtypes) == len(validated_cols)

    # Guard 2: per-column datatype on the plain table.
    markdown_cols = {"submission_name", "model details (optional)"}
    for col, dt in zip(cols, dtypes):
        expected = "markdown" if col in markdown_cols else "str"
        assert dt == expected, f"{col} should be {expected}"
|
|