"""Unit tests for the two-tier leaderboard reader. C2 contract: :func:`leaderboard.load_leaderboard_split` returns a ``(validated_df, unvalidated_df)`` tuple, split on ``validation_status``, with legacy rows defaulting to ``"unvalidated"``. Tests stub the Hub fetcher via ``monkeypatch`` so no network I/O runs. """ from __future__ import annotations import pytest import leaderboard def _stub_rows(): """Three rows: one validated, one explicit-unvalidated, one legacy. Each row carries the full metadata shape so column-presence assertions work without further fixturing. """ return [ { "submission_id": "sub-a", "status": "completed", "validation_status": "validated", "validation_method": "code", "submitter_name": "team-alpha", "submission_name": "Alpha Agent v1", "aggregate_score": 0.91, "validity_rate": 0.95, "submitted_at": "2026-05-01T10:00:00Z", "cadgenbench_version": "0.1.0", "hf_username": "alpha", "agent_url": "https://github.com/example/alpha-agent", "submission_blob_url": ( "https://huggingface.co/datasets/test/sub-a.zip" ), # Modern submit pipeline: sha256 is populated, so report # links should be emitted on completed rows. "submission_sha256": "a" * 64, }, { "submission_id": "sub-b", "status": "completed", "validation_status": "unvalidated", "validation_method": None, "submitter_name": "team-beta", "submission_name": "Beta Agent v2", "aggregate_score": 0.82, "validity_rate": 0.88, "submitted_at": "2026-05-02T10:00:00Z", "cadgenbench_version": "0.1.0", "hf_username": "beta", "agent_url": None, "submission_blob_url": ( "https://huggingface.co/datasets/test/sub-b.zip" ), "submission_sha256": "b" * 64, }, # Legacy row: pre-schema-bump shape. No `validation_status` key, # no `status` key, no `submission_sha256`. Both `status` and # `validation_status` should be defaulted by the reader; the # missing sha256 must suppress the report link (the # corresponding reports/.html doesn't exist on the dataset). { "submission_id": "sub-c-legacy", "submitter_name": "team-gamma", "submission_name": "Gamma baseline", "aggregate_score": 0.50, "validity_rate": 0.60, "submitted_at": "2026-01-01T10:00:00Z", "cadgenbench_version": "0.0.5", "agent_url": "https://github.com/example/gamma-baseline", "submission_blob_url": ( "https://huggingface.co/datasets/test/sub-c-legacy.zip" ), }, ] def test_split_shape(monkeypatch): """(a) Split shape: one row validated, two rows unvalidated.""" monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows()) validated, unvalidated = leaderboard.load_leaderboard_split() assert len(validated) == 1 assert len(unvalidated) == 2 def test_legacy_row_defaults_to_unvalidated(monkeypatch): """(b) Legacy row with no `validation_status` field lands unvalidated. Legacy rows keep plain-text submission_name (no report exists), so identity check is straightforward equality. """ monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows()) validated, unvalidated = leaderboard.load_leaderboard_split() assert "Gamma baseline" in set(unvalidated["submission_name"].tolist()) assert "Gamma baseline" not in set(validated["submission_name"].tolist()) def test_field_passthrough(monkeypatch): """(c) Non-formatted metadata fields and validated-only columns pass through. `aggregate_score` and `validity_rate` get status-aware string formatting; `submission_name` gets wrapped into a markdown link on modern rows. Passthrough is checked on fields that survive untransformed (``submitter_name``, ``submitted_at``, ``cadgenbench_version``) plus the validated-table-only ``validation_method``. """ monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows()) validated, unvalidated = leaderboard.load_leaderboard_split() assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS # Use submitter_name to identify Beta (its submission_name is now # wrapped as a markdown link). beta = unvalidated[unvalidated["submitter_name"] == "team-beta"].iloc[0] # submitted_at is rendered as `YYYY-MM-DD HH:MM UTC` by # _fmt_timestamp; the underlying ISO-8601 string is the input. assert beta["submitted_at"] == "2026-05-02 10:00 UTC" assert beta["cadgenbench_version"] == "0.1.0" assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS alpha = validated[validated["submitter_name"] == "team-alpha"].iloc[0] assert alpha["validation_method"] == "code" def test_empty_input_returns_two_empty_frames(monkeypatch): """Empty input yields two empty DataFrames carrying the expected columns.""" monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: []) validated, unvalidated = leaderboard.load_leaderboard_split() assert validated.empty assert unvalidated.empty assert list(validated.columns) == leaderboard.VALIDATED_LEADERBOARD_COLS assert list(unvalidated.columns) == leaderboard.LEADERBOARD_COLS def test_hub_read_failure_raises_no_silent_fallback(monkeypatch): """A failed Hub read surfaces loudly; the leaderboard never serves bundled/stale fallback data in its place.""" def _boom(): raise leaderboard.LeaderboardDataError("simulated hub failure") monkeypatch.setattr(leaderboard, "_load_rows_from_hub", _boom) with pytest.raises(leaderboard.LeaderboardDataError): leaderboard.load_leaderboard_split() def test_submission_name_links_to_report_in_new_tab(monkeypatch): """`submission_name` deep-links to the report in a new tab when one exists. Now that the Space is public, the name cell is an anchor with ``target="_blank"`` pointing at the ``/reports/.html`` route (completed modern-pipeline rows only). Rows without a report (legacy / pre-pipeline, no ``submission_sha256``) stay plain text. """ monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows()) validated, unvalidated = leaderboard.load_leaderboard_split() # Modern completed rows -> new-tab anchor to their report route. alpha = validated.iloc[0] assert alpha["report_url"] == "/reports/sub-a.html" assert alpha["submission_name"] == ( '' "Alpha Agent v1" ) beta = unvalidated[unvalidated["submitter_name"] == "team-beta"].iloc[0] assert beta["submission_name"] == ( '' "Beta Agent v2" ) # Legacy row without a report -> plain text, no anchor. gamma = unvalidated[unvalidated["submitter_name"] == "team-gamma"].iloc[0] assert gamma["report_url"] == "" assert gamma["submission_name"] == "Gamma baseline" def test_model_details_column_renders(monkeypatch): """`model details (optional)` cell carries the agent URL or `_None_`. Cell uses the shortened URL as link text (honest about what's behind the click). Missing agent_url -> italic placeholder. """ monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows()) validated, unvalidated = leaderboard.load_leaderboard_split() alpha = validated.iloc[0] assert alpha["model details (optional)"] == ( "[github.com/example/alpha-agent](https://github.com/example/alpha-agent)" ) beta = unvalidated[ unvalidated["submission_name"].str.contains("Beta Agent v2", regex=False) ].iloc[0] assert beta["model details (optional)"] == "_None_" def test_build_combined_csv_has_discriminator_and_both_tiers(monkeypatch, tmp_path): """C8: the CSV combines both tables with a `validation_status` column. Parses the file back with pandas and asserts: - the discriminator column is present; - both "validated" and "unvalidated" rows show up; - identity + score fields survive the export. """ monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows()) path = leaderboard.build_combined_csv() import pandas as pd df = pd.read_csv(path) assert "validation_status" in df.columns statuses = set(df["validation_status"].tolist()) assert "validated" in statuses assert "unvalidated" in statuses # Spot-check identity + score field passthrough. alpha = df[df["submission_id"] == "sub-a"].iloc[0] assert alpha["submitter_name"] == "team-alpha" assert float(alpha["aggregate_score"]) == 0.91 # Legacy row defaults applied (status + validation_status). legacy = df[df["submission_id"] == "sub-c-legacy"].iloc[0] assert legacy["status"] == "completed" assert legacy["validation_status"] == "unvalidated" def test_build_combined_csv_handles_empty_input(monkeypatch): """Empty source rows -> empty CSV with the declared columns + header.""" monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: []) path = leaderboard.build_combined_csv() import pandas as pd df = pd.read_csv(path) assert len(df) == 0 assert list(df.columns) == leaderboard.CSV_COLUMNS def test_build_combined_csv_orders_validated_first(monkeypatch): """Sort: validated tier on top (by score desc), then unvalidated. Mirrors the on-screen layout so a reader diffing the CSV against the UI sees the same ordering. """ monkeypatch.setattr(leaderboard, "_load_rows_from_hub", lambda: _stub_rows()) path = leaderboard.build_combined_csv() import pandas as pd df = pd.read_csv(path) statuses_in_order = df["validation_status"].tolist() first_unvalidated = statuses_in_order.index("unvalidated") # Every entry before the first "unvalidated" is "validated". for s in statuses_in_order[:first_unvalidated]: assert s == "validated", f"unexpected status before unvalidated tier: {s!r}" def test_fmt_timestamp_formats_iso_and_passes_through_garbage(): """ISO ``YYYY-MM-DDTHH:MM:SSZ`` -> ``YYYY-MM-DD HH:MM UTC``; garbage stays. Empty / None / NaN render as the empty string (the cell is rendered blank rather than as a literal placeholder). """ assert leaderboard._fmt_timestamp("2026-05-28T07:13:16Z") == "2026-05-28 07:13 UTC" assert leaderboard._fmt_timestamp(None) == "" assert leaderboard._fmt_timestamp("") == "" assert leaderboard._fmt_timestamp(" ") == "" assert leaderboard._fmt_timestamp(float("nan")) == "" # Anything that doesn't match the canonical shape passes through # unchanged (e.g., a manually-edited cell or a legacy timestamp # format) so the visible cell is at least not blank-replaced. assert leaderboard._fmt_timestamp("not-a-timestamp") == "not-a-timestamp" def test_datatypes_align_with_columns(): """Per-column datatype lists track the column-list lengths. The Leaderboard widget needs `datatype` to match `value`'s column count exactly, so this is the cheap regression guard against forgetting to extend one when the other grows. """ assert ( len(leaderboard.LEADERBOARD_DATATYPES) == len(leaderboard.LEADERBOARD_COLS) ) assert ( len(leaderboard.VALIDATED_LEADERBOARD_DATATYPES) == len(leaderboard.VALIDATED_LEADERBOARD_COLS) ) # Markdown cells: the two link columns. Everything else is str. markdown_cols = {"submission_name", "model details (optional)"} for col, dt in zip( leaderboard.LEADERBOARD_COLS, leaderboard.LEADERBOARD_DATATYPES ): if col in markdown_cols: assert dt == "markdown", f"{col} should be markdown" else: assert dt == "str", f"{col} should be str"