"""Unit tests for the admin promote / demote helpers. C5 contract: the admin gate distinguishes in-set from out-of-set users, ``promote_row`` and ``demote_row`` flip the two validation fields on the right row, and a repeat promotion is idempotent. Every ``results.jsonl`` read and write is mocked, so the suite makes zero Hub calls. """ from __future__ import annotations import json import os from types import SimpleNamespace import pytest # submit.py kicks off a Hub-touching stuck-pending sweep at import. # Disable it before importing (admin imports submit) so running this # file in isolation stays offline. os.environ.setdefault("CADGENBENCH_DISABLE_BOOT_SWEEP", "1") import admin # noqa: E402 import submit # noqa: E402 SEED_ROWS = [ { "submission_id": "alpha", "validation_status": "unvalidated", "validation_method": None, "aggregate_score": 0.5, }, { "submission_id": "beta", "validation_status": "validated", "validation_method": "code", "aggregate_score": 0.9, }, ] def _jsonl(rows: list[dict]) -> str: return "\n".join(json.dumps(r) for r in rows) + "\n" def _row(rows: list[dict], submission_id: str) -> dict: return next(r for r in rows if r["submission_id"] == submission_id) @pytest.fixture def hub(monkeypatch): """Mock the results.jsonl read + write that ``_hub_rmw_results`` drives. ``state["rows"]`` starts as a copy of :data:`SEED_ROWS` and is replaced by whatever bytes the helper hands to ``upload_file``, re-parsed back into dicts. ``state["uploads"]`` counts the writes so a test can assert how many commits a call produced. """ state: dict = { "rows": [dict(r) for r in SEED_ROWS], "uploads": 0, "deleted_paths": [], } def fake_download() -> str: return _jsonl(state["rows"]) def fake_upload(*, path_or_fileobj, **kwargs) -> None: body = ( path_or_fileobj.decode("utf-8") if isinstance(path_or_fileobj, bytes) else path_or_fileobj ) state["rows"] = [ json.loads(line) for line in body.splitlines() if line.strip() ] state["uploads"] += 1 def fake_delete_file(*, path_in_repo, **kwargs) -> None: state["deleted_paths"].append(path_in_repo) monkeypatch.setattr(submit, "_download_results_jsonl", fake_download) monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload) monkeypatch.setattr(submit._HF_API, "delete_file", fake_delete_file) return state def test_admin_gate_in_set_vs_out_of_set(monkeypatch): """is_admin admits logged-in users in the set, rejects everyone else.""" monkeypatch.setenv("CADGENBENCH_ADMINS", "michaelr27, lvwerra") assert admin.is_admin(SimpleNamespace(username="michaelr27")) is True assert admin.is_admin(SimpleNamespace(username="lvwerra")) is True assert admin.is_admin(SimpleNamespace(username="someone-else")) is False # Logged-out is never admin. assert admin.is_admin(None) is False # Empty / unset variable means no one is admin. monkeypatch.delenv("CADGENBENCH_ADMINS", raising=False) assert admin.is_admin(SimpleNamespace(username="michaelr27")) is False def test_promote_happy_path(hub): """Promoting an unvalidated row sets status + method, leaves others alone.""" admin.promote_row("alpha", "manual") promoted = _row(hub["rows"], "alpha") assert promoted["validation_status"] == "validated" assert promoted["validation_method"] == "manual" # The other row is untouched. assert _row(hub["rows"], "beta")["validation_method"] == "code" assert hub["uploads"] == 1 def test_demote_happy_path(hub): """Demoting a validated row clears the method and flips status back.""" admin.demote_row("beta") demoted = _row(hub["rows"], "beta") assert demoted["validation_status"] == "unvalidated" assert demoted["validation_method"] is None assert hub["uploads"] == 1 def test_promote_idempotent(hub): """Re-promoting an already-validated row lands the same state.""" admin.promote_row("beta", "code") once = dict(_row(hub["rows"], "beta")) assert once["validation_status"] == "validated" assert once["validation_method"] == "code" # Second identical promotion produces an identical row. admin.promote_row("beta", "code") assert _row(hub["rows"], "beta") == once def test_promote_rows_bulk(hub): """A bulk promote flips every listed row in one write.""" admin.promote_rows(["alpha", "beta"], "traces") for sid in ("alpha", "beta"): row = _row(hub["rows"], sid) assert row["validation_status"] == "validated" assert row["validation_method"] == "traces" assert hub["uploads"] == 1 def test_demote_rows_bulk(hub): """A bulk demote clears method on every listed row in one write.""" admin.demote_rows(["alpha", "beta"]) for sid in ("alpha", "beta"): row = _row(hub["rows"], sid) assert row["validation_status"] == "unvalidated" assert row["validation_method"] is None assert hub["uploads"] == 1 def test_promote_rows_missing_id_raises_without_write(hub): """An unknown id aborts the whole batch before any upload.""" with pytest.raises(LookupError): admin.promote_rows(["alpha", "ghost"], "code") assert hub["uploads"] == 0 # alpha is untouched since the write never happened. assert _row(hub["rows"], "alpha")["validation_status"] == "unvalidated" def test_empty_selection_raises(hub): """Bulk helpers reject an empty / all-falsy selection.""" for call in ( lambda: admin.promote_rows([], "code"), lambda: admin.demote_rows([None, ""]), lambda: admin.delete_rows([]), ): with pytest.raises(ValueError): call() assert hub["uploads"] == 0 def test_delete_rows_removes_rows_and_artifacts(hub): """Delete drops the rows and best-effort removes their artifacts.""" admin.delete_rows(["alpha"]) remaining = {r["submission_id"] for r in hub["rows"]} assert remaining == {"beta"} # All three companion blobs were targeted for deletion. assert hub["deleted_paths"] == [ "submissions/alpha.zip", "reports/alpha.html", "reports/alpha.json", ] assert hub["uploads"] == 1 def _job(job_id: str, stage: str, *args: str) -> SimpleNamespace: """A minimal JobInfo stand-in: id, status.stage, and a command argv.""" return SimpleNamespace( id=job_id, status=SimpleNamespace(stage=stage, message=None), command=["python", "/opt/eval_job.py", *args], arguments=None, ) @pytest.fixture def jobs(monkeypatch): """Mock the Jobs API (``list_jobs`` / ``cancel_job``) admin imports. ``state["jobs"]`` is the list ``list_jobs`` returns; ``state["cancelled"]`` records every ``job_id`` a ``cancel_job`` call targeted, so a test can assert exactly which jobs were stopped. """ state: dict = {"jobs": [], "cancelled": []} def fake_list_jobs(*, namespace=None, token=None): return state["jobs"] def fake_cancel_job(*, job_id, namespace=None, token=None): state["cancelled"].append(job_id) monkeypatch.setattr(admin, "list_jobs", fake_list_jobs) monkeypatch.setattr(admin, "cancel_job", fake_cancel_job) return state def test_stop_and_delete_cancels_running_then_deletes(hub, jobs): """A running job whose command names the id is cancelled, then the row goes.""" jobs["jobs"] = [ _job("job-alpha", "RUNNING", "alpha", "https://blob/alpha.zip"), _job("job-beta", "RUNNING", "beta", "https://blob/beta.zip"), ] admin.stop_and_delete_rows(["alpha"]) # Only alpha's job was cancelled. assert jobs["cancelled"] == ["job-alpha"] # And alpha's row + artifacts are gone, beta untouched. assert {r["submission_id"] for r in hub["rows"]} == {"beta"} assert "submissions/alpha.zip" in hub["deleted_paths"] def test_stop_and_delete_catches_all_shard_jobs(hub, jobs): """Every shard job for a submission (same id in argv) is cancelled.""" jobs["jobs"] = [ _job("job-a0", "RUNNING", "alpha", "url", "--shard-id", "shard_000"), _job("job-a1", "RUNNING", "alpha", "url", "--shard-id", "shard_001"), ] admin.stop_and_delete_rows(["alpha"]) assert sorted(jobs["cancelled"]) == ["job-a0", "job-a1"] def test_stop_and_delete_skips_terminal_jobs(hub, jobs): """A finished job for the id is not cancelled, but the row still deletes.""" jobs["jobs"] = [ _job("job-alpha", "COMPLETED", "alpha", "url"), ] admin.stop_and_delete_rows(["alpha"]) assert jobs["cancelled"] == [] assert {r["submission_id"] for r in hub["rows"]} == {"beta"} def test_stop_and_delete_tolerates_list_jobs_failure(hub, monkeypatch): """A Jobs-API listing failure must not block the row delete.""" def boom(*, namespace=None, token=None): raise RuntimeError("jobs API down") monkeypatch.setattr(admin, "list_jobs", boom) admin.stop_and_delete_rows(["alpha"]) assert {r["submission_id"] for r in hub["rows"]} == {"beta"} def test_stop_and_delete_tolerates_cancel_failure(hub, jobs, monkeypatch): """A cancel that errors is swallowed; the row still deletes.""" jobs["jobs"] = [_job("job-alpha", "RUNNING", "alpha", "url")] def boom(*, job_id, namespace=None, token=None): raise RuntimeError("cancel rejected") monkeypatch.setattr(admin, "cancel_job", boom) admin.stop_and_delete_rows(["alpha"]) assert {r["submission_id"] for r in hub["rows"]} == {"beta"} def test_stop_and_delete_empty_selection_raises(hub, jobs): """An empty selection is a caller error, before any job/list work.""" with pytest.raises(ValueError): admin.stop_and_delete_rows([]) assert jobs["cancelled"] == [] assert hub["uploads"] == 0