Spaces:

HuggingAI4Engineering
/

CADGenBench

Running

Michael Rabinovich committed on May 30

Commit

0957a56

1 Parent(s): 007a50e

admin: bulk promote/demote/delete + admin table loader

Adds load_admin_table to the leaderboard reader: a single flat frame of
every row (both tiers) with a leading editable select column, for the
admin tab to act on. Reworks admin.py around bulk helpers
(promote_rows, demote_rows, delete_rows) that each do one results.jsonl
write for the whole selection; delete_rows also removes the companion
zip and report artifacts on a best-effort basis. The singular
promote_row/demote_row stay as thin wrappers. Tests cover the bulk paths,
a missing-id abort, empty-selection guards, and artifact deletion, all
with the hub mocked.

Files changed (3) hide show

admin.py +112 -26
leaderboard.py +61 -0
tests/test_admin.py +64 -1

admin.py CHANGED Viewed

@@ -14,11 +14,18 @@ from __future__ import annotations
 import logging
 import os
-from typing import Any
 import gradio as gr
-from submit import _hub_rmw_results
 logger = logging.getLogger(__name__)
@@ -54,59 +61,138 @@ def is_admin(profile: gr.OAuthProfile | None) -> bool:
     return profile.username in admin_usernames()
-def promote_row(submission_id: str, method: str) -> None:
-    """Move a row into the validated tier with the given evidence type.
-    Sets ``validation_status`` to ``"validated"`` and
-    ``validation_method`` to *method*. Idempotent: re-promoting an
-    already-validated row to the same method lands the same values.
     Raises:
-        ValueError: *method* is not one of :data:`VALID_METHODS`.
-        LookupError: no row in ``results.jsonl`` carries *submission_id*.
     """
     if method not in VALID_METHODS:
         raise ValueError(
             f"Unknown validation_method {method!r}; expected one of "
             f"{', '.join(VALID_METHODS)}."
         )
     def mutate(rows: list[dict[str, Any]]) -> None:
         for row in rows:
-            if row.get("submission_id") == submission_id:
                 row["validation_status"] = "validated"
                 row["validation_method"] = method
-                return
-        raise LookupError(
-            f"No row with submission_id={submission_id!r} in results.jsonl."
-        )
     _hub_rmw_results(
         mutate,
-        commit_message=f"promote {submission_id} to validated ({method})",
     )
-def demote_row(submission_id: str) -> None:
-    """Return a row to the unvalidated tier, clearing ``validation_method``.
-    Sets ``validation_status`` to ``"unvalidated"`` and nulls
-    ``validation_method``. Idempotent on an already-unvalidated row.
     Raises:
-        LookupError: no row in ``results.jsonl`` carries *submission_id*.
     """
     def mutate(rows: list[dict[str, Any]]) -> None:
         for row in rows:
-            if row.get("submission_id") == submission_id:
                 row["validation_status"] = "unvalidated"
                 row["validation_method"] = None
-                return
-        raise LookupError(
-            f"No row with submission_id={submission_id!r} in results.jsonl."
-        )
     _hub_rmw_results(
         mutate,
-        commit_message=f"demote {submission_id} to unvalidated",
     )

 import logging
 import os
+from typing import Any, Iterable
 import gradio as gr
+from huggingface_hub.errors import EntryNotFoundError
+from submit import (
+    HF_SUBMISSIONS_REPO,
+    REPORTS_DIR,
+    SUBMISSIONS_DIR,
+    _HF_API,
+    _hub_rmw_results,
+)
 logger = logging.getLogger(__name__)
     return profile.username in admin_usernames()
+def _clean_id_set(submission_ids: Iterable[str]) -> set[str]:
+    """Normalise an id iterable to a non-empty set, else raise.
+    Guards every bulk helper: a no-op call (nothing selected) is a
+    caller error, surfaced as ``ValueError`` rather than a silent
+    empty write.
+    """
+    ids = {str(s) for s in submission_ids if s}
+    if not ids:
+        raise ValueError("No submissions selected.")
+    return ids
+def promote_rows(submission_ids: Iterable[str], method: str) -> None:
+    """Move every listed row into the validated tier with *method*.
+    One ``results.jsonl`` write for the whole batch. Idempotent on rows
+    already validated (their method is set to *method*).
     Raises:
+        ValueError: *method* is unknown, or no ids were given.
+        LookupError: one or more ids are absent from ``results.jsonl``
+            (no partial write happens; the helper raises inside the
+            read-modify-write before the upload).
     """
     if method not in VALID_METHODS:
         raise ValueError(
             f"Unknown validation_method {method!r}; expected one of "
             f"{', '.join(VALID_METHODS)}."
         )
+    ids = _clean_id_set(submission_ids)
     def mutate(rows: list[dict[str, Any]]) -> None:
+        seen = set()
         for row in rows:
+            if row.get("submission_id") in ids:
                 row["validation_status"] = "validated"
                 row["validation_method"] = method
+                seen.add(row["submission_id"])
+        _raise_for_missing(ids, seen)
     _hub_rmw_results(
         mutate,
+        commit_message=f"promote {len(ids)} row(s) to validated ({method})",
     )
+def demote_rows(submission_ids: Iterable[str]) -> None:
+    """Return every listed row to the unvalidated tier, clearing method.
+    One ``results.jsonl`` write for the whole batch. Idempotent on rows
+    already unvalidated.
     Raises:
+        ValueError: no ids were given.
+        LookupError: one or more ids are absent from ``results.jsonl``.
     """
+    ids = _clean_id_set(submission_ids)
     def mutate(rows: list[dict[str, Any]]) -> None:
+        seen = set()
         for row in rows:
+            if row.get("submission_id") in ids:
                 row["validation_status"] = "unvalidated"
                 row["validation_method"] = None
+                seen.add(row["submission_id"])
+        _raise_for_missing(ids, seen)
     _hub_rmw_results(
         mutate,
+        commit_message=f"demote {len(ids)} row(s) to unvalidated",
+    )
+def delete_rows(submission_ids: Iterable[str]) -> None:
+    """Permanently delete every listed submission: artifacts then row.
+    Irreversible. For each id, best-effort deletes the companion blobs
+    (``submissions/<id>.zip``, ``reports/<id>.{html,json}``) and then
+    drops the row from ``results.jsonl`` in a single write. A blob that
+    does not exist is skipped (a failed / pending row may never have
+    had a report). Missing ``results.jsonl`` rows are tolerated too, so
+    a re-run after a partial failure still converges.
+    Raises:
+        ValueError: no ids were given.
+    """
+    ids = _clean_id_set(submission_ids)
+    for sid in sorted(ids):
+        for path in (
+            f"{SUBMISSIONS_DIR}/{sid}.zip",
+            f"{REPORTS_DIR}/{sid}.html",
+            f"{REPORTS_DIR}/{sid}.json",
+        ):
+            try:
+                _HF_API.delete_file(
+                    path_in_repo=path,
+                    repo_id=HF_SUBMISSIONS_REPO,
+                    repo_type="dataset",
+                    commit_message=f"delete artifact {path}",
+                )
+            except EntryNotFoundError:
+                pass
+            except Exception as e:  # noqa: BLE001 - keep deleting the rest
+                logger.warning(
+                    "Failed to delete artifact %s (%s: %s)",
+                    path, type(e).__name__, e,
+                )
+    def mutate(rows: list[dict[str, Any]]) -> None:
+        rows[:] = [r for r in rows if r.get("submission_id") not in ids]
+    _hub_rmw_results(
+        mutate, commit_message=f"delete {len(ids)} submission(s)",
     )
+def _raise_for_missing(requested: set[str], seen: set[str]) -> None:
+    """Raise ``LookupError`` if any requested id was not found in the rows."""
+    missing = requested - seen
+    if missing:
+        raise LookupError(
+            f"submission_id(s) not in results.jsonl: {', '.join(sorted(missing))}."
+        )
+def promote_row(submission_id: str, method: str) -> None:
+    """Single-row convenience wrapper over :func:`promote_rows`."""
+    promote_rows([submission_id], method)
+def demote_row(submission_id: str) -> None:
+    """Single-row convenience wrapper over :func:`demote_rows`."""
+    demote_rows([submission_id])

leaderboard.py CHANGED Viewed

@@ -467,3 +467,64 @@ def build_combined_csv() -> str:
     path = out_dir / f"cadgenbench-leaderboard-{uuid.uuid4().hex[:8]}.csv"
     df.to_csv(path, index=False)
     return str(path)

     path = out_dir / f"cadgenbench-leaderboard-{uuid.uuid4().hex[:8]}.csv"
     df.to_csv(path, index=False)
     return str(path)
+# Admin-tab table. A single flat view of every row (both tiers) with a
+# leading editable ``select`` checkbox column; the rest is read-only
+# context the maintainer scans before acting. Raw values (not the
+# display-formatted leaderboard strings) so the maintainer sees exactly
+# what is on the row. `submission_id` rides last as the action key.
+ADMIN_SELECT_COL = "select"
+ADMIN_COLUMNS = [
+    ADMIN_SELECT_COL,
+    "validation_status",
+    "validation_method",
+    "submission_name",
+    "submitter_name",
+    "submitted_at",
+    "status",
+    "aggregate_score",
+    "submission_id",
+]
+def load_admin_table() -> pd.DataFrame:
+    """Build the Admin tab's editable table: one row per submission.
+    Both tiers in a single frame, validated first then by score, with a
+    fresh (all-unchecked) ``select`` column the maintainer ticks to
+    choose action targets. Legacy rows get the same ``status`` /
+    ``validation_status`` defaults the leaderboard reader applies, so
+    pre-schema-bump rows still show up and are actionable.
+    """
+    rows = _load_rows_from_hub()
+    if rows is None:
+        logger.info("Admin table build falling back to local results.jsonl")
+        rows = _load_rows_from_local()
+    rows = rows or []
+    for row in rows:
+        if row.get("status") is None:
+            row["status"] = "completed"
+        if row.get("validation_status") is None:
+            row["validation_status"] = "unvalidated"
+    if not rows:
+        return pd.DataFrame(columns=ADMIN_COLUMNS)
+    df = pd.DataFrame(rows)
+    for c in ADMIN_COLUMNS:
+        if c not in df.columns:
+            df[c] = None
+    if "submitted_at" in df.columns:
+        df["submitted_at"] = df["submitted_at"].apply(_fmt_timestamp)
+    df = (
+        df[ADMIN_COLUMNS]
+        .sort_values(
+            ["validation_status", "aggregate_score"],
+            ascending=[False, False],
+            na_position="last",
+        )
+        .reset_index(drop=True)
+    )
+    # Set after projection so the column is a clean all-False boolean
+    # regardless of what (if anything) a stray source key held.
+    df[ADMIN_SELECT_COL] = False
+    return df

tests/test_admin.py CHANGED Viewed

@@ -55,7 +55,11 @@ def hub(monkeypatch):
     re-parsed back into dicts. ``state["uploads"]`` counts the writes
     so a test can assert how many commits a call produced.
     """
-    state: dict = {"rows": [dict(r) for r in SEED_ROWS], "uploads": 0}
     def fake_download() -> str:
         return _jsonl(state["rows"])
@@ -71,8 +75,12 @@ def hub(monkeypatch):
         ]
         state["uploads"] += 1
     monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
     monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
     return state
@@ -118,3 +126,58 @@ def test_promote_idempotent(hub):
     # Second identical promotion produces an identical row.
     admin.promote_row("beta", "code")
     assert _row(hub["rows"], "beta") == once

     re-parsed back into dicts. ``state["uploads"]`` counts the writes
     so a test can assert how many commits a call produced.
     """
+    state: dict = {
+        "rows": [dict(r) for r in SEED_ROWS],
+        "uploads": 0,
+        "deleted_paths": [],
+    }
     def fake_download() -> str:
         return _jsonl(state["rows"])
         ]
         state["uploads"] += 1
+    def fake_delete_file(*, path_in_repo, **kwargs) -> None:
+        state["deleted_paths"].append(path_in_repo)
     monkeypatch.setattr(submit, "_download_results_jsonl", fake_download)
     monkeypatch.setattr(submit._HF_API, "upload_file", fake_upload)
+    monkeypatch.setattr(submit._HF_API, "delete_file", fake_delete_file)
     return state
     # Second identical promotion produces an identical row.
     admin.promote_row("beta", "code")
     assert _row(hub["rows"], "beta") == once
+def test_promote_rows_bulk(hub):
+    """A bulk promote flips every listed row in one write."""
+    admin.promote_rows(["alpha", "beta"], "traces")
+    for sid in ("alpha", "beta"):
+        row = _row(hub["rows"], sid)
+        assert row["validation_status"] == "validated"
+        assert row["validation_method"] == "traces"
+    assert hub["uploads"] == 1
+def test_demote_rows_bulk(hub):
+    """A bulk demote clears method on every listed row in one write."""
+    admin.demote_rows(["alpha", "beta"])
+    for sid in ("alpha", "beta"):
+        row = _row(hub["rows"], sid)
+        assert row["validation_status"] == "unvalidated"
+        assert row["validation_method"] is None
+    assert hub["uploads"] == 1
+def test_promote_rows_missing_id_raises_without_write(hub):
+    """An unknown id aborts the whole batch before any upload."""
+    with pytest.raises(LookupError):
+        admin.promote_rows(["alpha", "ghost"], "code")
+    assert hub["uploads"] == 0
+    # alpha is untouched since the write never happened.
+    assert _row(hub["rows"], "alpha")["validation_status"] == "unvalidated"
+def test_empty_selection_raises(hub):
+    """Bulk helpers reject an empty / all-falsy selection."""
+    for call in (
+        lambda: admin.promote_rows([], "code"),
+        lambda: admin.demote_rows([None, ""]),
+        lambda: admin.delete_rows([]),
+    ):
+        with pytest.raises(ValueError):
+            call()
+    assert hub["uploads"] == 0
+def test_delete_rows_removes_rows_and_artifacts(hub):
+    """Delete drops the rows and best-effort removes their artifacts."""
+    admin.delete_rows(["alpha"])
+    remaining = {r["submission_id"] for r in hub["rows"]}
+    assert remaining == {"beta"}
+    # All three companion blobs were targeted for deletion.
+    assert hub["deleted_paths"] == [
+        "submissions/alpha.zip",
+        "reports/alpha.html",
+        "reports/alpha.json",
+    ]
+    assert hub["uploads"] == 1