Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
# Page title; also interpolated into the heading of INTRO_MARKDOWN below.
TITLE = "MOOD Bench Leaderboard"

# NOTE(review): presumably the key under which the headline metric is stored
# in result records -- confirm against the code that reads this constant.
METRIC_KEY = "tpr@fpr0.01"

# Human-readable metric name; interpolated into the last sentence of
# INTRO_MARKDOWN.
METRIC_LABEL = "TPR @ FPR=1%"

# Markdown blurb describing the leaderboard. The heading is built from TITLE
# so the two cannot drift apart.
INTRO_MARKDOWN = f"""
# {TITLE}
Multi-domain out-of-distribution safety detection on the
[`mood-bench`](https://huggingface.co/datasets/mood-bench/mood-bench) test split.
Each row is a **detection method**. For every out-of-distribution unsafe domain we
report the true-positive rate at a fixed **1% false-positive rate** against the
pooled in-distribution *safe* conversations. The **Overall** column is the unweighted
mean across OOD-unsafe domains.
Use the **Columns** picker below to show the per-domain breakdown, or the filter
controls to narrow the table down. Sorted by Overall {METRIC_LABEL}.
"""
# Group key -> short display label. The insertion order of this mapping is
# the canonical group order; GROUPS below is derived from it.
GROUP_LABELS: dict[str, str] = dict(
    [
        ("overall", "Overall"),
        ("id", "ID"),
        ("controlling", "Controlling"),
        ("insecure-code", "Insecure Code"),
        ("scheming", "Scheming"),
        ("jailbroken", "Jailbroken"),
        ("sycophantic", "Sycophantic"),
        ("function-calling-missing", "FC Missing"),
        ("function-calling-inappropriate", "FC Inapprop."),
        ("swahili", "Swahili"),
    ]
)

# Ordered group keys: exactly the keys of GROUP_LABELS, in the same order.
GROUPS: list[str] = list(GROUP_LABELS)

# Names of the non-metric (descriptive) columns.
META_COLUMNS: list[str] = [
    "Method",
    "Model",
    "Submitted By",
    "Submitted At",
]
def metric_column(group: str) -> str:
    """Return the column header used for ``group``'s metric.

    The header is the ``GROUP_LABELS`` entry for ``group``; a key that is
    not present in that mapping raises ``KeyError``.
    """
    header = GROUP_LABELS[group]
    return header
# One metric header per entry of GROUPS, in the same order.
ALL_METRIC_COLUMNS: list[str] = list(map(metric_column, GROUPS))

# Header of the "overall" metric column.
PRIMARY_SORT_COLUMN = metric_column("overall")

# Two descriptive columns followed by the "overall" and "id" metric columns.
DEFAULT_VISIBLE_COLUMNS: list[str] = [
    "Method",
    "Model",
    PRIMARY_SORT_COLUMN,
    metric_column("id"),
]