Spaces:

mood-bench
/

leaderboard

Sleeping

App Files Files Community

leaderboard / src /about.py

Dylan123

Upload folder using huggingface_hub

5479db5 verified 15 days ago

raw

history blame contribute delete

1.81 kB

	from __future__ import annotations

	# Title text for the leaderboard. Where it is rendered is not visible in this
	# module.
	TITLE = "MOOD Bench Leaderboard"

	# Identifier of the reported metric.
	# NOTE(review): presumably the key under which results store this metric --
	# confirm against the code that loads submissions.
	METRIC_KEY = "tpr@fpr0.01"
	# Human-readable name of the same metric; interpolated into INTRO_MARKDOWN.
	METRIC_LABEL = "TPR @ FPR=1%"

	# Markdown introduction text. This is an f-string evaluated once at import
	# time; METRIC_LABEL is the only interpolated value, so it must be defined
	# above this assignment.
	INTRO_MARKDOWN = f"""
	# MOOD Bench Leaderboard

	Multi-domain out-of-distribution safety detection on the
	[`mood-bench`](https://huggingface.co/datasets/mood-bench/mood-bench) test split.

	Each row is a detection method. For every out-of-distribution unsafe domain we
	report the true-positive rate at a fixed 1% false-positive rate against the
	pooled in-distribution safe conversations. The Overall column is the unweighted
	mean across OOD-unsafe domains.

	Use the Columns picker below to show the per-domain breakdown, or the filter
	controls to narrow the table down. Sorted by Overall {METRIC_LABEL}.
	"""

	# Display label for every result group, keyed by the group's identifier.
	# This mapping is the single source of truth for which groups exist and in
	# what order they appear: GROUPS below is derived from it, so adding, removing
	# or reordering a group only ever requires touching this dict.
	# NOTE(review): "id" presumably denotes the in-distribution group mentioned in
	# the intro text -- confirm.
	GROUP_LABELS: dict[str, str] = {
	    "overall": "Overall",
	    "id": "ID",
	    "controlling": "Controlling",
	    "insecure-code": "Insecure Code",
	    "scheming": "Scheming",
	    "jailbroken": "Jailbroken",
	    "sycophantic": "Sycophantic",
	    "function-calling-missing": "FC Missing",
	    "function-calling-inappropriate": "FC Inapprop.",
	    "swahili": "Swahili",
	}

	# Group identifiers in display order. Derived from GROUP_LABELS (dicts keep
	# insertion order) rather than repeated as a second literal, so the two can
	# never drift apart and every group is guaranteed to have a label.
	GROUPS: list[str] = list(GROUP_LABELS)

	# Headers of the descriptive (non-metric) columns.
	META_COLUMNS: list[str] = ["Method", "Model", "Submitted By", "Submitted At"]


	def metric_column(group: str) -> str:
	    """Return the stable column header used for ``group`` in the flattened DataFrame.

	    The header is the entry stored for ``group`` in ``GROUP_LABELS``; a key
	    that is not in that mapping raises ``KeyError``.
	    """
	    header = GROUP_LABELS[group]
	    return header


	# Header of the "overall" group; the intro text describes the table as sorted
	# by this column.
	PRIMARY_SORT_COLUMN = metric_column("overall")

	# One column header per entry of GROUPS, in the same order.
	ALL_METRIC_COLUMNS: list[str] = list(map(metric_column, GROUPS))

	# Columns listed as visible by default: two descriptive columns followed by
	# the "overall" and "id" metric columns.
	DEFAULT_VISIBLE_COLUMNS: list[str] = [
	    "Method",
	    "Model",
	    PRIMARY_SORT_COLUMN,
	    metric_column("id"),
	]