ethicsguard/tests/test_baselines.py
GodreignElgin
test
37b04ce
raw
history blame contribute delete
682 Bytes
from ethicsguard.baselines import audit_thresholds, run_all_baselines
def test_baselines_cover_all_difficulties_and_agents() -> None:
    """Check the eval baseline results contain every tier and every agent.

    Calls ``run_all_baselines(split="eval")`` and verifies that the result
    has the keys ``easy``, ``medium`` and ``hard``, and that the entry under
    each of those keys contains all five baseline agent names. Extra keys
    or extra agents are tolerated; only missing ones fail the test.
    """
    tiers = ("easy", "medium", "hard")
    required_agents = {
        "random",
        "greedy_by_hint",
        "rule_based",
        "always_escalate",
        "always_approve",
    }

    outcome = run_all_baselines(split="eval")

    # A subset check expressed as "nothing is left over after removal".
    absent_tiers = set(tiers) - set(outcome)
    assert not absent_tiers

    for tier in tiers:
        absent_agents = required_agents - set(outcome[tier])
        assert not absent_agents
def test_audit_threshold_keys_exist() -> None:
    """Check the eval threshold audit reports the two expected check keys.

    Calls ``audit_thresholds(split="eval")`` and verifies that both
    ``easy.always_escalate_below_0_35`` and
    ``hard.always_approve_below_0_35`` are present in the result. Only
    membership is tested; the values stored under the keys are not inspected.
    """
    audit = audit_thresholds(split="eval")
    required_keys = (
        "easy.always_escalate_below_0_35",
        "hard.always_approve_below_0_35",
    )
    for key in required_keys:
        assert key in audit