Spaces:
Sleeping
Sleeping
File size: 682 Bytes
from ethicsguard.baselines import audit_thresholds, run_all_baselines
def test_baselines_cover_all_difficulties_and_agents() -> None:
    """Verify the eval-split baseline results list every difficulty and baseline.

    Calls ``run_all_baselines(split="eval")`` and checks that the result's
    keys include "easy", "medium" and "hard", and that the entry for each of
    those difficulties includes all five expected baseline names.
    """
    expected_difficulties = ("easy", "medium", "hard")
    expected_baselines = {
        "random",
        "greedy_by_hint",
        "rule_based",
        "always_escalate",
        "always_approve",
    }

    outcome = run_all_baselines(split="eval")

    # An empty difference means every expected name is present; extra keys
    # in the results are tolerated.
    missing_difficulties = set(expected_difficulties) - set(outcome)
    assert not missing_difficulties

    for level in expected_difficulties:
        missing_baselines = expected_baselines - set(outcome[level])
        assert not missing_baselines
def test_audit_threshold_keys_exist() -> None:
    """Verify the eval-split threshold audit reports both required check keys.

    Calls ``audit_thresholds(split="eval")`` and asserts that each required
    key is contained in the returned collection.
    """
    threshold_checks = audit_thresholds(split="eval")
    required_keys = (
        "easy.always_escalate_below_0_35",
        "hard.always_approve_below_0_35",
    )
    # Checked in a fixed order so the first missing key is the one reported.
    for required_key in required_keys:
        assert required_key in threshold_checks
|