Spaces:
Sleeping
Sleeping
| from ethicsguard.baselines import audit_thresholds, run_all_baselines | |
def test_baselines_cover_all_difficulties_and_agents() -> None:
    """Verify the eval-split baseline results include every expected key.

    The top-level result must contain each difficulty name, and the entry
    for each difficulty must contain each baseline agent name. Extra keys
    at either level are tolerated (subset checks, not equality).
    """
    expected_difficulties = ("easy", "medium", "hard")
    expected_agents = frozenset(
        (
            "random",
            "greedy_by_hint",
            "rule_based",
            "always_escalate",
            "always_approve",
        )
    )

    outcome = run_all_baselines(split="eval")

    # Check the outer keys first so a missing difficulty fails here rather
    # than as a lookup error inside the loop below.
    assert set(expected_difficulties).issubset(outcome)
    for level in expected_difficulties:
        assert expected_agents.issubset(outcome[level])
def test_audit_threshold_keys_exist() -> None:
    """Verify the eval-split threshold audit reports the expected check keys."""
    required_keys = (
        "easy.always_escalate_below_0_35",
        "hard.always_approve_below_0_35",
    )

    audit = audit_thresholds(split="eval")

    # Only the presence of each key is checked, not the value stored under it.
    for key in required_keys:
        assert key in audit