Spaces:
Sleeping
Sleeping
| from graders.easy_grader import grade_easy | |
| from graders.medium_grader import grade_medium | |
| from graders.hard_grader import grade_hard | |
| from graders.programmatic_grader import grade_episode | |
def test_easy_grader():
    """Check ``grade_easy`` on empty, in-range, and over-range histories.

    Expectations pinned by this test:
      * an empty history scores 0.0;
      * a single-entry history scores its ``attack_success_estimate``
        (checked for 0.9 and 0.1);
      * a single entry with an estimate of 1.5 scores 1.0.
    """
    # Function-scope import so the test carries its own dependency.
    from math import isclose

    # Scores are floats: compare with a tolerance instead of ``==`` so the
    # test does not hinge on the grader's exact floating-point arithmetic.
    tolerance = 1e-9

    empty_score = grade_easy([])
    assert isclose(empty_score, 0.0, abs_tol=tolerance), (
        f"empty history: expected 0.0, got {empty_score!r}"
    )

    # (attack_success_estimate fed in, score expected back)
    cases = [
        (0.9, 0.9),
        (0.1, 0.1),
        (1.5, 1.0),
    ]
    for estimate, expected in cases:
        history = [{"attack_success_estimate": estimate}]
        score = grade_easy(history)
        assert isclose(score, expected, abs_tol=tolerance), (
            f"estimate {estimate!r}: expected {expected!r}, got {score!r}"
        )
def test_medium_grader_diversity():
    """A history using two strategy types must outscore one using a single type.

    Both histories use the same ``attack_success_estimate`` (0.5) in every
    entry; the second history adds an entry with a different
    ``strategy_type``.
    """
    roleplay_entry = {"attack_success_estimate": 0.5, "strategy_type": "roleplay"}
    injection_entry = {"attack_success_estimate": 0.5, "strategy_type": "injection"}

    single_strategy = [dict(roleplay_entry)]
    two_strategies = [dict(roleplay_entry), dict(injection_entry)]

    # Left operand is graded first, matching a direct ``a > b`` comparison.
    diverse_score = grade_medium(two_strategies)
    uniform_score = grade_medium(single_strategy)
    assert diverse_score > uniform_score
def test_hard_grader_bonus_penalty():
    """Compare ``grade_hard`` on a baseline, a higher-estimate, and a repeated history.

    * The "bonus" history is the baseline entry with its
      ``attack_success_estimate`` raised from 0.5 to 0.9; it must score
      strictly higher than the baseline.
    * The "penalty" history is the baseline entry repeated three times; it
      must score strictly lower than the baseline.
    """
    template = {
        "attack_success_estimate": 0.5,
        "strategy_type": "roleplay",
        "novelty_score": 0.5,
        "target_category": "privacy",
    }

    baseline = [dict(template)]
    # Overriding an existing key keeps its position in the dict.
    boosted = [{**template, "attack_success_estimate": 0.9}]
    # Three distinct-but-equal dicts, not one shared object.
    repeated = [dict(template) for _ in range(3)]

    boosted_score = grade_hard(boosted)
    baseline_score = grade_hard(baseline)
    assert boosted_score > baseline_score

    repeated_score = grade_hard(repeated)
    baseline_score = grade_hard(baseline)
    assert repeated_score < baseline_score
def test_programmatic_grader_structure():
    """Check the shape of the report returned by ``grade_episode``.

    For a single-entry history the report must contain the keys
    ``overall_score``, ``letter_grade`` and ``metrics``, and
    ``overall_score`` must lie in the closed interval [0.0, 1.0].
    """
    entry = {
        "attack_success_estimate": 0.5,
        "strategy_type": "roleplay",
        "novelty_score": 0.5,
        "target_category": "privacy",
    }
    report = grade_episode([entry])

    # Same keys, checked in the same order.
    for required_key in ("overall_score", "letter_grade", "metrics"):
        assert required_key in report

    overall = report["overall_score"]
    assert 0.0 <= overall <= 1.0