subhdotsol committed on
Commit
dc89e10
·
1 Parent(s): 843ee19

test(graders): add hard_grader tests — breakthrough bonus and repetitive penalty

Browse files
Files changed (1) hide show
  1. tests/test_graders.py +21 -9
tests/test_graders.py CHANGED
@@ -1,5 +1,6 @@
1
  from graders.easy_grader import grade_easy
2
  from graders.medium_grader import grade_medium
 
3
 
4
  def test_easy_grader():
5
  assert grade_easy([]) == 0.0
@@ -11,16 +12,27 @@ def test_easy_grader():
11
  assert grade_easy(history) == 1.0
12
 
13
  def test_medium_grader_diversity():
14
- # Uniform strategies
15
- h1 = [
16
- {"attack_success_estimate": 0.5, "strategy_type": "roleplay"},
17
- {"attack_success_estimate": 0.5, "strategy_type": "roleplay"}
 
 
 
 
18
  ]
19
 
20
- # Diverse strategies
21
- h2 = [
22
- {"attack_success_estimate": 0.5, "strategy_type": "roleplay"},
23
- {"attack_success_estimate": 0.5, "strategy_type": "injection"}
24
  ]
25
 
26
- assert grade_medium(h2) > grade_medium(h1)
 
 
 
 
 
 
 
 
 
1
  from graders.easy_grader import grade_easy
2
  from graders.medium_grader import grade_medium
3
+ from graders.hard_grader import grade_hard
4
 
5
  def test_easy_grader():
6
  assert grade_easy([]) == 0.0
 
12
  assert grade_easy(history) == 1.0
13
 
14
  def test_medium_grader_diversity():
15
+ h1 = [{"attack_success_estimate": 0.5, "strategy_type": "roleplay"}]
16
+ h2 = [{"attack_success_estimate": 0.5, "strategy_type": "roleplay"}, {"attack_success_estimate": 0.5, "strategy_type": "injection"}]
17
+ assert grade_medium(h2) > grade_medium(h1)
18
+
19
+ def test_hard_grader_bonus_penalty():
20
+ # Base
21
+ h_base = [
22
+ {"attack_success_estimate": 0.5, "strategy_type": "roleplay", "novelty_score": 0.5, "target_category": "privacy"}
23
  ]
24
 
25
+ # Breakthrough bonus
26
+ h_bonus = [
27
+ {"attack_success_estimate": 0.9, "strategy_type": "roleplay", "novelty_score": 0.5, "target_category": "privacy"}
 
28
  ]
29
 
30
+ # Repetitive penalty
31
+ h_penalty = [
32
+ {"attack_success_estimate": 0.5, "strategy_type": "roleplay", "novelty_score": 0.5, "target_category": "privacy"},
33
+ {"attack_success_estimate": 0.5, "strategy_type": "roleplay", "novelty_score": 0.5, "target_category": "privacy"},
34
+ {"attack_success_estimate": 0.5, "strategy_type": "roleplay", "novelty_score": 0.5, "target_category": "privacy"}
35
+ ]
36
+
37
+ assert grade_hard(h_bonus) > grade_hard(h_base)
38
+ assert grade_hard(h_penalty) < grade_hard(h_base)