AIMLxDIV committed on
Commit
e2e527d
·
1 Parent(s): 7f47157

Add tests/test_env.py

Browse files
Files changed (1) hide show
  1. tests/test_env.py +82 -0
tests/test_env.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from codereview_env.env import CodeReviewEnv
3
+ from codereview_env.models import TaskId, Action, ActionType, Category, Severity, Verdict
4
+
5
def test_env_reset():
    """Check that resetting the environment reports a fresh episode.

    The reset result must echo the requested task and seed, and its
    observation must show no steps taken and a noise budget of 5.
    """
    environment = CodeReviewEnv()
    reset_result = environment.reset(TaskId.BUG_DETECTION, seed=0)

    # The result reports back the task and seed that were requested.
    assert reset_result.task_id == TaskId.BUG_DETECTION
    assert reset_result.seed == 0

    # The initial observation describes an episode with nothing consumed yet.
    initial_obs = reset_result.observation
    assert initial_obs.step_count == 0
    assert initial_obs.noise_budget == 5
12
+
13
def test_env_step_bug_detection():
    """Flag an issue, then approve, and check reward and termination.

    The FLAG_ISSUE step must yield a positive reward without ending the
    episode; the following APPROVE step must end it, after which the final
    result must carry a positive score.
    """
    env = CodeReviewEnv()
    # Seed 1 selects bug_003: None dereference in auth.py
    env.reset(TaskId.BUG_DETECTION, seed=1)

    # Flag the bug correctly
    action = Action(
        action_type=ActionType.FLAG_ISSUE,
        body="None dereference null check guard clause",
        filename="auth.py",
        line_number=16,
        category=Category.BUG,
        severity=Severity.HIGH,
    )
    step_res = env.step(action)
    assert step_res.observation.step_count == 1
    assert step_res.reward > 0
    assert not step_res.done

    # Terminal action
    action_term = Action(
        action_type=ActionType.APPROVE,
        body="LGTM",
        verdict=Verdict.LGTM,
    )
    step_term = env.step(action_term)
    assert step_term.done

    final = env.get_final_result()
    assert final.final_score > 0
43
+
44
def test_env_noise_budget_exhaustion():
    """Repeat a false-positive flag until the noise budget hits zero.

    Each of the first four flags must lower the observed noise budget by
    one while the episode keeps running; the fifth must end the episode
    with a budget of 0.
    """
    env = CodeReviewEnv()
    env.reset(TaskId.BUG_DETECTION, seed=0)

    # Noise budget this test expects a fresh episode to start with.
    initial_budget = 5

    # Flag 5 false positives
    action_fp = Action(
        action_type=ActionType.FLAG_ISSUE,
        body="fp",
        filename="nonexistent",
        line_number=999,
        category=Category.BUG,
        severity=Severity.LOW,
    )

    # All but the last false positive leave the episode running.
    for spent in range(1, initial_budget):
        res = env.step(action_fp)
        assert not res.done
        assert res.observation.noise_budget == initial_budget - spent

    # The last one uses up the remaining budget and terminates the episode.
    res_final = env.step(action_fp)
    assert res_final.done
    assert res_final.observation.noise_budget == 0
66
+
67
def test_env_max_steps():
    """Repeat an ASK_QUESTION action until the episode ends at step 10.

    The first nine steps must leave the episode running; the tenth must
    end it with a step count of 10.
    """
    env = CodeReviewEnv()
    env.reset(TaskId.BUG_DETECTION, seed=0)

    # Step count at which this test expects the episode to end.
    max_steps = 10

    action_neutral = Action(
        action_type=ActionType.ASK_QUESTION,
        body="what's this?",
    )

    # Every step before the last must leave the episode open.
    for _ in range(max_steps - 1):
        res = env.step(action_neutral)
        assert not res.done

    res_final = env.step(action_neutral)
    assert res_final.done
    assert res_final.observation.step_count == max_steps