# File size: 3,667 Bytes
# fcb838d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""Tests for the CodeCourt environment."""

import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from env.codecourt_env import CodeCourtEnv
from agents.setter import SetterAgent
from agents.solver import SolverAgent


def test_env_reset():
    """Check that ``reset()`` returns an observation with the expected keys.

    The observation must expose each key listed below and report
    ``generation_mode`` as ``"dynamic"``.
    """
    court = CodeCourtEnv()
    observation = court.reset()

    required_keys = (
        "episode_id",
        "archetype",
        "difficulty",
        "public_test_cases",
        "hidden_test_count",
    )
    # Keys are checked in a fixed order so the first missing one is reported.
    for key in required_keys:
        assert key in observation

    assert observation["generation_mode"] == "dynamic"
    print("✓ test_env_reset")


def test_env_step():
    """Run one episode with reference agents and check the ``step()`` result.

    Both agents are constructed with ``use_reference=True``. After a single
    ``step()`` the episode must be flagged as done and ``info`` must contain
    the outcome, validity, pass-rate and trap-count fields asserted below.
    """
    env = CodeCourtEnv()
    env.reset()

    setter = SetterAgent(use_reference=True)
    solver = SolverAgent(use_reference=True)

    # Both agents work from the problem held on the env's current state.
    setter_code = setter.generate_solution(env._current_state.problem)
    solver_code = solver.solve(env._current_state.problem)

    # The first two returned values are not inspected by this test.
    _, _, done, info = env.step(setter_code, solver_code)

    # Identity check rather than ``== True`` so truthy non-bool values
    # (e.g. ``1``) do not satisfy the assertion.
    assert done is True
    for key in (
        "outcome",
        "setter_valid",
        "solver_public_pass_rate",
        "solver_hidden_pass_rate",
        "dynamic_trap_count",
    ):
        assert key in info
    print("✓ test_env_step")


def test_dynamic_problem_metadata():
    """Check the metadata on the problem held by the env after ``reset()``.

    The problem must be marked as dynamically generated, carry a
    ``trap_explanation`` entry, and have a truthy ``reference_solution``.
    """
    court = CodeCourtEnv()
    court.reset()
    generated = court._current_state.problem

    mode = generated["generation_mode"]
    assert mode == "dynamic"

    assert "trap_explanation" in generated

    reference = generated["reference_solution"]
    assert reference
    print("✓ test_dynamic_problem_metadata")


def test_dynamic_traps_added_for_bruteforce_solver():
    """Check that trap cases are reported when the solver uses brute force.

    The setter is built with ``use_reference=True`` and the solver with
    ``use_brute_force=True``. After one step, ``info`` must report at least
    one dynamic trap, and the problem's ``trap_test_cases`` list must have
    exactly that many entries.
    """
    court = CodeCourtEnv()
    court.reset()

    reference_setter = SetterAgent(use_reference=True)
    brute_solver = SolverAgent(use_brute_force=True)

    setter_submission = reference_setter.generate_solution(
        court._current_state.problem
    )
    solver_submission = brute_solver.solve(court._current_state.problem)

    _, _, finished, details = court.step(setter_submission, solver_submission)

    assert finished is True

    trap_count = details["dynamic_trap_count"]
    assert trap_count >= 1

    # A missing "trap_test_cases" key is treated as an empty list.
    recorded_traps = court._current_state.problem.get("trap_test_cases", [])
    assert len(recorded_traps) == trap_count
    print("✓ test_dynamic_traps_added_for_bruteforce_solver")


def test_difficulty_progression():
    """Smoke-test ten episodes with ``difficulty_progression=True``.

    Both agents are built with ``use_reference=True``. The test makes no
    assertion about how difficulty changes; it only verifies that the
    episodes run to completion and that the env's episode counter matches
    the number of episodes played.
    """
    env = CodeCourtEnv(difficulty_progression=True)
    setter = SetterAgent(use_reference=True)
    solver = SolverAgent(use_reference=True)

    # Single source of truth for the loop bound and the final assertion.
    episodes = 10

    # With reference solutions for both, solver will always pass
    # This tests that the difficulty progression logic runs
    for _ in range(episodes):
        # The observation returned by reset() is not needed here.
        env.reset()
        setter_code = setter.generate_solution(env._current_state.problem)
        solver_code = solver.solve(env._current_state.problem)
        env.step(setter_code, solver_code)

    # Just verify the environment ran without errors
    assert env._episode_count == episodes
    print("✓ test_difficulty_progression")


def test_elo_tracker():
    """Check the stats reported by ``env.elo`` after ten episodes.

    Both agents are built with ``use_reference=True``. After the episodes
    have been played, ``env.elo.get_stats()`` must contain ``setter_elo`` and
    ``solver_elo`` entries and report an ``episodes`` count equal to the
    number of episodes played.
    """
    env = CodeCourtEnv()
    setter = SetterAgent(use_reference=True)
    solver = SolverAgent(use_reference=True)

    # Single source of truth for the loop bound and the final assertion.
    episodes = 10

    for _ in range(episodes):
        # The observation returned by reset() is not needed here.
        env.reset()
        setter_code = setter.generate_solution(env._current_state.problem)
        solver_code = solver.solve(env._current_state.problem)
        env.step(setter_code, solver_code)

    stats = env.elo.get_stats()
    assert "setter_elo" in stats
    assert "solver_elo" in stats
    assert stats["episodes"] == episodes
    print("✓ test_elo_tracker")


if __name__ == "__main__":
    # Script entry point: run each test in declaration order. The first
    # failing assertion aborts the run before the success banner is printed.
    print("Running Environment tests...")
    for run_test in (
        test_env_reset,
        test_env_step,
        test_dynamic_problem_metadata,
        test_dynamic_traps_added_for_bruteforce_solver,
        test_difficulty_progression,
        test_elo_tracker,
    ):
        run_test()
    print("\n✅ All environment tests passed!")