File size: 4,805 Bytes
30bf68a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import asyncio
import unittest

from env.environment import CICDDebuggerEnvironment, REQUIRED_TOOLS


class EnvironmentContractTests(unittest.TestCase):
    """Contract checks for the public surface of ``CICDDebuggerEnvironment``.

    Every test builds its own environment, resets it onto a named task and
    drives it with ``"<tool>: <detail>"`` action strings. Each coroutine is
    executed to completion with its own ``asyncio.run`` call.
    """

    @staticmethod
    def _start(task_id, *, max_steps, seed):
        """Create an environment, reset it to *task_id*, return ``(env, observation)``."""
        environment = CICDDebuggerEnvironment(max_steps=max_steps, seed=seed)
        initial = asyncio.run(environment.reset(task_id=task_id))
        return environment, initial

    @staticmethod
    def _act(environment, action):
        """Run a single ``step`` coroutine to completion and return its result."""
        return asyncio.run(environment.step(action))

    def test_reset_returns_structured_observation(self):
        """``reset`` returns an observation with the expected keys and initial values."""
        _, first = self._start("easy-command-typo", max_steps=10, seed=7)

        for key in ("config", "logs", "error_message", "progress_flags"):
            self.assertIn(key, first)
        self.assertEqual(first["task_id"], "easy-command-typo")
        self.assertEqual(first["available_tools"], REQUIRED_TOOLS)
        self.assertEqual(first["step_count"], 0)

    def test_step_returns_obs_reward_done_info(self):
        """``step`` returns a 4-tuple of (dict, float, bool, dict) with a ``tool`` entry."""
        environment, _ = self._start("easy-command-typo", max_steps=10, seed=3)

        observation, reward, done, info = self._act(
            environment, "read_logs: inspect failing stage logs"
        )

        typed_parts = (
            (observation, dict),
            (reward, float),
            (done, bool),
            (info, dict),
        )
        for value, expected_type in typed_parts:
            self.assertIsInstance(value, expected_type)
        self.assertIn("tool", info)

    def test_action_space_rejects_extra_tools(self):
        """A ``propose_fix`` action is expected to report an error without ending the episode."""
        environment, _ = self._start("easy-command-typo", max_steps=10, seed=5)

        outcome = self._act(environment, "propose_fix: force deploy")
        observation, reward, done, info = outcome

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIn("config", observation)

    def test_action_space_rejects_alias_tools(self):
        """A ``read`` action is expected to report an error without ending the episode."""
        environment, _ = self._start("easy-command-typo", max_steps=10, seed=15)

        outcome = self._act(environment, "read: workflow file")
        done, info = outcome[2], outcome[3]
        # Mirror the strict 4-way unpacking so a wrong-sized result still fails loudly.
        _, _, done, info = outcome

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)

    def test_submit_solution_path(self):
        """The full read/analyze/edit/run/validate/submit sequence ends the episode cleanly."""
        environment, _ = self._start("easy-command-typo", max_steps=12, seed=9)

        lead_up_actions = (
            "read_logs: inspect logs",
            "analyze_error: identify root cause",
            "edit_config: replace npm tset with npm test",
            "run_pipeline_stage: run test stage",
            "run_tests: execute tests",
            "validate_fix: validate score",
        )
        for action in lead_up_actions:
            self._act(environment, action)
        observation, reward, done, info = self._act(
            environment, "submit_solution: submit current fix"
        )

        self.assertTrue(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIsNone(info.get("error"))
        flags = observation["progress_flags"]
        self.assertEqual(flags.get("submit_solution"), True)

    def test_internal_state_tracks_required_fields(self):
        """``get_state`` reports ``initialized`` and exposes the required bookkeeping keys."""
        environment, _ = self._start("easy-command-typo", max_steps=10, seed=11)
        self._act(environment, "read_logs: inspect logs")

        snapshot = environment.get_state()
        self.assertTrue(snapshot.get("initialized"))
        required_keys = (
            "actual_bug",
            "correct_solution",
            "progress_flags",
            "file_modification_count",
            "hidden_test_pass_rate",
        )
        for key in required_keys:
            self.assertIn(key, snapshot)

    def test_yaml_task_is_fixable_via_edit_flow(self):
        """An ``edit_config`` step on the YAML task rewrites the config; submit then succeeds."""
        environment, _ = self._start("easy-yaml-indentation", max_steps=12, seed=17)

        for action in ("read_logs: inspect logs", "analyze_error: identify root cause"):
            self._act(environment, action)
        edited, _, _, _ = self._act(
            environment, "edit_config: fix YAML indentation and syntax"
        )

        config_text = edited["config"]
        self.assertIn("- run: pytest", config_text)
        # The seven-space-indented variant must no longer appear after the edit.
        self.assertNotIn("       - run: pytest", edited["config"])

        for action in ("run_tests: execute tests", "validate_fix: validate score"):
            self._act(environment, action)
        _, _, done, info = self._act(environment, "submit_solution: submit current fix")

        self.assertTrue(done)
        self.assertIsNone(info.get("error"))

    def test_hard_needs_order_edit_updates_deploy_dependency(self):
        """An ``edit_config`` step on the needs-order task yields ``needs: [build, test]``."""
        environment, _ = self._start("hard-needs-order", max_steps=12, seed=19)

        edited, _, _, _ = self._act(
            environment, "edit_config: fix deploy dependency ordering"
        )

        self.assertIn("needs: [build, test]", edited["config"])
        # Exactly one plain ``needs: build`` entry should remain in the config.
        self.assertEqual(edited["config"].count("needs: build"), 1)


# Allow running this file directly as a script: unittest.main() loads and runs
# the test cases defined in this module.
if __name__ == "__main__":
    unittest.main()