# Listing metadata (not part of the module): file size 4,805 bytes; ref 30bf68a.
import asyncio
import unittest
from env.environment import CICDDebuggerEnvironment, REQUIRED_TOOLS
class EnvironmentContractTests(unittest.TestCase):
    """Contract checks for the reset/step/get_state surface of CICDDebuggerEnvironment.

    Each coroutine returned by the environment is driven with its own
    ``asyncio.run`` call, so the tests themselves stay synchronous.
    """

    @staticmethod
    def _fresh_env(task_id, *, max_steps, seed):
        """Create an environment, reset it onto *task_id*, and return ``(environment, first_observation)``."""
        environment = CICDDebuggerEnvironment(max_steps=max_steps, seed=seed)
        first_observation = asyncio.run(environment.reset(task_id=task_id))
        return environment, first_observation

    @staticmethod
    def _act(environment, action):
        """Run a single ``step`` coroutine to completion and return whatever it yields."""
        return asyncio.run(environment.step(action))

    def test_reset_returns_structured_observation(self):
        # The first observation must expose the expected keys and start at step 0.
        _, first = self._fresh_env("easy-command-typo", max_steps=10, seed=7)

        for key in ("config", "logs", "error_message", "progress_flags"):
            self.assertIn(key, first)

        self.assertEqual(first["task_id"], "easy-command-typo")
        self.assertEqual(first["available_tools"], REQUIRED_TOOLS)
        self.assertEqual(first["step_count"], 0)

    def test_step_returns_obs_reward_done_info(self):
        # step() must produce an (observation, reward, done, info) 4-tuple with fixed types.
        environment, _ = self._fresh_env("easy-command-typo", max_steps=10, seed=3)
        observation, reward, done, info = self._act(
            environment, "read_logs: inspect failing stage logs"
        )

        typed_parts = (
            (observation, dict),
            (reward, float),
            (done, bool),
            (info, dict),
        )
        for value, expected_type in typed_parts:
            self.assertIsInstance(value, expected_type)

        self.assertIn("tool", info)

    def test_action_space_rejects_extra_tools(self):
        # "propose_fix" is presumably outside REQUIRED_TOOLS; the step is expected to
        # report an error without ending the episode or producing a negative reward.
        environment, _ = self._fresh_env("easy-command-typo", max_steps=10, seed=5)
        observation, reward, done, info = self._act(environment, "propose_fix: force deploy")

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIn("config", observation)

    def test_action_space_rejects_alias_tools(self):
        # A shortened tool name ("read") must be reported as an error, not accepted.
        environment, _ = self._fresh_env("easy-command-typo", max_steps=10, seed=15)
        outcome = self._act(environment, "read: workflow file")
        _, _, done, info = outcome

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)

    def test_submit_solution_path(self):
        # Walk the full tool sequence and check that submitting finishes the episode cleanly.
        environment, _ = self._fresh_env("easy-command-typo", max_steps=12, seed=9)

        lead_up_actions = (
            "read_logs: inspect logs",
            "analyze_error: identify root cause",
            "edit_config: replace npm tset with npm test",
            "run_pipeline_stage: run test stage",
            "run_tests: execute tests",
            "validate_fix: validate score",
        )
        for action in lead_up_actions:
            self._act(environment, action)

        observation, reward, done, info = self._act(
            environment, "submit_solution: submit current fix"
        )

        self.assertTrue(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIsNone(info.get("error"))
        submitted_flag = observation["progress_flags"].get("submit_solution")
        self.assertEqual(submitted_flag, True)

    def test_internal_state_tracks_required_fields(self):
        # After one step, get_state() must be initialized and carry the bookkeeping keys.
        environment, _ = self._fresh_env("easy-command-typo", max_steps=10, seed=11)
        self._act(environment, "read_logs: inspect logs")

        snapshot = environment.get_state()

        self.assertTrue(snapshot.get("initialized"))
        expected_keys = (
            "actual_bug",
            "correct_solution",
            "progress_flags",
            "file_modification_count",
            "hidden_test_pass_rate",
        )
        for key in expected_keys:
            self.assertIn(key, snapshot)

    def test_yaml_task_is_fixable_via_edit_flow(self):
        # The YAML indentation task must be repairable through edit_config and then submittable.
        environment, _ = self._fresh_env("easy-yaml-indentation", max_steps=12, seed=17)

        for action in ("read_logs: inspect logs", "analyze_error: identify root cause"):
            self._act(environment, action)

        edited, _, _, _ = self._act(
            environment, "edit_config: fix YAML indentation and syntax"
        )
        patched_config = edited["config"]
        # NOTE(review): leading whitespace inside these two literals is significant;
        # confirm both against the task fixture.
        self.assertIn("- run: pytest", patched_config)
        self.assertNotIn(" - run: pytest", patched_config)

        for action in ("run_tests: execute tests", "validate_fix: validate score"):
            self._act(environment, action)

        _, _, done, info = self._act(environment, "submit_solution: submit current fix")
        self.assertTrue(done)
        self.assertIsNone(info.get("error"))

    def test_hard_needs_order_edit_updates_deploy_dependency(self):
        # A single edit_config step must add the combined dependency list while
        # leaving exactly one "needs: build" occurrence in the config.
        environment, _ = self._fresh_env("hard-needs-order", max_steps=12, seed=19)
        edited, _, _, _ = self._act(
            environment, "edit_config: fix deploy dependency ordering"
        )

        patched_config = edited["config"]
        self.assertIn("needs: [build, test]", patched_config)
        self.assertEqual(patched_config.count("needs: build"), 1)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()