# cicd-debugger-env/tests/test_env.py
# Lishika's picture
# clean final submission
# 30bf68a
import asyncio
import unittest
from env.environment import CICDDebuggerEnvironment, REQUIRED_TOOLS
class EnvironmentContractTests(unittest.TestCase):
    """Contract tests for the reset/step/state API of ``CICDDebuggerEnvironment``.

    ``reset`` and ``step`` are awaited through ``asyncio.run``, so every call
    is driven to completion synchronously before the next assertion runs.
    """

    # ------------------------------------------------------------------
    # Private helpers (not collected by unittest: names do not start with
    # ``test``).  They only remove the repeated setup boilerplate.
    # ------------------------------------------------------------------

    @staticmethod
    def _run_async(coro):
        """Run *coro* to completion with ``asyncio.run`` and return its result."""
        return asyncio.run(coro)

    def _start_episode(self, task_id, *, max_steps, seed):
        """Build an environment and reset it onto *task_id*.

        Args:
            task_id: Task identifier forwarded to ``env.reset(task_id=...)``.
            max_steps: Forwarded to the ``CICDDebuggerEnvironment`` constructor.
            seed: Forwarded to the ``CICDDebuggerEnvironment`` constructor.

        Returns:
            A ``(env, observation)`` tuple, where ``observation`` is the value
            produced by ``env.reset``.
        """
        env = CICDDebuggerEnvironment(max_steps=max_steps, seed=seed)
        observation = self._run_async(env.reset(task_id=task_id))
        return env, observation

    def _step(self, env, action):
        """Apply a single *action* string and return whatever ``env.step`` yields."""
        return self._run_async(env.step(action))

    def _step_all(self, env, actions):
        """Apply each action in *actions* in order, discarding the results."""
        for action in actions:
            self._step(env, action)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_reset_returns_structured_observation(self):
        """``reset`` returns an observation exposing the expected keys and values."""
        _, observation = self._start_episode(
            "easy-command-typo", max_steps=10, seed=7
        )

        for key in ("config", "logs", "error_message", "progress_flags"):
            self.assertIn(key, observation)
        self.assertEqual(observation["task_id"], "easy-command-typo")
        self.assertEqual(observation["available_tools"], REQUIRED_TOOLS)
        self.assertEqual(observation["step_count"], 0)

    def test_step_returns_obs_reward_done_info(self):
        """``step`` returns an ``(observation, reward, done, info)`` 4-tuple."""
        env, _ = self._start_episode("easy-command-typo", max_steps=10, seed=3)

        observation, reward, done, info = self._step(
            env, "read_logs: inspect failing stage logs"
        )

        self.assertIsInstance(observation, dict)
        self.assertIsInstance(reward, float)
        self.assertIsInstance(done, bool)
        self.assertIsInstance(info, dict)
        self.assertIn("tool", info)

    def test_action_space_rejects_extra_tools(self):
        """A ``propose_fix`` action yields an error without ending the episode.

        NOTE(review): presumably ``propose_fix`` is not one of
        ``REQUIRED_TOOLS`` -- confirm against the environment module.
        """
        env, _ = self._start_episode("easy-command-typo", max_steps=10, seed=5)

        observation, reward, done, info = self._step(
            env, "propose_fix: force deploy"
        )

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)
        self.assertGreaterEqual(reward, 0.0)
        # An observation is still returned alongside the error.
        self.assertIn("config", observation)

    def test_action_space_rejects_alias_tools(self):
        """A ``read`` action (short alias-like name) yields an error, episode continues."""
        env, _ = self._start_episode("easy-command-typo", max_steps=10, seed=15)

        _, _, done, info = self._step(env, "read: workflow file")

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)

    def test_submit_solution_path(self):
        """A full read/analyze/edit/run/validate/submit sequence ends the episode."""
        env, _ = self._start_episode("easy-command-typo", max_steps=12, seed=9)

        self._step_all(
            env,
            (
                "read_logs: inspect logs",
                "analyze_error: identify root cause",
                "edit_config: replace npm tset with npm test",
                "run_pipeline_stage: run test stage",
                "run_tests: execute tests",
                "validate_fix: validate score",
            ),
        )
        observation, reward, done, info = self._step(
            env, "submit_solution: submit current fix"
        )

        self.assertTrue(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIsNone(info.get("error"))
        self.assertEqual(
            observation["progress_flags"].get("submit_solution"), True
        )

    def test_internal_state_tracks_required_fields(self):
        """``get_state`` reports ``initialized`` and exposes the tracked fields."""
        env, _ = self._start_episode("easy-command-typo", max_steps=10, seed=11)
        self._step(env, "read_logs: inspect logs")

        state = env.get_state()

        self.assertTrue(state.get("initialized"))
        for field in (
            "actual_bug",
            "correct_solution",
            "progress_flags",
            "file_modification_count",
            "hidden_test_pass_rate",
        ):
            self.assertIn(field, state)

    def test_yaml_task_is_fixable_via_edit_flow(self):
        """The YAML indentation task can be edited, validated and submitted."""
        env, _ = self._start_episode(
            "easy-yaml-indentation", max_steps=12, seed=17
        )
        self._step_all(
            env,
            (
                "read_logs: inspect logs",
                "analyze_error: identify root cause",
            ),
        )

        observation, _, _, _ = self._step(
            env, "edit_config: fix YAML indentation and syntax"
        )

        self.assertIn("- run: pytest", observation["config"])
        # NOTE(review): the leading whitespace of this literal may have been
        # collapsed when the file was extracted; confirm the intended
        # mis-indented form against the task fixture.
        self.assertNotIn(" - run: pytest", observation["config"])

        self._step_all(
            env,
            (
                "run_tests: execute tests",
                "validate_fix: validate score",
            ),
        )
        _, _, done, info = self._step(env, "submit_solution: submit current fix")

        self.assertTrue(done)
        self.assertIsNone(info.get("error"))

    def test_hard_needs_order_edit_updates_deploy_dependency(self):
        """Editing the needs-order task rewrites the config's ``needs`` entries."""
        env, _ = self._start_episode("hard-needs-order", max_steps=12, seed=19)

        observation, _, _, _ = self._step(
            env, "edit_config: fix deploy dependency ordering"
        )

        self.assertIn("needs: [build, test]", observation["config"])
        # Exactly one plain ``needs: build`` line is expected to remain.
        self.assertEqual(observation["config"].count("needs: build"), 1)
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()