import asyncio
import unittest

from env.environment import CICDDebuggerEnvironment, REQUIRED_TOOLS


class EnvironmentContractTests(unittest.TestCase):
    """Contract checks for the reset/step/get_state API of CICDDebuggerEnvironment."""

    @staticmethod
    def _start(task_id, *, max_steps, seed):
        """Create an environment and reset it onto *task_id*.

        Returns a ``(environment, first_observation)`` pair, where the
        observation is whatever ``reset`` produced.
        """
        environment = CICDDebuggerEnvironment(max_steps=max_steps, seed=seed)
        first_observation = asyncio.run(environment.reset(task_id=task_id))
        return environment, first_observation

    @staticmethod
    def _act(environment, action):
        """Run a single ``step`` call to completion and return its result."""
        return asyncio.run(environment.step(action))

    def test_reset_returns_structured_observation(self):
        # reset() must hand back an observation carrying the expected keys
        # and the initial bookkeeping values.
        _, first_obs = self._start("easy-command-typo", max_steps=10, seed=7)

        for key in ("config", "logs", "error_message", "progress_flags"):
            self.assertIn(key, first_obs)
        self.assertEqual(first_obs["task_id"], "easy-command-typo")
        self.assertEqual(first_obs["available_tools"], REQUIRED_TOOLS)
        self.assertEqual(first_obs["step_count"], 0)

    def test_step_returns_obs_reward_done_info(self):
        # step() must return a 4-tuple of (dict, float, bool, dict).
        environment, _ = self._start("easy-command-typo", max_steps=10, seed=3)

        result = self._act(environment, "read_logs: inspect failing stage logs")
        obs, reward, done, info = result

        self.assertIsInstance(obs, dict)
        self.assertIsInstance(reward, float)
        self.assertIsInstance(done, bool)
        self.assertIsInstance(info, dict)
        self.assertIn("tool", info)

    def test_action_space_rejects_extra_tools(self):
        # "propose_fix" is expected to be reported as an error without
        # ending the episode.
        environment, _ = self._start("easy-command-typo", max_steps=10, seed=5)

        obs, reward, done, info = self._act(environment, "propose_fix: force deploy")

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIn("config", obs)

    def test_action_space_rejects_alias_tools(self):
        # "read" is expected to be reported as an error without ending
        # the episode.
        environment, _ = self._start("easy-command-typo", max_steps=10, seed=15)

        outcome = self._act(environment, "read: workflow file")
        done, info = outcome[2], outcome[3]

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)

    def test_submit_solution_path(self):
        # Walk the full action sequence and then submit.
        environment, _ = self._start("easy-command-typo", max_steps=12, seed=9)

        lead_up_actions = (
            "read_logs: inspect logs",
            "analyze_error: identify root cause",
            "edit_config: replace npm tset with npm test",
            "run_pipeline_stage: run test stage",
            "run_tests: execute tests",
            "validate_fix: validate score",
        )
        for action in lead_up_actions:
            self._act(environment, action)

        obs, reward, done, info = self._act(
            environment, "submit_solution: submit current fix"
        )

        self.assertTrue(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIsNone(info.get("error"))
        self.assertEqual(obs["progress_flags"].get("submit_solution"), True)

    def test_internal_state_tracks_required_fields(self):
        # After one step, get_state() must expose the listed fields.
        environment, _ = self._start("easy-command-typo", max_steps=10, seed=11)
        self._act(environment, "read_logs: inspect logs")

        snapshot = environment.get_state()

        self.assertTrue(snapshot.get("initialized"))
        expected_fields = (
            "actual_bug",
            "correct_solution",
            "progress_flags",
            "file_modification_count",
            "hidden_test_pass_rate",
        )
        for field_name in expected_fields:
            self.assertIn(field_name, snapshot)

    def test_yaml_task_is_fixable_via_edit_flow(self):
        # The edit step must rewrite the config, and the episode must then
        # be submittable without an error.
        environment, _ = self._start("easy-yaml-indentation", max_steps=12, seed=17)
        self._act(environment, "read_logs: inspect logs")
        self._act(environment, "analyze_error: identify root cause")

        edited = self._act(environment, "edit_config: fix YAML indentation and syntax")
        config_text = edited[0]["config"]

        self.assertIn("- run: pytest", config_text)
        # NOTE(review): the leading whitespace inside the next literal may have
        # been collapsed when this file's formatting was lost - confirm it
        # against the task's broken config.
        self.assertNotIn(" - run: pytest", config_text)

        self._act(environment, "run_tests: execute tests")
        self._act(environment, "validate_fix: validate score")
        final = self._act(environment, "submit_solution: submit current fix")
        done, info = final[2], final[3]

        self.assertTrue(done)
        self.assertIsNone(info.get("error"))

    def test_hard_needs_order_edit_updates_deploy_dependency(self):
        # A single edit must produce the combined "needs" entry while leaving
        # exactly one occurrence of the substring "needs: build".
        environment, _ = self._start("hard-needs-order", max_steps=12, seed=19)

        edited = self._act(environment, "edit_config: fix deploy dependency ordering")
        config_text = edited[0]["config"]

        self.assertIn("needs: [build, test]", config_text)
        self.assertEqual(config_text.count("needs: build"), 1)


if __name__ == "__main__":
    unittest.main()