Spaces:
Sleeping
Sleeping
| import asyncio | |
| import unittest | |
| from env.environment import CICDDebuggerEnvironment, REQUIRED_TOOLS | |
class EnvironmentContractTests(unittest.TestCase):
    """Contract tests for the reset/step/get_state surface of CICDDebuggerEnvironment.

    The environment's ``reset`` and ``step`` are coroutines; each call is
    driven to completion with its own ``asyncio.run`` invocation.
    """

    @staticmethod
    def _start(task_id, *, max_steps, seed):
        """Build an environment, reset it onto *task_id*, return ``(env, first_observation)``."""
        env = CICDDebuggerEnvironment(max_steps=max_steps, seed=seed)
        first_observation = asyncio.run(env.reset(task_id=task_id))
        return env, first_observation

    @staticmethod
    def _step(env, action):
        """Run a single action and return whatever ``env.step`` produced."""
        return asyncio.run(env.step(action))

    @classmethod
    def _play(cls, env, *actions):
        """Run *actions* in order, discarding the individual step results."""
        for action in actions:
            cls._step(env, action)

    def test_reset_returns_structured_observation(self):
        # reset() must hand back an observation carrying the expected keys
        # and the initial bookkeeping values.
        _, obs = self._start("easy-command-typo", max_steps=10, seed=7)

        for key in ("config", "logs", "error_message", "progress_flags"):
            self.assertIn(key, obs)
        self.assertEqual(obs["task_id"], "easy-command-typo")
        self.assertEqual(obs["available_tools"], REQUIRED_TOOLS)
        self.assertEqual(obs["step_count"], 0)

    def test_step_returns_obs_reward_done_info(self):
        # step() must return a 4-tuple of (dict, float, bool, dict).
        env, _ = self._start("easy-command-typo", max_steps=10, seed=3)

        obs, reward, done, info = self._step(
            env, "read_logs: inspect failing stage logs"
        )

        expected_types = ((obs, dict), (reward, float), (done, bool), (info, dict))
        for value, expected in expected_types:
            self.assertIsInstance(value, expected)
        self.assertIn("tool", info)

    def test_action_space_rejects_extra_tools(self):
        # An action outside the supported tool set is reported through
        # info["error"] without ending the episode.
        env, _ = self._start("easy-command-typo", max_steps=10, seed=5)

        obs, reward, done, info = self._step(env, "propose_fix: force deploy")

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIn("config", obs)

    def test_action_space_rejects_alias_tools(self):
        # A shortened tool name ("read") must be reported as an error as well.
        env, _ = self._start("easy-command-typo", max_steps=10, seed=15)

        _, _, done, info = self._step(env, "read: workflow file")

        self.assertIn("error", info)
        self.assertIsNotNone(info["error"])
        self.assertFalse(done)

    def test_submit_solution_path(self):
        # Full inspect -> edit -> verify sequence followed by a submission.
        env, _ = self._start("easy-command-typo", max_steps=12, seed=9)
        self._play(
            env,
            "read_logs: inspect logs",
            "analyze_error: identify root cause",
            "edit_config: replace npm tset with npm test",
            "run_pipeline_stage: run test stage",
            "run_tests: execute tests",
            "validate_fix: validate score",
        )

        obs, reward, done, info = self._step(
            env, "submit_solution: submit current fix"
        )

        self.assertTrue(done)
        self.assertGreaterEqual(reward, 0.0)
        self.assertIsNone(info.get("error"))
        self.assertEqual(obs["progress_flags"].get("submit_solution"), True)

    def test_internal_state_tracks_required_fields(self):
        # get_state() must expose the bookkeeping fields checked below.
        env, _ = self._start("easy-command-typo", max_steps=10, seed=11)
        self._play(env, "read_logs: inspect logs")

        state = env.get_state()

        self.assertTrue(state.get("initialized"))
        for field in (
            "actual_bug",
            "correct_solution",
            "progress_flags",
            "file_modification_count",
            "hidden_test_pass_rate",
        ):
            self.assertIn(field, state)

    def test_yaml_task_is_fixable_via_edit_flow(self):
        # The YAML indentation task must be repairable through edit_config
        # and then submittable without an error.
        env, _ = self._start("easy-yaml-indentation", max_steps=12, seed=17)
        self._play(
            env,
            "read_logs: inspect logs",
            "analyze_error: identify root cause",
        )

        edited, _, _, _ = self._step(
            env, "edit_config: fix YAML indentation and syntax"
        )
        # NOTE(review): the leading whitespace inside these two literals may
        # have been collapsed in this copy of the file -- confirm the intended
        # indentation widths against the task fixture.
        self.assertIn("- run: pytest", edited["config"])
        self.assertNotIn(" - run: pytest", edited["config"])

        self._play(
            env,
            "run_tests: execute tests",
            "validate_fix: validate score",
        )
        _, _, done, info = self._step(env, "submit_solution: submit current fix")

        self.assertTrue(done)
        self.assertIsNone(info.get("error"))

    def test_hard_needs_order_edit_updates_deploy_dependency(self):
        # After the edit the config must contain the two-job dependency list,
        # and the substring "needs: build" must occur exactly once.
        env, _ = self._start("hard-needs-order", max_steps=12, seed=19)

        edited, _, _, _ = self._step(
            env, "edit_config: fix deploy dependency ordering"
        )
        config_text = edited["config"]

        self.assertIn("needs: [build, test]", config_text)
        self.assertEqual(config_text.count("needs: build"), 1)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()