Spaces:
Sleeping
Sleeping
| """ | |
| Integration tests for the SRE Environment. | |
| Tests the full reset → step → grade lifecycle for each task tier. | |
| """ | |
| import os | |
| import sys | |
| import time | |
| import pytest | |
| # Add project root to path for imports | |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from models import SREAction, SREObservation, SREState | |
| from server.sre_environment import SREEnvironment | |
class TestSREEnvironmentLifecycle:
    """Exercise the reset -> step lifecycle of ``SREEnvironment``.

    Each test gets a fresh environment from ``setup_method`` and releases it
    through ``_cleanup()`` in ``teardown_method``.
    """

    def setup_method(self):
        """Create a new environment for the test about to run."""
        self.env = SREEnvironment()

    def teardown_method(self):
        """Release the environment created in ``setup_method``."""
        self.env._cleanup()

    def test_reset_returns_observation(self):
        """reset() yields an SREObservation with non-empty stdout and exit code 0."""
        obs = self.env.reset(seed=42)
        assert isinstance(obs, SREObservation)
        assert obs.stdout != ""
        assert obs.exit_code == 0

    def test_reset_sets_state(self):
        """After reset the state names a task, has zero steps and is not done."""
        self.env.reset(seed=42)
        state = self.env.state
        assert isinstance(state, SREState)
        assert state.task_id != ""
        assert state.task_name != ""
        assert state.difficulty in ("easy", "medium", "hard")
        assert state.step_count == 0
        assert state.is_done is False

    def test_step_executes_shell_command(self):
        """A run_shell action returns the command output and counts as one step."""
        self.env.reset(seed=42)
        action = SREAction(action_type="run_shell", command="echo hello")
        obs = self.env.step(action)
        assert isinstance(obs, SREObservation)
        assert "hello" in obs.stdout
        assert obs.exit_code == 0
        assert self.env.state.step_count == 1

    def test_step_blocks_destructive_commands(self):
        """``rm -rf /`` is refused: exit code 126 and BLOCKED on stderr."""
        self.env.reset(seed=42)
        action = SREAction(action_type="run_shell", command="rm -rf /")
        obs = self.env.step(action)
        assert obs.exit_code == 126
        assert "BLOCKED" in obs.stderr

    def test_step_patches_file(self):
        """A patch_file action writes the given content to a path under /tmp."""
        self.env.reset(seed=42)
        test_path = "/tmp/sre_test_patch.txt"
        action = SREAction(
            action_type="patch_file",
            file_path=test_path,
            content="patched content",
        )
        try:
            obs = self.env.step(action)
            assert obs.exit_code == 0
            assert os.path.exists(test_path)
            with open(test_path) as f:
                assert f.read() == "patched content"
        finally:
            # Clean up even when an assertion fails, so a stale file cannot
            # leak into (or falsely satisfy) a later run.
            if os.path.exists(test_path):
                os.remove(test_path)

    def test_patch_file_restricted_to_tmp(self):
        """A patch_file action aimed at /etc/passwd is refused (126, BLOCKED)."""
        self.env.reset(seed=42)
        action = SREAction(
            action_type="patch_file",
            file_path="/etc/passwd",
            content="hacked",
        )
        obs = self.env.step(action)
        assert obs.exit_code == 126
        assert "BLOCKED" in obs.stderr

    def test_max_steps_terminates_episode(self):
        """Stepping past ``max_steps`` should land on a finished episode."""
        self.env.reset(seed=42)
        # Override max_steps for testing
        self.env._state.max_steps = 3
        for i in range(3):
            action = SREAction(action_type="run_shell", command=f"echo step{i}")
            self.env.step(action)
        # Next step should be terminal
        action = SREAction(action_type="run_shell", command="echo overflow")
        obs = self.env.step(action)
        # Any one of three signals is accepted as proof the episode ended.
        # NOTE(review): the trailing is_done check makes this lenient; consider
        # tightening once the terminal-step contract is settled.
        assert (
            "already finished" in obs.stderr
            or "already done" in obs.message.lower()
            or self.env.state.is_done
        )

    def test_step_on_done_episode(self):
        """Stepping while ``is_done`` is True reports 'already finished' on stderr."""
        self.env.reset(seed=42)
        self.env._state.is_done = True
        action = SREAction(action_type="run_shell", command="echo test")
        obs = self.env.step(action)
        assert "already finished" in obs.stderr
class TestEasyTask:
    """Integration tests for the easy_restart task."""

    def setup_method(self):
        """Create a new environment for the test about to run."""
        self.env = SREEnvironment()

    def teardown_method(self):
        """Release the environment created in ``setup_method``."""
        self.env._cleanup()

    def test_easy_task_setup(self):
        """Reset with seed 0 and confirm the task directory can be listed.

        Only the exit code of ``ls /tmp/sre_tasks/`` is checked; the selected
        task and its service state are not asserted here.
        """
        # Seed 0 is presumed to select the easy task -- TODO confirm; nothing
        # below verifies which task was actually chosen.
        self.env.reset(seed=0)
        # Run a diagnostic command
        obs = self.env.step(SREAction(
            action_type="run_shell",
            command="ls /tmp/sre_tasks/",
        ))
        assert obs.exit_code == 0
class TestFieldDefaults:
    """Edge cases: long-running commands, silent commands, repeated resets."""

    def setup_method(self):
        """Build the environment under test."""
        self.env = SREEnvironment()

    def teardown_method(self):
        """Tear the environment down via its ``_cleanup`` hook."""
        self.env._cleanup()

    def test_command_timeout(self):
        """``sleep 60`` must come back with exit code 124 and a timeout notice."""
        self.env.reset(seed=42)
        result = self.env.step(
            SREAction(action_type="run_shell", command="sleep 60")
        )
        assert result.exit_code == 124
        stderr_lowered = result.stderr.lower()
        assert "timed out" in stderr_lowered

    def test_empty_command_output(self):
        """A command that prints nothing still yields exit code 0 and empty stdout."""
        self.env.reset(seed=42)
        result = self.env.step(
            SREAction(action_type="run_shell", command="true")
        )
        assert result.exit_code == 0
        assert result.stdout == ""

    def test_multiple_resets(self):
        """Three consecutive resets each succeed and leave the step count at zero."""
        for seed in (0, 1, 2):
            first_obs = self.env.reset(seed=seed)
            assert first_obs.exit_code == 0
            assert self.env.state.step_count == 0
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; hand it to sys.exit so that
    # running this file directly fails visibly (non-zero) when a test fails.
    sys.exit(pytest.main([__file__, "-v"]))