""" Integration tests for the SRE Environment. Tests the full reset → step → grade lifecycle for each task tier. """ import os import sys import time import pytest # Add project root to path for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from models import SREAction, SREObservation, SREState from server.sre_environment import SREEnvironment class TestSREEnvironmentLifecycle: """Test the full environment lifecycle.""" def setup_method(self): self.env = SREEnvironment() def teardown_method(self): self.env._cleanup() def test_reset_returns_observation(self): obs = self.env.reset(seed=42) assert isinstance(obs, SREObservation) assert obs.stdout != "" assert obs.exit_code == 0 def test_reset_sets_state(self): self.env.reset(seed=42) state = self.env.state assert isinstance(state, SREState) assert state.task_id != "" assert state.task_name != "" assert state.difficulty in ("easy", "medium", "hard") assert state.step_count == 0 assert state.is_done is False def test_step_executes_shell_command(self): self.env.reset(seed=42) action = SREAction(action_type="run_shell", command="echo hello") obs = self.env.step(action) assert isinstance(obs, SREObservation) assert "hello" in obs.stdout assert obs.exit_code == 0 assert self.env.state.step_count == 1 def test_step_blocks_destructive_commands(self): self.env.reset(seed=42) action = SREAction(action_type="run_shell", command="rm -rf /") obs = self.env.step(action) assert obs.exit_code == 126 assert "BLOCKED" in obs.stderr def test_step_patches_file(self): self.env.reset(seed=42) test_path = "/tmp/sre_test_patch.txt" action = SREAction( action_type="patch_file", file_path=test_path, content="patched content", ) obs = self.env.step(action) assert obs.exit_code == 0 assert os.path.exists(test_path) with open(test_path) as f: assert f.read() == "patched content" os.remove(test_path) def test_patch_file_restricted_to_tmp(self): self.env.reset(seed=42) action = SREAction( action_type="patch_file", file_path="/etc/passwd", content="hacked", ) obs = self.env.step(action) assert obs.exit_code == 126 assert "BLOCKED" in obs.stderr def test_max_steps_terminates_episode(self): self.env.reset(seed=42) # Override max_steps for testing self.env._state.max_steps = 3 for i in range(3): action = SREAction(action_type="run_shell", command=f"echo step{i}") self.env.step(action) # Next step should be terminal action = SREAction(action_type="run_shell", command="echo overflow") obs = self.env.step(action) assert "already finished" in obs.stderr or "already done" in obs.message.lower() or self.env.state.is_done def test_step_on_done_episode(self): self.env.reset(seed=42) self.env._state.is_done = True action = SREAction(action_type="run_shell", command="echo test") obs = self.env.step(action) assert "already finished" in obs.stderr class TestEasyTask: """Integration tests for the easy_restart task.""" def setup_method(self): self.env = SREEnvironment() def teardown_method(self): self.env._cleanup() def test_easy_task_setup(self): """Verify the easy task sets up correctly with a crashed service.""" # Force easy task by setting seed that produces it self.env.reset(seed=0) state = self.env.state # Run a diagnostic command obs = self.env.step(SREAction( action_type="run_shell", command="ls /tmp/sre_tasks/", )) assert obs.exit_code == 0 class TestFieldDefaults: """Test that environment correctly handles edge cases.""" def setup_method(self): self.env = SREEnvironment() def teardown_method(self): self.env._cleanup() def test_command_timeout(self): """Commands that take too long should be killed.""" self.env.reset(seed=42) action = SREAction(action_type="run_shell", command="sleep 60") obs = self.env.step(action) assert obs.exit_code == 124 assert "timed out" in obs.stderr.lower() def test_empty_command_output(self): """Commands with no output should still return valid observation.""" self.env.reset(seed=42) action = SREAction(action_type="run_shell", command="true") obs = self.env.step(action) assert obs.exit_code == 0 assert obs.stdout == "" def test_multiple_resets(self): """Multiple resets should work without errors.""" for i in range(3): obs = self.env.reset(seed=i) assert obs.exit_code == 0 assert self.env.state.step_count == 0 if __name__ == "__main__": pytest.main([__file__, "-v"])