# sre-openenv/tests/test_environment.py
# Dragonfire146's picture
# Initial commit
# 9eb0831
"""
Integration tests for the SRE Environment.
Tests the full reset → step → grade lifecycle for each task tier.
"""
import os
import sys
import time
import pytest
# Add project root to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from models import SREAction, SREObservation, SREState
from server.sre_environment import SREEnvironment
class TestSREEnvironmentLifecycle:
    """Test the full environment lifecycle: reset, step, and termination.

    A fresh ``SREEnvironment`` is built before every test and its private
    ``_cleanup`` hook is called after every test.
    """

    def setup_method(self):
        """Create the environment instance used by the test."""
        self.env = SREEnvironment()

    def teardown_method(self):
        """Call the environment's cleanup hook once the test has run."""
        self.env._cleanup()

    def test_reset_returns_observation(self):
        """reset() returns an SREObservation with output and exit code 0."""
        obs = self.env.reset(seed=42)
        assert isinstance(obs, SREObservation)
        assert obs.stdout != ""
        assert obs.exit_code == 0

    def test_reset_sets_state(self):
        """After reset() the state describes a fresh, unfinished episode."""
        self.env.reset(seed=42)
        state = self.env.state
        assert isinstance(state, SREState)
        assert state.task_id != ""
        assert state.task_name != ""
        assert state.difficulty in ("easy", "medium", "hard")
        assert state.step_count == 0
        assert state.is_done is False

    def test_step_executes_shell_command(self):
        """A run_shell action returns the command output and counts a step."""
        self.env.reset(seed=42)
        action = SREAction(action_type="run_shell", command="echo hello")
        obs = self.env.step(action)
        assert isinstance(obs, SREObservation)
        assert "hello" in obs.stdout
        assert obs.exit_code == 0
        assert self.env.state.step_count == 1

    def test_step_blocks_destructive_commands(self):
        """``rm -rf /`` is refused with exit code 126 and a BLOCKED notice."""
        self.env.reset(seed=42)
        action = SREAction(action_type="run_shell", command="rm -rf /")
        obs = self.env.step(action)
        assert obs.exit_code == 126
        assert "BLOCKED" in obs.stderr

    def test_step_patches_file(self):
        """A patch_file action under /tmp writes exactly the given content."""
        self.env.reset(seed=42)
        test_path = "/tmp/sre_test_patch.txt"
        action = SREAction(
            action_type="patch_file",
            file_path=test_path,
            content="patched content",
        )
        try:
            obs = self.env.step(action)
            assert obs.exit_code == 0
            assert os.path.exists(test_path)
            with open(test_path, encoding="utf-8") as f:
                assert f.read() == "patched content"
        finally:
            # Always remove the scratch file, even when an assertion above
            # fails, so a failed run does not leave state behind in /tmp.
            if os.path.exists(test_path):
                os.remove(test_path)

    def test_patch_file_restricted_to_tmp(self):
        """A patch_file action targeting /etc/passwd is refused (exit 126)."""
        self.env.reset(seed=42)
        action = SREAction(
            action_type="patch_file",
            file_path="/etc/passwd",
            content="hacked",
        )
        obs = self.env.step(action)
        assert obs.exit_code == 126
        assert "BLOCKED" in obs.stderr

    def test_max_steps_terminates_episode(self):
        """Stepping past ``max_steps`` leaves the episode finished."""
        self.env.reset(seed=42)
        # Override max_steps so the limit is reached after three steps.
        self.env._state.max_steps = 3
        for i in range(3):
            action = SREAction(action_type="run_shell", command=f"echo step{i}")
            self.env.step(action)
        # Next step should be terminal.
        action = SREAction(action_type="run_shell", command="echo overflow")
        obs = self.env.step(action)
        # NOTE(review): no other test in this file reads ``obs.message``;
        # confirm SREObservation defines it. Reading it defensively keeps a
        # missing/None attribute from surfacing as an AttributeError instead
        # of a plain assertion failure.
        message = getattr(obs, "message", "") or ""
        assert (
            "already finished" in obs.stderr
            or "already done" in message.lower()
            or self.env.state.is_done
        )

    def test_step_on_done_episode(self):
        """step() on an episode flagged done reports 'already finished'."""
        self.env.reset(seed=42)
        # Force the done flag directly instead of playing out an episode.
        self.env._state.is_done = True
        action = SREAction(action_type="run_shell", command="echo test")
        obs = self.env.step(action)
        assert "already finished" in obs.stderr
class TestEasyTask:
    """Integration tests for the easy_restart task."""

    def setup_method(self):
        """Create the environment instance used by the test."""
        self.env = SREEnvironment()

    def teardown_method(self):
        """Call the environment's cleanup hook once the test has run."""
        self.env._cleanup()

    def test_easy_task_setup(self):
        """Reset with seed 0 and check the task directory can be listed.

        NOTE(review): seed 0 is presumed to select the easy task, but nothing
        here asserts the selected difficulty or the crashed-service setup;
        only the exit code of the diagnostic ``ls`` is checked. Confirm the
        seed-to-task mapping before relying on this as an easy-tier test.
        """
        self.env.reset(seed=0)
        # Run a diagnostic command against the task directory.
        obs = self.env.step(
            SREAction(
                action_type="run_shell",
                command="ls /tmp/sre_tasks/",
            )
        )
        assert obs.exit_code == 0
class TestFieldDefaults:
    """Edge cases: command timeouts, silent commands, and repeated resets."""

    def setup_method(self):
        """Build a fresh environment before each test."""
        self.env = SREEnvironment()

    def teardown_method(self):
        """Run the environment's cleanup hook after each test."""
        self.env._cleanup()

    def test_command_timeout(self):
        """A ``sleep 60`` command is reported as timed out with exit code 124."""
        self.env.reset(seed=42)
        result = self.env.step(
            SREAction(action_type="run_shell", command="sleep 60")
        )
        assert result.exit_code == 124
        assert "timed out" in result.stderr.lower()

    def test_empty_command_output(self):
        """A command that prints nothing still yields exit 0 and empty stdout."""
        self.env.reset(seed=42)
        result = self.env.step(
            SREAction(action_type="run_shell", command="true")
        )
        assert result.exit_code == 0
        assert result.stdout == ""

    def test_multiple_resets(self):
        """Resetting three times in a row succeeds and zeroes the step count."""
        for seed in (0, 1, 2):
            initial_obs = self.env.reset(seed=seed)
            assert initial_obs.exit_code == 0
            assert self.env.state.step_count == 0
if __name__ == "__main__":
    # Propagate pytest's exit code so running this file directly fails
    # (non-zero status) when any test fails, instead of always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))