Spaces:
Sleeping
Sleeping
| """Tests for SentinelEnvironment reset, step, submit, and termination.""" | |
| import pytest | |
| from models import SentinelAction | |
| from server.sentinel_environment import SentinelEnvironment, MAX_STEPS, MAX_CONSECUTIVE_INVALID | |
def _action(tool_name: str, **params):
    """Build a validated ``SentinelAction`` for the given tool.

    Args:
        tool_name: Name of the tool the action should invoke.
        **params: Keyword arguments forwarded verbatim as the action's
            ``parameters`` mapping.

    Returns:
        Whatever ``SentinelAction.model_validate`` returns for the payload.
    """
    payload = {"tool_name": tool_name, "parameters": params}
    return SentinelAction.model_validate(payload)
class TestReset:
    """Checks on the observation produced by ``env.reset``.

    ``env`` is a fixture defined outside this file section
    (presumably a ``SentinelEnvironment`` instance -- confirm in conftest).
    """

    def test_reset_returns_observation(self, env):
        """A reset yields a live episode with a summary, at step zero."""
        initial = env.reset(task_id=1)

        assert initial.done is False
        assert initial.incident_summary != ""
        assert initial.step_number == 0

    def test_reset_includes_tool_descriptions(self, env):
        """The reset observation carries tool descriptions, incl. query_logs."""
        initial = env.reset(task_id=1)
        descriptions = initial.tool_descriptions

        assert descriptions != {}
        assert "query_logs" in descriptions

    def test_reset_invalid_task_defaults_to_1(self, env):
        """An unknown task id is replaced by task 1 in the environment state."""
        initial = env.reset(task_id=999)

        assert initial.done is False
        assert env.state.task_id == 1
class TestStep:
    """Checks on ``env.step`` before and after the environment is reset.

    ``env`` is a fixture defined outside this file section
    (presumably a ``SentinelEnvironment`` instance -- confirm in conftest).
    """

    def test_step_without_reset(self, env):
        """Stepping before any reset ends the episode with a 'not reset' error."""
        result = env.step(_action("get_service_status", service="auth"))

        assert result.done is True
        error_text = result.last_action_error.lower()
        assert "not reset" in error_text

    def test_valid_step_returns_output(self, env):
        """A valid tool call produces output and advances the step counter."""
        env.reset(task_id=1)
        status_call = _action("get_service_status", service="payment-api")
        result = env.step(status_call)

        assert result.tool_output != ""
        assert result.done is False
        assert result.step_number == 1

    def test_step_no_tool_descriptions(self, env):
        """Step observations carry an empty tool_descriptions mapping."""
        env.reset(task_id=1)
        status_call = _action("get_service_status", service="payment-api")
        result = env.step(status_call)

        assert result.tool_descriptions == {}

    def test_invalid_tool(self, env):
        """A valid tool aimed at an unknown service does not end the episode.

        Despite the test name, no invalid *tool* is sent: pydantic rejects
        unknown tool names, so the action could not be built through
        ``_action``. An unknown service is used instead, which still goes
        through normal tool dispatch.
        """
        env.reset(task_id=1)
        unknown_service_call = _action("get_service_status", service="nonexistent")
        result = env.step(unknown_service_call)

        # The tool itself is valid; only the service name is unknown.
        assert result.done is False
class TestSubmit:
    """Checks on the ``submit_resolution`` tool.

    ``env`` is a fixture defined outside this file section
    (presumably a ``SentinelEnvironment`` instance -- confirm in conftest).
    """

    def test_submit_resolution_grades(self, env):
        """A fully populated resolution ends the episode with a positive reward."""
        env.reset(task_id=1)
        resolution = {
            "root_cause": "Missing DB_CONNECTION_STRING after v2.3.1 deploy",
            "affected_service": "payment-api",
            "recommendation": "Rollback to v2.3.0",
        }
        graded = env.step(_action("submit_resolution", **resolution))

        assert graded.done is True
        assert graded.reward is not None
        assert graded.reward > 0

    def test_submit_missing_fields(self, env):
        """All-empty resolution fields report an error and keep the episode open."""
        env.reset(task_id=1)
        field_names = ("root_cause", "affected_service", "recommendation")
        blank_fields = dict.fromkeys(field_names, "")
        rejected = env.step(_action("submit_resolution", **blank_fields))

        assert rejected.last_action_error != ""
        assert rejected.done is False
class TestTermination:
    """Checks on the step-count and invalid-action termination rules.

    ``env`` is a fixture defined outside this file section
    (presumably a ``SentinelEnvironment`` instance -- confirm in conftest).
    """

    def test_max_steps(self, env):
        """After MAX_STEPS valid steps the episode is done with a max-steps note."""
        env.reset(task_id=1)
        for _step in range(MAX_STEPS):
            # Build a fresh action for every step, then keep the latest result.
            last_obs = env.step(
                _action("get_service_status", service="payment-api")
            )

        assert last_obs.done is True
        assert "maximum steps" in last_obs.tool_output.lower()

    def test_consecutive_invalid_not_triggered_by_valid(self, env):
        """Valid actions never count towards the consecutive-invalid limit."""
        env.reset(task_id=1)
        # One step more than the invalid-action limit, all of them valid.
        attempts = MAX_CONSECUTIVE_INVALID + 1
        for _step in range(attempts):
            last_obs = env.step(
                _action("get_service_status", service="payment-api")
            )

        # Valid actions don't trigger termination.
        assert last_obs.done is False