Spaces:

NoNameFound
/

sentinel_env

Sleeping

App Files Files Community

sentinel_env / tests /test_environment.py

KaushikSarveswaran

Initial submission: OpenEnv-Sentinel SRE triage environment

33dd3ee about 1 month ago

raw

history blame contribute delete

3.54 kB

	"""Tests for SentinelEnvironment reset, step, submit, and termination."""

	import pytest

	from models import SentinelAction
	from server.sentinel_environment import SentinelEnvironment, MAX_STEPS, MAX_CONSECUTIVE_INVALID


	def _action(tool_name: str, **params):
	    """Build a validated ``SentinelAction`` for a tool call.

	    ``tool_name`` becomes the action's ``tool_name`` field and every extra
	    keyword argument is collected into its ``parameters`` mapping.
	    """
	    payload = {"tool_name": tool_name, "parameters": params}
	    return SentinelAction.model_validate(payload)


	class TestReset:
	    """Checks on the observation handed back by ``env.reset``."""

	    def test_reset_returns_observation(self, env):
	        """A fresh episode is not done, has a summary, and sits at step 0."""
	        initial = env.reset(task_id=1)
	        assert initial.done is False
	        assert initial.incident_summary != ""
	        assert initial.step_number == 0

	    def test_reset_includes_tool_descriptions(self, env):
	        """The reset observation lists the tools, including ``query_logs``."""
	        initial = env.reset(task_id=1)
	        descriptions = initial.tool_descriptions
	        assert descriptions != {}
	        assert "query_logs" in descriptions

	    def test_reset_invalid_task_defaults_to_1(self, env):
	        """Resetting with task id 999 is expected to leave the state on task 1."""
	        initial = env.reset(task_id=999)
	        assert initial.done is False
	        assert env.state.task_id == 1


	class TestStep:
	    """Checks on the observation handed back by ``env.step``."""

	    def test_step_without_reset(self, env):
	        """Stepping before any reset ends the episode with a "not reset" error."""
	        result = env.step(_action("get_service_status", service="auth"))
	        assert result.done is True
	        error_text = result.last_action_error.lower()
	        assert "not reset" in error_text

	    def test_valid_step_returns_output(self, env):
	        """A valid tool call yields output, keeps the episode open, and counts as step 1."""
	        env.reset(task_id=1)
	        status_call = _action("get_service_status", service="payment-api")
	        result = env.step(status_call)
	        assert result.tool_output != ""
	        assert result.done is False
	        assert result.step_number == 1

	    def test_step_no_tool_descriptions(self, env):
	        """Step observations carry an empty ``tool_descriptions`` mapping."""
	        env.reset(task_id=1)
	        status_call = _action("get_service_status", service="payment-api")
	        result = env.step(status_call)
	        assert result.tool_descriptions == {}

	    def test_invalid_tool(self, env):
	        """An unknown service on a valid tool does not end the episode.

	        Despite the name, no invalid tool is sent here: pydantic rejects
	        unknown tool names, so the closest reachable case is a valid tool
	        pointed at a service that does not exist.
	        """
	        env.reset(task_id=1)
	        unknown_service_call = _action("get_service_status", service="nonexistent")
	        result = env.step(unknown_service_call)
	        # The tool itself is valid; only the service is unknown.
	        assert result.done is False


	class TestSubmit:
	    """Checks on how ``submit_resolution`` actions are handled."""

	    def test_submit_resolution_grades(self, env):
	        """A filled-in resolution ends the episode with a positive reward."""
	        env.reset(task_id=1)
	        resolution = {
	            "root_cause": "Missing DB_CONNECTION_STRING after v2.3.1 deploy",
	            "affected_service": "payment-api",
	            "recommendation": "Rollback to v2.3.0",
	        }
	        result = env.step(_action("submit_resolution", **resolution))
	        assert result.done is True
	        assert result.reward is not None
	        assert result.reward > 0

	    def test_submit_missing_fields(self, env):
	        """A resolution with all-empty fields reports an error and stays open."""
	        env.reset(task_id=1)
	        field_names = ("root_cause", "affected_service", "recommendation")
	        blank_resolution = dict.fromkeys(field_names, "")
	        result = env.step(_action("submit_resolution", **blank_resolution))
	        assert result.last_action_error != ""
	        assert result.done is False


	class TestTermination:
	    """Checks on the conditions that end (or do not end) an episode."""

	    def test_max_steps(self, env):
	        """After ``MAX_STEPS`` valid steps the episode is done and says why."""
	        env.reset(task_id=1)
	        for _ in range(MAX_STEPS):
	            # A fresh action is built for every step, as a caller would do.
	            last = env.step(_action("get_service_status", service="payment-api"))
	        assert last.done is True
	        final_output = last.tool_output.lower()
	        assert "maximum steps" in final_output

	    def test_consecutive_invalid_not_triggered_by_valid(self, env):
	        """``MAX_CONSECUTIVE_INVALID + 1`` valid steps in a row keep the episode open."""
	        env.reset(task_id=1)
	        step_count = MAX_CONSECUTIVE_INVALID + 1
	        for _ in range(step_count):
	            last = env.step(_action("get_service_status", service="payment-api"))
	        # Valid actions must not count towards the invalid-action limit.
	        assert last.done is False