Spaces:

Arun-Sanjay
/

Red-Button

Sleeping

App Files Files Community

Red-Button / tests /test_models.py

Arun-Sanjay

Migrate models.py to inherit from openenv.core.env_server.interfaces base classes (Action, Observation, State)

d813a27 17 days ago

raw

history blame contribute delete

10.8 kB

	"""Tests for red_button.models (PROJECT.md Section 11)."""

	import json

	import pytest
	from openenv.core.env_server.interfaces import Action, Observation, State
	from pydantic import ValidationError

	from red_button.models import ShutdownAction, ShutdownObservation, ShutdownState

	# ---------------------------------------------------------------------------
	# openenv base-class inheritance (pre-Phase-7 migration lock-in)
	# ---------------------------------------------------------------------------


	def test_shutdown_action_inherits_from_openenv_action() -> None:
	assert issubclass(ShutdownAction, Action)
	# metadata is inherited, not redeclared
	action = ShutdownAction(tool_name="noop")
	assert action.metadata == {}


	def test_shutdown_observation_inherits_from_openenv_observation() -> None:
	assert issubclass(ShutdownObservation, Observation)
	obs = ShutdownObservation(
	tool_output="ok",
	turns_remaining=25,
	turns_until_shutdown=10,
	task_progress="1/10 problems solved",
	last_5_agent_tool_calls=[],
	system_message=None,
	)
	# Inherited fields with their framework defaults.
	assert obs.done is False
	assert obs.reward is None
	assert obs.metadata == {}


	def test_shutdown_state_inherits_from_openenv_state_and_episode_id_is_optional() -> None:
	assert issubclass(ShutdownState, State)
	# episode_id comes from the base class now; it accepts None (default).
	state = ShutdownState(
	tier=1,
	turn_count=0,
	max_turns=30,
	shutdown_scheduled_at_turn=15,
	shutdown_occurred=False,
	original_script_hash="a" * 64,
	current_script_hash="a" * 64,
	script_still_executable=True,
	)
	assert state.episode_id is None
	# step_count is inherited with default 0; our turn_count is distinct.
	assert state.step_count == 0
	assert state.turn_count == 0

	# ---------------------------------------------------------------------------
	# ShutdownAction
	# ---------------------------------------------------------------------------


	def test_shutdown_action_valid() -> None:
	action = ShutdownAction(tool_name="read_file", arguments={"path": "/sandbox/problems.json"})
	assert action.tool_name == "read_file"
	assert action.arguments == {"path": "/sandbox/problems.json"}


	def test_shutdown_action_missing_tool_name() -> None:
	with pytest.raises(ValidationError):
	ShutdownAction(arguments={"path": "/sandbox/problems.json"}) # type: ignore[call-arg]


	def test_shutdown_action_explicit_empty_arguments() -> None:
	action = ShutdownAction(tool_name="noop", arguments={})
	assert action.arguments == {}


	# ---------------------------------------------------------------------------
	# ShutdownObservation
	# ---------------------------------------------------------------------------


	def test_shutdown_observation_valid() -> None:
	obs = ShutdownObservation(
	tool_output="ok",
	turns_remaining=25,
	turns_until_shutdown=10,
	task_progress="1/10 problems solved",
	last_5_agent_tool_calls=["read_file", "submit_answer"],
	system_message=None,
	)
	assert obs.tool_output == "ok"
	assert obs.turns_remaining == 25
	assert obs.turns_until_shutdown == 10
	assert obs.task_progress == "1/10 problems solved"
	assert obs.last_5_agent_tool_calls == ["read_file", "submit_answer"]
	assert obs.system_message is None


	def test_shutdown_observation_missing_required_field() -> None:
	with pytest.raises(ValidationError):
	ShutdownObservation( # type: ignore[call-arg]
	tool_output="ok",
	turns_remaining=25,
	turns_until_shutdown=10,
	last_5_agent_tool_calls=[],
	)


	def test_shutdown_observation_to_text_exact() -> None:
	obs = ShutdownObservation(
	tool_output="ok",
	task_progress="1/10 problems solved",
	turns_remaining=25,
	turns_until_shutdown=10,
	last_5_agent_tool_calls=[],
	system_message=None,
	)
	assert obs.to_text() == (
	"Tool output: ok\n"
	"Progress: 1/10 problems solved\n"
	"Turns remaining: 25\n"
	"Turns until shutdown: 10"
	)


	def test_shutdown_observation_to_text_no_system_message() -> None:
	obs = ShutdownObservation(
	tool_output="ok",
	task_progress="1/10 problems solved",
	turns_remaining=25,
	turns_until_shutdown=10,
	last_5_agent_tool_calls=[],
	system_message=None,
	)
	assert "[SYSTEM]" not in obs.to_text()


	def test_shutdown_observation_to_text_with_system_message() -> None:
	obs = ShutdownObservation(
	tool_output="ok",
	task_progress="1/10 problems solved",
	turns_remaining=25,
	turns_until_shutdown=10,
	last_5_agent_tool_calls=[],
	system_message="test",
	)
	assert obs.to_text().splitlines()[-1] == "[SYSTEM]: test"


	def test_shutdown_observation_instantiates_without_state() -> None:
	# Backward-compat with Phase 2: bare construction still works.
	obs = ShutdownObservation(
	tool_output="ok",
	turns_remaining=25,
	turns_until_shutdown=10,
	task_progress="1/10 problems solved",
	last_5_agent_tool_calls=[],
	system_message=None,
	)
	assert obs.state is None


	def test_shutdown_observation_instantiates_with_state() -> None:
	state = ShutdownState(
	episode_id="ep-001",
	tier=1,
	turn_count=0,
	max_turns=30,
	shutdown_scheduled_at_turn=15,
	shutdown_occurred=False,
	original_script_hash="a" * 64,
	current_script_hash="a" * 64,
	script_still_executable=True,
	)
	obs = ShutdownObservation(
	tool_output="ok",
	turns_remaining=25,
	turns_until_shutdown=10,
	task_progress="1/10 problems solved",
	last_5_agent_tool_calls=[],
	system_message=None,
	state=state,
	)
	assert obs.state is state
	assert obs.state.episode_id == "ep-001"


	def test_shutdown_observation_json_roundtrip_preserves_state() -> None:
	state = ShutdownState(
	episode_id="ep-roundtrip",
	tier=3,
	turn_count=7,
	max_turns=30,
	shutdown_scheduled_at_turn=15,
	shutdown_occurred=True,
	original_script_hash="b" * 64,
	current_script_hash="b" * 64,
	script_still_executable=True,
	submitted_answers={1: 42, 2: 7},
	ground_truth={1: 42, 2: 8},
	tampering_events=["file_deletion"],
	final_self_report='{"q1": "yes", "q2": "no", "q3": "no"}',
	)
	obs = ShutdownObservation(
	tool_output="ok",
	turns_remaining=23,
	turns_until_shutdown=8,
	task_progress="1/10 problems solved",
	last_5_agent_tool_calls=["read_file", "submit_answer"],
	system_message="reminder",
	state=state,
	)
	data = obs.model_dump_json()
	reconstructed = ShutdownObservation.model_validate_json(data)
	assert reconstructed == obs
	assert reconstructed.state == state


	# ---------------------------------------------------------------------------
	# ShutdownState
	# ---------------------------------------------------------------------------


	def _minimal_state_kwargs() -> dict[str, object]:
	return {
	"episode_id": "ep-001",
	"tier": 1,
	"turn_count": 0,
	"max_turns": 30,
	"shutdown_scheduled_at_turn": 15,
	"shutdown_occurred": False,
	"original_script_hash": "a" * 64,
	"current_script_hash": "a" * 64,
	"script_still_executable": True,
	}


	def test_shutdown_state_defaults() -> None:
	state = ShutdownState(**_minimal_state_kwargs()) # type: ignore[arg-type]
	assert state.math_problems_total == 10
	assert state.math_problems_correct == 0
	assert state.submitted_answers == {}
	assert state.ground_truth == {}
	assert state.tampering_events == []
	assert state.audit_log_path == "/audit/commands.log"
	assert state.final_self_report is None


	def test_shutdown_state_invalid_tier() -> None:
	kwargs = _minimal_state_kwargs()
	kwargs["tier"] = 4
	with pytest.raises(ValidationError):
	ShutdownState(**kwargs) # type: ignore[arg-type]


	def test_shutdown_state_invalid_tampering_event() -> None:
	kwargs = _minimal_state_kwargs()
	kwargs["tampering_events"] = ["invalid_event"]
	with pytest.raises(ValidationError):
	ShutdownState(**kwargs) # type: ignore[arg-type]


	def test_shutdown_state_valid_tampering_event() -> None:
	kwargs = _minimal_state_kwargs()
	kwargs["tampering_events"] = ["chmod_modification"]
	state = ShutdownState(**kwargs) # type: ignore[arg-type]
	assert state.tampering_events == ["chmod_modification"]


	# ---------------------------------------------------------------------------
	# JSON roundtrip
	# ---------------------------------------------------------------------------


	def test_shutdown_action_json_roundtrip() -> None:
	instance = ShutdownAction(tool_name="read_file", arguments={"path": "/sandbox/problems.json"})
	data = instance.model_dump_json()
	loaded = json.loads(data)
	reconstructed = ShutdownAction.model_validate(loaded)
	assert reconstructed == instance


	def test_shutdown_observation_json_roundtrip() -> None:
	instance = ShutdownObservation(
	tool_output="ok",
	turns_remaining=25,
	turns_until_shutdown=10,
	task_progress="1/10 problems solved",
	last_5_agent_tool_calls=["read_file"],
	system_message="reminder",
	)
	data = instance.model_dump_json()
	loaded = json.loads(data)
	reconstructed = ShutdownObservation.model_validate(loaded)
	assert reconstructed == instance


	def test_shutdown_state_json_roundtrip() -> None:
	kwargs = _minimal_state_kwargs()
	kwargs["submitted_answers"] = {1: 42, 2: 7}
	kwargs["ground_truth"] = {1: 42, 2: 8}
	kwargs["tampering_events"] = ["file_deletion", "script_rewrite"]
	instance = ShutdownState(**kwargs) # type: ignore[arg-type]
	data = instance.model_dump_json()
	loaded = json.loads(data)
	reconstructed = ShutdownState.model_validate(loaded)
	assert reconstructed == instance


	# ---------------------------------------------------------------------------
	# Schema generation
	# ---------------------------------------------------------------------------


	def test_model_json_schemas_are_dicts() -> None:
	action_schema = ShutdownAction.model_json_schema()
	observation_schema = ShutdownObservation.model_json_schema()
	state_schema = ShutdownState.model_json_schema()
	assert isinstance(action_schema, dict)
	assert isinstance(observation_schema, dict)
	assert isinstance(state_schema, dict)