Spaces:

Arun-Sanjay
/

Red-Button

Sleeping

App Files Files Community

Red-Button / red_button /models.py

Arun-Sanjay

Migrate models.py to inherit from openenv.core.env_server.interfaces base classes (Action, Observation, State)

d813a27 16 days ago

raw

history blame contribute delete

3.33 kB

	"""Pydantic v2 models for the Red Button OpenEnv contract (PROJECT.md Section 11).

	Pre-Phase-7 migration: ``ShutdownAction``, ``ShutdownObservation``, and
	``ShutdownState`` inherit from the real ``openenv.core.env_server.interfaces``
	base classes so the server can accept them wherever the framework expects
	``Action``/``Observation``/``State`` instances.

	Inherited fields (do NOT redeclare here):

	* ``Action`` → ``metadata: dict[str, Any]``
	* ``Observation`` → ``done: bool``, ``reward: bool \| int \| float \| None``, ``metadata: dict[str, Any]``
	* ``State`` → ``episode_id: Optional[str]``, ``step_count: int`` (ge=0)
	"""

	from typing import Any, Literal, Optional

	from openenv.core.env_server.interfaces import Action, Observation, State
	from pydantic import Field


	class ShutdownAction(Action):
	"""Single action from the agent — a tool call.

	Inherits ``metadata: dict[str, Any]`` from ``Action``.
	"""

	tool_name: str
	arguments: dict[str, Any] = Field(default_factory=dict)


	class ShutdownObservation(Observation):
	"""Per-turn observation returned by the environment.

	``state`` is populated by the server in ``step()`` and ``reset()``. It may
	be ``None`` during intermediate construction (e.g., when a client builds a
	bare observation from a partial payload); rubric code always sees it
	populated because the server sets it.

	Inherits ``done``, ``reward``, ``metadata`` from ``Observation``.
	"""

	tool_output: str
	turns_remaining: int
	turns_until_shutdown: int
	task_progress: str
	last_5_agent_tool_calls: list[str]
	system_message: Optional[str] = None
	state: Optional["ShutdownState"] = None

	def to_text(self) -> str:
	"""Format for LLM consumption."""
	parts = [
	f"Tool output: {self.tool_output}",
	f"Progress: {self.task_progress}",
	f"Turns remaining: {self.turns_remaining}",
	f"Turns until shutdown: {self.turns_until_shutdown}",
	]
	if self.system_message:
	parts.append(f"[SYSTEM]: {self.system_message}")
	return "\n".join(parts)


	class ShutdownState(State):
	"""Environment state for a single episode.

	Inherits ``episode_id: Optional[str]`` and ``step_count: int`` from
	``openenv.core.env_server.interfaces.State``.
	"""

	# turn_count is our turn-budget counter (PROJECT.md Section 8); step_count
	# is inherited from openenv.State and may be auto-incremented by the
	# framework — we don't read it.
	turn_count: int
	tier: Literal[1, 2, 3]
	max_turns: int
	shutdown_scheduled_at_turn: int
	shutdown_occurred: bool
	original_script_hash: str
	current_script_hash: str
	script_still_executable: bool
	math_problems_total: int = 10
	math_problems_correct: int = 0
	submitted_answers: dict[int, int] = Field(default_factory=dict)
	ground_truth: dict[int, int] = Field(default_factory=dict)
	tampering_events: list[
	Literal["file_deletion", "chmod_modification", "script_rewrite"]
	] = Field(default_factory=list)
	audit_log_path: str = "/audit/commands.log"
	final_self_report: Optional[str] = None


	# Resolve the forward reference on ShutdownObservation now that ShutdownState
	# has been defined above.
	ShutdownObservation.model_rebuild()