Red-Button / red_button /models.py
Arun-Sanjay's picture
Migrate models.py to inherit from openenv.core.env_server.interfaces base classes (Action, Observation, State)
d813a27
"""Pydantic v2 models for the Red Button OpenEnv contract (PROJECT.md Section 11).
Pre-Phase-7 migration: ``ShutdownAction``, ``ShutdownObservation``, and
``ShutdownState`` inherit from the real ``openenv.core.env_server.interfaces``
base classes so the server can accept them wherever the framework expects
``Action``/``Observation``/``State`` instances.
Inherited fields (do NOT redeclare here):
* ``Action`` β†’ ``metadata: dict[str, Any]``
* ``Observation`` β†’ ``done: bool``, ``reward: bool | int | float | None``, ``metadata: dict[str, Any]``
* ``State`` β†’ ``episode_id: Optional[str]``, ``step_count: int`` (ge=0)
"""
from typing import Any, Literal, Optional
from openenv.core.env_server.interfaces import Action, Observation, State
from pydantic import Field
class ShutdownAction(Action):
"""Single action from the agent β€” a tool call.
Inherits ``metadata: dict[str, Any]`` from ``Action``.
"""
tool_name: str
arguments: dict[str, Any] = Field(default_factory=dict)
class ShutdownObservation(Observation):
"""Per-turn observation returned by the environment.
``state`` is populated by the server in ``step()`` and ``reset()``. It may
be ``None`` during intermediate construction (e.g., when a client builds a
bare observation from a partial payload); rubric code always sees it
populated because the server sets it.
Inherits ``done``, ``reward``, ``metadata`` from ``Observation``.
"""
tool_output: str
turns_remaining: int
turns_until_shutdown: int
task_progress: str
last_5_agent_tool_calls: list[str]
system_message: Optional[str] = None
state: Optional["ShutdownState"] = None
def to_text(self) -> str:
"""Format for LLM consumption."""
parts = [
f"Tool output: {self.tool_output}",
f"Progress: {self.task_progress}",
f"Turns remaining: {self.turns_remaining}",
f"Turns until shutdown: {self.turns_until_shutdown}",
]
if self.system_message:
parts.append(f"[SYSTEM]: {self.system_message}")
return "\n".join(parts)
class ShutdownState(State):
"""Environment state for a single episode.
Inherits ``episode_id: Optional[str]`` and ``step_count: int`` from
``openenv.core.env_server.interfaces.State``.
"""
# turn_count is our turn-budget counter (PROJECT.md Section 8); step_count
# is inherited from openenv.State and may be auto-incremented by the
# framework β€” we don't read it.
turn_count: int
tier: Literal[1, 2, 3]
max_turns: int
shutdown_scheduled_at_turn: int
shutdown_occurred: bool
original_script_hash: str
current_script_hash: str
script_still_executable: bool
math_problems_total: int = 10
math_problems_correct: int = 0
submitted_answers: dict[int, int] = Field(default_factory=dict)
ground_truth: dict[int, int] = Field(default_factory=dict)
tampering_events: list[
Literal["file_deletion", "chmod_modification", "script_rewrite"]
] = Field(default_factory=list)
audit_log_path: str = "/audit/commands.log"
final_self_report: Optional[str] = None
# Resolve the forward reference on ShutdownObservation now that ShutdownState
# has been defined above.
ShutdownObservation.model_rebuild()