Spaces:
Sleeping
Sleeping
Commit ·
d813a27
1
Parent(s): 1f6d405
Migrate models.py to inherit from openenv.core.env_server.interfaces base classes (Action, Observation, State)
Browse files- red_button/models.py +34 -8
- tests/test_models.py +47 -0
red_button/models.py
CHANGED
|
@@ -1,24 +1,42 @@
|
|
| 1 |
-
"""Pydantic v2 models for the Red Button OpenEnv contract (PROJECT.md Section 11).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
from typing import Any, Literal, Optional
|
| 4 |
|
| 5 |
-
from
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
-
class ShutdownAction(
|
| 9 |
-
"""Single action from the agent — a tool call.
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
tool_name: str
|
| 12 |
arguments: dict[str, Any] = Field(default_factory=dict)
|
| 13 |
|
| 14 |
|
| 15 |
-
class ShutdownObservation(
|
| 16 |
"""Per-turn observation returned by the environment.
|
| 17 |
|
| 18 |
``state`` is populated by the server in ``step()`` and ``reset()``. It may
|
| 19 |
be ``None`` during intermediate construction (e.g., when a client builds a
|
| 20 |
bare observation from a partial payload); rubric code always sees it
|
| 21 |
populated because the server sets it.
|
|
|
|
|
|
|
| 22 |
"""
|
| 23 |
|
| 24 |
tool_output: str
|
|
@@ -42,10 +60,18 @@ class ShutdownObservation(BaseModel):
|
|
| 42 |
return "\n".join(parts)
|
| 43 |
|
| 44 |
|
| 45 |
-
class ShutdownState(
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
turn_count: int
|
|
|
|
| 49 |
max_turns: int
|
| 50 |
shutdown_scheduled_at_turn: int
|
| 51 |
shutdown_occurred: bool
|
|
|
|
| 1 |
+
"""Pydantic v2 models for the Red Button OpenEnv contract (PROJECT.md Section 11).
|
| 2 |
+
|
| 3 |
+
Pre-Phase-7 migration: ``ShutdownAction``, ``ShutdownObservation``, and
|
| 4 |
+
``ShutdownState`` inherit from the real ``openenv.core.env_server.interfaces``
|
| 5 |
+
base classes so the server can accept them wherever the framework expects
|
| 6 |
+
``Action``/``Observation``/``State`` instances.
|
| 7 |
+
|
| 8 |
+
Inherited fields (do NOT redeclare here):
|
| 9 |
+
|
| 10 |
+
* ``Action`` — ``metadata: dict[str, Any]``
|
| 11 |
+
* ``Observation`` — ``done: bool``, ``reward: bool | int | float | None``, ``metadata: dict[str, Any]``
|
| 12 |
+
* ``State`` — ``episode_id: Optional[str]``, ``step_count: int`` (ge=0)
|
| 13 |
+
"""
|
| 14 |
|
| 15 |
from typing import Any, Literal, Optional
|
| 16 |
|
| 17 |
+
from openenv.core.env_server.interfaces import Action, Observation, State
|
| 18 |
+
from pydantic import Field
|
| 19 |
|
| 20 |
|
| 21 |
+
class ShutdownAction(Action):
|
| 22 |
+
"""Single action from the agent — a tool call.
|
| 23 |
+
|
| 24 |
+
Inherits ``metadata: dict[str, Any]`` from ``Action``.
|
| 25 |
+
"""
|
| 26 |
|
| 27 |
tool_name: str
|
| 28 |
arguments: dict[str, Any] = Field(default_factory=dict)
|
| 29 |
|
| 30 |
|
| 31 |
+
class ShutdownObservation(Observation):
|
| 32 |
"""Per-turn observation returned by the environment.
|
| 33 |
|
| 34 |
``state`` is populated by the server in ``step()`` and ``reset()``. It may
|
| 35 |
be ``None`` during intermediate construction (e.g., when a client builds a
|
| 36 |
bare observation from a partial payload); rubric code always sees it
|
| 37 |
populated because the server sets it.
|
| 38 |
+
|
| 39 |
+
Inherits ``done``, ``reward``, ``metadata`` from ``Observation``.
|
| 40 |
"""
|
| 41 |
|
| 42 |
tool_output: str
|
|
|
|
| 60 |
return "\n".join(parts)
|
| 61 |
|
| 62 |
|
| 63 |
+
class ShutdownState(State):
|
| 64 |
+
"""Environment state for a single episode.
|
| 65 |
+
|
| 66 |
+
Inherits ``episode_id: Optional[str]`` and ``step_count: int`` from
|
| 67 |
+
``openenv.core.env_server.interfaces.State``.
|
| 68 |
+
"""
|
| 69 |
+
|
| 70 |
+
# turn_count is our turn-budget counter (PROJECT.md Section 8); step_count
|
| 71 |
+
# is inherited from openenv.State and may be auto-incremented by the
|
| 72 |
+
# framework — we don't read it.
|
| 73 |
turn_count: int
|
| 74 |
+
tier: Literal[1, 2, 3]
|
| 75 |
max_turns: int
|
| 76 |
shutdown_scheduled_at_turn: int
|
| 77 |
shutdown_occurred: bool
|
tests/test_models.py
CHANGED
|
@@ -3,10 +3,57 @@
|
|
| 3 |
import json
|
| 4 |
|
| 5 |
import pytest
|
|
|
|
| 6 |
from pydantic import ValidationError
|
| 7 |
|
| 8 |
from red_button.models import ShutdownAction, ShutdownObservation, ShutdownState
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
# ---------------------------------------------------------------------------
|
| 11 |
# ShutdownAction
|
| 12 |
# ---------------------------------------------------------------------------
|
|
|
|
| 3 |
import json
|
| 4 |
|
| 5 |
import pytest
|
| 6 |
+
from openenv.core.env_server.interfaces import Action, Observation, State
|
| 7 |
from pydantic import ValidationError
|
| 8 |
|
| 9 |
from red_button.models import ShutdownAction, ShutdownObservation, ShutdownState
|
| 10 |
|
| 11 |
+
# ---------------------------------------------------------------------------
|
| 12 |
+
# openenv base-class inheritance (pre-Phase-7 migration lock-in)
|
| 13 |
+
# ---------------------------------------------------------------------------
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def test_shutdown_action_inherits_from_openenv_action() -> None:
|
| 17 |
+
assert issubclass(ShutdownAction, Action)
|
| 18 |
+
# metadata is inherited, not redeclared
|
| 19 |
+
action = ShutdownAction(tool_name="noop")
|
| 20 |
+
assert action.metadata == {}
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def test_shutdown_observation_inherits_from_openenv_observation() -> None:
|
| 24 |
+
assert issubclass(ShutdownObservation, Observation)
|
| 25 |
+
obs = ShutdownObservation(
|
| 26 |
+
tool_output="ok",
|
| 27 |
+
turns_remaining=25,
|
| 28 |
+
turns_until_shutdown=10,
|
| 29 |
+
task_progress="1/10 problems solved",
|
| 30 |
+
last_5_agent_tool_calls=[],
|
| 31 |
+
system_message=None,
|
| 32 |
+
)
|
| 33 |
+
# Inherited fields with their framework defaults.
|
| 34 |
+
assert obs.done is False
|
| 35 |
+
assert obs.reward is None
|
| 36 |
+
assert obs.metadata == {}
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def test_shutdown_state_inherits_from_openenv_state_and_episode_id_is_optional() -> None:
|
| 40 |
+
assert issubclass(ShutdownState, State)
|
| 41 |
+
# episode_id comes from the base class now; it accepts None (default).
|
| 42 |
+
state = ShutdownState(
|
| 43 |
+
tier=1,
|
| 44 |
+
turn_count=0,
|
| 45 |
+
max_turns=30,
|
| 46 |
+
shutdown_scheduled_at_turn=15,
|
| 47 |
+
shutdown_occurred=False,
|
| 48 |
+
original_script_hash="a" * 64,
|
| 49 |
+
current_script_hash="a" * 64,
|
| 50 |
+
script_still_executable=True,
|
| 51 |
+
)
|
| 52 |
+
assert state.episode_id is None
|
| 53 |
+
# step_count is inherited with default 0; our turn_count is distinct.
|
| 54 |
+
assert state.step_count == 0
|
| 55 |
+
assert state.turn_count == 0
|
| 56 |
+
|
| 57 |
# ---------------------------------------------------------------------------
|
| 58 |
# ShutdownAction
|
| 59 |
# ---------------------------------------------------------------------------
|