"""
PERMANENCE — OpenEnv-compliant action, observation, and state models.
These models inherit from openenv.core base classes so the environment
integrates natively with the OpenEnv framework, TRL, and HuggingFace Spaces.
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from openenv.core import Action, Observation, State
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# OpenEnv-native types (used by the core Environment subclass)
# ---------------------------------------------------------------------------
class PermanenceAction(Action):
    """Single agent turn submitted to the PERMANENCE environment.

    The payload is one free-form string. It is expected to contain three
    tagged parts:

    - a thinking (reasoning) block
    - an action tag
    - a reversibility tag

    The environment parses these tags internally; this model only enforces
    the length bounds on ``text`` (between 1 and 8192 characters).
    """

    text: str = Field(
        ...,
        min_length=1,
        max_length=8192,
        description=(
            "Agent's complete free-form response "
            "including thinking, action, and reversibility tags"
        ),
    )
class PermanenceObservation(Observation):
    """Observation handed back by the environment from reset() and step().

    ``done``, ``reward`` and ``metadata`` are not declared here; they are
    inherited from ``openenv.core.Observation``.
    """

    # Required: there is no default for the rendered observation text.
    text: str = Field(
        ...,
        description="Formatted world-state observation text presented to the agent",
    )

    # Validated as non-negative (ge=0); defaults to the first step.
    step: int = Field(
        0,
        ge=0,
        description="Current step number within the episode (0-indexed)",
    )

    task_id: str = Field("", description="Identifier of the current task")

    # A single comma-separated string rather than a list.
    available_actions: str = Field(
        "",
        description="Comma-separated list of action IDs available in this task",
    )
class PermanenceState(State):
    """Per-episode metadata exposed through the ``state`` property.

    ``episode_id`` and ``step_count`` are inherited from
    ``openenv.core.State``; the fields below add PERMANENCE-specific
    bookkeeping.
    """

    task_id: str = Field("", description="Current task identifier")

    # NOTE(review): the description says 1-5 but the default is 0 and no
    # range is enforced -- presumably 0 means "no task loaded"; confirm.
    task_difficulty: int = Field(0, description="Task difficulty level 1-5")

    # default_factory gives every instance its own fresh list.
    locked_actions: List[str] = Field(
        description="Action IDs locked by prior irreversible choices this episode",
        default_factory=list,
    )

    # Values are typed Any because entries are not only booleans; see the
    # description text for the kinds of metadata stored here.
    critical_options: Dict[str, Any] = Field(
        description=(
            "Tracked high-value future action paths and their availability. Most "
            "entries are booleans (option is/isn't available), but tech tasks "
            "store additional scenario metadata here (primary_table name, row "
            "counts, commit counts, etc.) so evaluators can reproduce the exact "
            "scenario."
        ),
        default_factory=dict,
    )

    # Episode-end flags; the reason stays None unless one is set.
    terminated: bool = False
    truncated: bool = False
    termination_reason: Optional[str] = None
# ---------------------------------------------------------------------------
# Server request models (used by the FastAPI layer only)
# ---------------------------------------------------------------------------
class ResetRequest(BaseModel):
    """Body schema for ``POST /reset``.

    Both fields are optional: an empty body selects ``task_correction``
    with no seed.
    """

    # The task names are listed in the description only; this model does
    # not restrict the value to that set.
    task_id: str = Field(
        "task_correction",
        description=(
            "Task to initialise. One of: task_correction, task_conflict, task_launch, "
            "task_crisis, task_cascade"
        ),
    )

    seed: Optional[int] = Field(
        None,
        description="Random seed for reproducible scenario generation. None = random.",
    )
class StepRequest(BaseModel):
    """Body schema for ``POST /step``: a single required agent action."""

    # Required field with no default; the nested model carries the
    # agent's free-form response text.
    action: PermanenceAction = Field(...)