"""
PERMANENCE — OpenEnv-compliant action, observation, and state models.

These models inherit from openenv.core base classes so the environment
integrates natively with the OpenEnv framework, TRL, and HuggingFace Spaces.
"""

from __future__ import annotations

from typing import Any, Dict, List, Optional

from openenv.core import Action, Observation, State
from pydantic import BaseModel, Field


# ---------------------------------------------------------------------------
# OpenEnv-native types (used by the core Environment subclass)
# ---------------------------------------------------------------------------


class PermanenceAction(Action):
    """
    Agent action for the PERMANENCE environment.

    The agent produces free-form text containing:
      - A <thinking>...</thinking> reasoning block
      - An <action> tag
      - A <reversibility> tag

    The environment parses these tags internally.
    """

    # NOTE(review): the tag spellings in the docstring are reconstructed from
    # the field description below ("thinking, action, and reversibility
    # tags"); confirm the exact markup against the environment's parser.

    # Required field; the constraints bound the text to 1..8192 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=8192,
        description=(
            "Agent's complete free-form response including "
            "thinking, action, and reversibility tags"
        ),
    )


class PermanenceObservation(Observation):
    """
    Environment observation returned after reset() and step().

    Inherits ``done``, ``reward``, and ``metadata`` from
    ``openenv.core.Observation``.
    """

    # Required: there is no default for the rendered observation text.
    text: str = Field(
        ...,
        description="Formatted world-state observation text presented to the agent",
    )
    # Validated as non-negative (ge=0); defaults to the first step.
    step: int = Field(
        0,
        ge=0,
        description="Current step number within the episode (0-indexed)",
    )
    task_id: str = Field("", description="Identifier of the current task")
    # Carried as a single comma-separated string rather than a list.
    available_actions: str = Field(
        "",
        description="Comma-separated list of action IDs available in this task",
    )


class PermanenceState(State):
    """
    Episode-level metadata returned by the ``state`` property.

    Inherits ``episode_id`` and ``step_count`` from ``openenv.core.State``.
    """

    task_id: str = Field("", description="Current task identifier")
    # Defaults to 0 even though the described range is 1-5; no range
    # constraint is enforced on this field.
    task_difficulty: int = Field(0, description="Task difficulty level 1-5")
    # default_factory gives every instance its own fresh container.
    locked_actions: List[str] = Field(
        default_factory=list,
        description="Action IDs locked by prior irreversible choices this episode",
    )
    critical_options: Dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Tracked high-value future action paths and their availability. "
            "Most entries are booleans (option is/isn't available), "
            "but tech tasks store additional scenario metadata here "
            "(primary_table name, row counts, commit counts, etc.) "
            "so evaluators can reproduce the exact scenario."
        ),
    )
    # Episode-end flags and the optional reason; plain defaults, no extra
    # field metadata.
    terminated: bool = False
    truncated: bool = False
    termination_reason: Optional[str] = None


# ---------------------------------------------------------------------------
# Server request models (used by the FastAPI layer only)
# ---------------------------------------------------------------------------


class ResetRequest(BaseModel):
    """Request body for ``POST /reset``."""

    task_id: str = Field(
        "task_correction",
        description=(
            "Task to initialise. One of: "
            "task_correction, task_conflict, task_launch, "
            "task_crisis, task_cascade"
        ),
    )
    # Optional; the default of None is documented as "random".
    seed: Optional[int] = Field(
        None,
        description="Random seed for reproducible scenario generation. None = random.",
    )


class StepRequest(BaseModel):
    """Request body for ``POST /step``."""

    # The agent's action payload, validated as a PermanenceAction.
    action: PermanenceAction