Spaces:
Paused
Paused
| """ | |
| PERMANENCE — OpenEnv-compliant action, observation, and state models. | |
| These models inherit from openenv.core base classes so the environment | |
| integrates natively with the OpenEnv framework, TRL, and HuggingFace Spaces. | |
| """ | |
| from __future__ import annotations | |
| from typing import Any, Dict, List, Optional | |
| from openenv.core import Action, Observation, State | |
| from pydantic import BaseModel, Field | |
| # --------------------------------------------------------------------------- | |
| # OpenEnv-native types (used by the core Environment subclass) | |
| # --------------------------------------------------------------------------- | |
class PermanenceAction(Action):
    """
    Free-form text action submitted by the agent to the PERMANENCE environment.

    The ``text`` payload is expected to carry three tagged parts:
      - a <thinking>...</thinking> reasoning block
      - an <action id="..." param1="..." .../> tag
      - a <reversibility level="R1-R5" confidence="0.0-1.0"/> tag

    The tags are parsed inside the environment; this model only bounds
    the length of the raw text.
    """

    # Required field: must be non-empty and at most 8192 characters long.
    text: str = Field(
        ...,
        min_length=1,
        max_length=8192,
        description=(
            "Agent's complete free-form response "
            "including thinking, action, and reversibility tags"
        ),
    )
class PermanenceObservation(Observation):
    """
    Observation handed back by the environment from reset() and step().

    ``done``, ``reward``, and ``metadata`` are not declared here; they are
    inherited from ``openenv.core.Observation``.
    """

    # Required: the rendered world state shown to the agent.
    text: str = Field(
        ...,
        description="Formatted world-state observation text presented to the agent",
    )

    # Non-negative step counter; starts at 0.
    step: int = Field(
        default=0,
        ge=0,
        description="Current step number within the episode (0-indexed)",
    )

    task_id: str = Field(default="", description="Identifier of the current task")

    # Kept as a single comma-separated string rather than a list.
    available_actions: str = Field(
        default="",
        description="Comma-separated list of action IDs available in this task",
    )
class PermanenceState(State):
    """
    Episode-level metadata exposed through the ``state`` property.

    ``episode_id`` and ``step_count`` are inherited from
    ``openenv.core.State``; the fields below are PERMANENCE-specific.
    """

    task_id: str = Field(default="", description="Current task identifier")

    # NOTE(review): the default of 0 lies outside the documented 1-5 range —
    # presumably it means "no task loaded yet"; confirm with the environment.
    task_difficulty: int = Field(
        default=0,
        description="Task difficulty level 1-5",
    )

    # default_factory gives every instance its own fresh list.
    locked_actions: List[str] = Field(
        description="Action IDs locked by prior irreversible choices this episode",
        default_factory=list,
    )

    # Values are typed Any because entries are not only booleans
    # (see the description text below).
    critical_options: Dict[str, Any] = Field(
        description=(
            "Tracked high-value future action paths and their availability. "
            "Most entries are booleans (option is/isn't available), "
            "but tech tasks store additional scenario metadata here "
            "(primary_table name, row counts, commit counts, etc.) "
            "so evaluators can reproduce the exact scenario."
        ),
        default_factory=dict,
    )

    # Episode-end flags; termination_reason stays None until one is recorded.
    terminated: bool = Field(default=False)
    truncated: bool = Field(default=False)
    termination_reason: Optional[str] = Field(default=None)
| # --------------------------------------------------------------------------- | |
| # Server request models (used by the FastAPI layer only) | |
| # --------------------------------------------------------------------------- | |
class ResetRequest(BaseModel):
    """Payload accepted by ``POST /reset``."""

    # Free-form string: the model itself does not check the value against
    # the task names listed in the description.
    task_id: str = Field(
        description=(
            "Task to initialise. One of: "
            "task_correction, task_conflict, task_launch, "
            "task_crisis, task_cascade"
        ),
        default="task_correction",
    )

    # Optional seed; left as None when the caller does not supply one.
    seed: Optional[int] = Field(
        description="Random seed for reproducible scenario generation. None = random.",
        default=None,
    )
class StepRequest(BaseModel):
    """Payload accepted by ``POST /step``."""

    # Required nested model holding the agent's action text.
    action: PermanenceAction