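"""
PERMANENCE — OpenEnv-compliant action, observation, and state models.

The action, observation, and state models inherit from openenv.core base
classes so the environment integrates natively with the OpenEnv framework,
TRL, and HuggingFace Spaces. The ``ResetRequest`` and ``StepRequest`` models
are plain pydantic request bodies for the ``POST /reset`` and ``POST /step``
endpoints.
"""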
| from __future__ import annotations |
|
|
| from typing import Any, Dict, List, Optional |
|
|
| from openenv.core import Action, Observation, State |
| from pydantic import BaseModel, Field |
|
|
|
|
| |
| |
| |
|
|
class PermanenceAction(Action):
    """
    Agent action for the PERMANENCE environment.

    The agent produces free-form text containing:
    - A <thinking>...</thinking> reasoning block
    - An <action id="..." param1="..." .../> tag
    - A <reversibility level="R1-R5" confidence="0.0-1.0"/> tag

    The environment parses these tags internally.
    """

    # The only field on the action: the agent's raw response text.
    # It is required (no default) and its length is constrained to
    # 1..8192 characters by the Field arguments below.
    text: str = Field(
        ...,
        min_length=1,
        max_length=8192,
        description=(
            "Agent's complete free-form response "
            "including thinking, action, and reversibility tags"
        ),
    )
|
|
|
|
class PermanenceObservation(Observation):
    """
    Environment observation returned after reset() and step().

    Inherits ``done``, ``reward``, and ``metadata`` from
    ``openenv.core.Observation``.
    """

    # Required: the rendered observation shown to the agent.
    text: str = Field(
        ...,
        description="Formatted world-state observation text presented to the agent",
    )

    # Non-negative step counter; defaults to 0.
    step: int = Field(
        0,
        ge=0,
        description="Current step number within the episode (0-indexed)",
    )

    # Empty string when no task identifier has been supplied.
    task_id: str = Field("", description="Identifier of the current task")

    # Carried as one comma-separated string rather than a list.
    available_actions: str = Field(
        "",
        description="Comma-separated list of action IDs available in this task",
    )
|
|
|
|
class PermanenceState(State):
    """
    Episode-level metadata returned by the ``state`` property.

    Inherits ``episode_id`` and ``step_count`` from ``openenv.core.State``.
    """

    task_id: str = Field("", description="Current task identifier")

    # NOTE(review): the description says 1-5, but the default is 0 and no
    # bounds are enforced here — presumably 0 means "no task loaded yet";
    # confirm with the environment before adding ge/le constraints.
    task_difficulty: int = Field(0, description="Task difficulty level 1-5")

    # default_factory gives every instance its own fresh list.
    locked_actions: List[str] = Field(
        description="Action IDs locked by prior irreversible choices this episode",
        default_factory=list,
    )

    # Values are typed Any because, per the description, entries mix
    # booleans with free-form scenario metadata. Each instance gets its
    # own fresh dict.
    critical_options: Dict[str, Any] = Field(
        description=(
            "Tracked high-value future action paths and their availability. "
            "Most entries are booleans (option is/isn't available), "
            "but tech tasks store additional scenario metadata here "
            "(primary_table name, row counts, commit counts, etc.) "
            "so evaluators can reproduce the exact scenario."
        ),
        default_factory=dict,
    )

    # Episode-end bookkeeping. These three fields carry no description in
    # the schema; presumably ``terminated`` marks a natural end and
    # ``truncated`` an externally imposed cut-off — TODO confirm against
    # the environment implementation.
    terminated: bool = Field(False)
    truncated: bool = Field(False)
    termination_reason: Optional[str] = Field(None)
|
|
|
|
| |
| |
| |
|
|
class ResetRequest(BaseModel):
    """Request body for ``POST /reset``."""

    # Falls back to "task_correction" when the client omits the field.
    # The allowed names are listed in the description only; nothing in this
    # model validates the value against that list.
    task_id: str = Field(
        "task_correction",
        description=(
            "Task to initialise. One of: "
            "task_correction, task_conflict, task_launch, task_crisis, task_cascade"
        ),
    )

    # Optional seed; None is the default.
    seed: Optional[int] = Field(
        None,
        description="Random seed for reproducible scenario generation. None = random.",
    )
|
|
|
|
class StepRequest(BaseModel):
    """Request body for ``POST /step``."""

    # Required (no default): the agent's response wrapped in a
    # PermanenceAction, so its ``text`` length limits apply here as well.
    action: PermanenceAction
|
|