File size: 3,845 Bytes
21c24ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
PERMANENCE — OpenEnv-compliant action, observation, and state models.

These models inherit from openenv.core base classes so the environment
integrates natively with the OpenEnv framework, TRL, and HuggingFace Spaces.
"""
from __future__ import annotations

from typing import Any, Dict, List, Optional

from openenv.core import Action, Observation, State
from pydantic import BaseModel, Field


# ---------------------------------------------------------------------------
# OpenEnv-native types (used by the core Environment subclass)
# ---------------------------------------------------------------------------

class PermanenceAction(Action):
    """
    Single agent turn submitted to the PERMANENCE environment.

    The payload is one free-form string that is expected to carry:
    - a <thinking>...</thinking> block with the agent's reasoning,
    - an <action id="..." param1="..." .../> tag,
    - a <reversibility level="R1-R5" confidence="0.0-1.0"/> tag.

    The tags are parsed inside the environment; this model only bounds the
    length of the raw text.
    """

    # Required field (no default); accepted length is 1..8192 characters.
    text: str = Field(
        ...,
        min_length=1,
        max_length=8192,
        description=(
            "Agent's complete free-form response "
            "including thinking, action, and reversibility tags"
        ),
    )


class PermanenceObservation(Observation):
    """
    What the environment hands back from reset() and step().

    ``done``, ``reward``, and ``metadata`` are not declared here; they are
    inherited from ``openenv.core.Observation``.
    """

    # The only required field: the rendered observation shown to the agent.
    text: str = Field(
        ...,
        description="Formatted world-state observation text presented to the agent",
    )

    # Everything below is optional and falls back to a neutral default.
    step: int = Field(
        ge=0,  # negative step numbers are rejected
        default=0,
        description="Current step number within the episode (0-indexed)",
    )
    task_id: str = Field(
        description="Identifier of the current task",
        default="",
    )
    # Kept as a single comma-separated string rather than a list.
    available_actions: str = Field(
        description="Comma-separated list of action IDs available in this task",
        default="",
    )


class PermanenceState(State):
    """
    Per-episode bookkeeping exposed through the ``state`` property.

    ``episode_id`` and ``step_count`` are inherited from
    ``openenv.core.State``; the fields below add task- and
    termination-related details.
    """

    task_id: str = Field(description="Current task identifier", default="")
    # NOTE(review): the default of 0 lies outside the 1-5 range named in the
    # description — presumably it means "no task loaded"; confirm.
    task_difficulty: int = Field(description="Task difficulty level 1-5", default=0)

    # Mutable containers use default_factory so instances never share them.
    locked_actions: List[str] = Field(
        description="Action IDs locked by prior irreversible choices this episode",
        default_factory=list,
    )
    critical_options: Dict[str, Any] = Field(
        description=(
            "Tracked high-value future action paths and their availability. Most "
            "entries are booleans (option is/isn't available), but tech tasks "
            "store additional scenario metadata here (primary_table name, row "
            "counts, commit counts, etc.) so evaluators can reproduce the exact "
            "scenario."
        ),
        default_factory=dict,
    )

    # Termination bookkeeping; these fields carry no schema description.
    terminated: bool = False
    truncated: bool = False
    termination_reason: Optional[str] = None


# ---------------------------------------------------------------------------
# Server request models (used by the FastAPI layer only)
# ---------------------------------------------------------------------------

class ResetRequest(BaseModel):
    """Payload accepted by ``POST /reset``."""

    # Falls back to "task_correction" when the caller names no task.
    task_id: str = Field(
        description=(
            "Task to initialise. One of: task_correction, "
            "task_conflict, task_launch, task_crisis, task_cascade"
        ),
        default="task_correction",
    )
    # Optional; None is the default and is described as "random".
    seed: Optional[int] = Field(
        description="Random seed for reproducible scenario generation. None = random.",
        default=None,
    )


class StepRequest(BaseModel):
    """Payload accepted by ``POST /step``; wraps a single agent action."""

    # Required: Ellipsis marks the field as having no default.
    action: PermanenceAction = Field(...)