Spaces:

chane335
/

permanence-training

Paused

App Files Files Community

permanence-training / models.py

chane335

Run 4: tech-only curriculum, 3B model, integrated deploy task

94bea2c verified about 1 month ago

raw

history blame contribute delete

3.85 kB

	"""
	PERMANENCE — OpenEnv-compliant action, observation, and state models.

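	The action, observation, and state models inherit from openenv.core base
	classes so the environment integrates natively with the OpenEnv framework,
	TRL, and HuggingFace Spaces. The request models at the bottom of this file
	are plain pydantic models used by the FastAPI layer only.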
	"""
	from __future__ import annotations

	from typing import Any, Dict, List, Optional

	from openenv.core import Action, Observation, State
	from pydantic import BaseModel, Field


	# ---------------------------------------------------------------------------
	# OpenEnv-native types (used by the core Environment subclass)
	# ---------------------------------------------------------------------------

	class PermanenceAction(Action):
	    """
	    Free-form text action submitted by the agent to PERMANENCE.

	    The single ``text`` field carries the agent's whole response, which
	    is expected to contain three tagged parts:

	    - ``<thinking>...</thinking>``: the reasoning block
	    - ``<action id="..." param1="..." .../>``: the chosen action
	    - ``<reversibility level="R1-R5" confidence="0.0-1.0"/>``: the
	      reversibility estimate

	    This model does not parse the tags; the environment does that
	    internally. Only the length of ``text`` is validated here.
	    """

	    # Required field (``...``); must be between 1 and 8192 characters.
	    text: str = Field(
	        ...,
	        description=(
	            "Agent's complete free-form response "
	            "including thinking, action, and reversibility tags"
	        ),
	        min_length=1,
	        max_length=8192,
	    )


	class PermanenceObservation(Observation):
	    """
	    Observation handed back to the agent by ``reset()`` and ``step()``.

	    ``done``, ``reward``, and ``metadata`` are not declared here; they
	    are inherited from ``openenv.core.Observation``.
	    """

	    # Required field (``...``): the rendered world state shown to the agent.
	    text: str = Field(
	        ...,
	        description=(
	            "Formatted world-state observation text "
	            "presented to the agent"
	        ),
	    )

	    # Defaults to 0; ``ge=0`` rejects negative step numbers.
	    step: int = Field(
	        0,
	        description="Current step number within the episode (0-indexed)",
	        ge=0,
	    )

	    # Defaults to the empty string.
	    task_id: str = Field("", description="Identifier of the current task")

	    # A single comma-separated string rather than a list; defaults to "".
	    available_actions: str = Field(
	        "",
	        description=(
	            "Comma-separated list of action IDs "
	            "available in this task"
	        ),
	    )


	class PermanenceState(State):
	    """
	    Per-episode metadata exposed through the ``state`` property.

	    ``episode_id`` and ``step_count`` are inherited from
	    ``openenv.core.State``; the fields below are added on top of them.
	    Every field has a default, so none is required at construction.
	    """

	    task_id: str = Field("", description="Current task identifier")

	    # NOTE(review): the default of 0 lies outside the documented 1-5 range
	    # and no bounds are enforced here. Presumably 0 means "not set";
	    # confirm against the environment code.
	    task_difficulty: int = Field(0, description="Task difficulty level 1-5")

	    # ``default_factory`` gives each instance its own fresh list.
	    locked_actions: List[str] = Field(
	        default_factory=list,
	        description=(
	            "Action IDs locked by prior irreversible choices "
	            "this episode"
	        ),
	    )

	    # Values are typed ``Any`` because entries are not all booleans
	    # (see the field description). Each instance gets a fresh dict.
	    critical_options: Dict[str, Any] = Field(
	        default_factory=dict,
	        description=(
	            "Tracked high-value future action paths and their availability. "
	            "Most entries are booleans (option is/isn't available), "
	            "but tech tasks store additional scenario metadata here "
	            "(primary_table name, row counts, commit counts, etc.) "
	            "so evaluators can reproduce the exact scenario."
	        ),
	    )

	    # Episode-end bookkeeping. These three fields carry no description.
	    # NOTE(review): presumably set by the environment when an episode
	    # ends; confirm where they are written.
	    terminated: bool = Field(False)
	    truncated: bool = Field(False)
	    termination_reason: Optional[str] = Field(None)


	# ---------------------------------------------------------------------------
	# Server request models (used by the FastAPI layer only)
	# ---------------------------------------------------------------------------

	class ResetRequest(BaseModel):
	    """
	    Request body for ``POST /reset``.

	    Both fields are optional: ``task_id`` falls back to
	    ``"task_correction"`` and ``seed`` falls back to ``None``.
	    """

	    # Plain ``str``: the task names listed in the description are not
	    # enforced by this model.
	    task_id: str = Field(
	        "task_correction",
	        description=(
	            "Task to initialise. "
	            "One of: task_correction, task_conflict, "
	            "task_launch, task_crisis, task_cascade"
	        ),
	    )

	    # ``None`` means no fixed seed (described as "random").
	    seed: Optional[int] = Field(
	        None,
	        description=(
	            "Random seed for reproducible scenario generation. "
	            "None = random."
	        ),
	    )


	class StepRequest(BaseModel):
	    """
	    Request body for ``POST /step``.

	    Wraps a single ``PermanenceAction`` under the ``action`` key.
	    """

	    # Required (no default); validated with PermanenceAction's own
	    # field constraints.
	    action: PermanenceAction