""" AgentOps Gym — Pydantic models for Action, Observation, and State. The agent operates on a simulated Python codebase by calling tools. The environment is partially observable, stateful, and efficiency-aware. Rewards shrink with wasteful or redundant tool calls. """ from typing import Optional, List, Dict, Any from pydantic import Field from openenv.core.env_server.types import Action, Observation, State class ToolCall(Action): """Agent submits a tool call with a name and parameters. Open action space: any valid tool name from AVAILABLE_TOOLS with any parameters. This mirrors how real agents interact with tool-use environments — no artificial discretization. """ tool: str = Field( ..., description="Tool name (FileRead, FileWrite, Grep, Bash, WebSearch, TodoWrite)" ) parameters: Dict[str, Any] = Field( default_factory=dict, description="Tool parameters, e.g. {'filename': 'main.py'} or {'pattern': 'def fetch'}" ) reasoning: Optional[str] = Field( default=None, description="Optional: why the agent is calling this tool (for interpretability)" ) class AgentObservation(Observation): """What the agent sees after each action. Inherits from Observation which provides: - done: bool - reward: Optional[float] - metadata: Dict[str, Any] """ # Files the agent has discovered so far (partial observability) visible_files: List[str] = Field( default_factory=list, description="Files the agent currently knows exist in the project" ) # Output of the most recent tool call last_tool_result: Optional[str] = Field( default=None, description="Output string from the last tool call" ) # Sequential history of tool calls made this episode action_history: List[str] = Field( default_factory=list, description="e.g. ['Grep(pattern=timeout)', 'FileRead(config.json)']" ) step_count: int = Field(default=0, description="How many steps taken so far") task_description: str = Field(default="", description="The task the agent must solve") # Feedback from the environment on quality of last action message: Optional[str] = Field( default=None, description="Environment feedback e.g. 'redundant call detected'" ) class AgentState(State): """Episode metadata for training harnesses and curriculum schedulers. Inherits from State which provides: - episode_id: Optional[str] - step_count: int """ task_id: str = Field(default="", description="Current task identifier") task_description: str = Field(default="", description="Human-readable task description") difficulty: str = Field(default="", description="easy / medium / hard") max_steps: int = Field(default=10, description="Max steps allowed this episode") visible_files: List[str] = Field(default_factory=list) discovered_files: List[str] = Field(default_factory=list) action_history: List[str] = Field(default_factory=list) current_reward: float = Field(default=0.0, description="Cumulative reward so far") completed: bool = Field(default=False) grader_score: Optional[float] = Field( default=None, description="Final grader score (0.0-1.0), set at end of episode" )