"""
AgentOps Gym — Pydantic models for Action, Observation, and State.
The agent operates on a simulated Python codebase by calling tools.
The environment is partially observable, stateful, and efficiency-aware.
Rewards shrink with wasteful or redundant tool calls.
"""
from typing import Optional, List, Dict, Any
from pydantic import Field
from openenv.core.env_server.types import Action, Observation, State
class ToolCall(Action):
    """A single tool invocation submitted by the agent.

    The action space is open: the agent may name any valid tool from
    AVAILABLE_TOOLS and attach arbitrary parameters. This mirrors how real
    agents interact with tool-use environments, with no artificial
    discretization of the choices.
    """

    # Required field: the ``...`` default means a value must be supplied.
    tool: str = Field(
        ...,
        description="Tool name (FileRead, FileWrite, Grep, Bash, WebSearch, TodoWrite)",
    )

    # default_factory gives every instance its own empty dict when omitted.
    parameters: Dict[str, Any] = Field(
        description="Tool parameters, e.g. {'filename': 'main.py'} or {'pattern': 'def fetch'}",
        default_factory=dict,
    )

    # Free-text rationale; may be left unset.
    reasoning: Optional[str] = Field(
        description="Optional: why the agent is calling this tool (for interpretability)",
        default=None,
    )
class AgentObservation(Observation):
    """The view of the environment returned to the agent after each action.

    Fields inherited from Observation:
        - done: bool
        - reward: Optional[float]
        - metadata: Dict[str, Any]
    """

    # Partial observability: only the files discovered so far are listed.
    visible_files: List[str] = Field(
        description="Files the agent currently knows exist in the project",
        default_factory=list,
    )

    # Result text produced by the most recent tool call (unset by default).
    last_tool_result: Optional[str] = Field(
        description="Output string from the last tool call",
        default=None,
    )

    # Ordered log of the tool calls issued during this episode.
    action_history: List[str] = Field(
        description="e.g. ['Grep(pattern=timeout)', 'FileRead(config.json)']",
        default_factory=list,
    )

    step_count: int = Field(
        description="How many steps taken so far",
        default=0,
    )
    task_description: str = Field(
        description="The task the agent must solve",
        default="",
    )

    # Environment feedback about the quality of the previous action.
    message: Optional[str] = Field(
        description="Environment feedback e.g. 'redundant call detected'",
        default=None,
    )
class AgentState(State):
    """Per-episode metadata for training harnesses and curriculum schedulers.

    Fields inherited from State:
        - episode_id: Optional[str]
        - step_count: int
    """

    # --- Task identity and limits ---
    task_id: str = Field(
        description="Current task identifier",
        default="",
    )
    task_description: str = Field(
        description="Human-readable task description",
        default="",
    )
    difficulty: str = Field(
        description="easy / medium / hard",
        default="",
    )
    max_steps: int = Field(
        description="Max steps allowed this episode",
        default=10,
    )

    # --- Episode progress ---
    # NOTE(review): how visible_files differs from discovered_files is not
    # documented in this module; confirm against the environment code.
    visible_files: List[str] = Field(default_factory=list)
    discovered_files: List[str] = Field(default_factory=list)
    action_history: List[str] = Field(default_factory=list)

    # --- Scoring ---
    current_reward: float = Field(
        description="Cumulative reward so far",
        default=0.0,
    )
    completed: bool = Field(default=False)
    grader_score: Optional[float] = Field(
        description="Final grader score (0.0-1.0), set at end of episode",
        default=None,
    )