# agentops-gym / models.py
# Revanth-ml's picture
# Upload folder using huggingface_hub
# e2eb9d7 verified
"""
AgentOps Gym — Pydantic models for Action, Observation, and State.
The agent operates on a simulated Python codebase by calling tools.
The environment is partially observable, stateful, and efficiency-aware.
Rewards shrink with wasteful or redundant tool calls.
"""
from typing import Optional, List, Dict, Any
from pydantic import Field
from openenv.core.env_server.types import Action, Observation, State
class ToolCall(Action):
    """One tool invocation submitted by the agent as its action.

    The action space is deliberately open: the agent supplies any valid
    tool name from AVAILABLE_TOOLS together with arbitrary parameters,
    the way real agents talk to tool-use environments. Nothing is
    discretized artificially.
    """

    # Required: there is no default tool, so the caller must always name one.
    tool: str = Field(
        ..., description="Tool name (FileRead, FileWrite, Grep, Bash, WebSearch, TodoWrite)"
    )

    # Free-form keyword arguments for the tool; a fresh empty dict per instance.
    parameters: Dict[str, Any] = Field(
        description="Tool parameters, e.g. {'filename': 'main.py'} or {'pattern': 'def fetch'}",
        default_factory=dict,
    )

    # Optional free-text rationale; absent unless the agent provides it.
    reasoning: Optional[str] = Field(
        None,
        description="Optional: why the agent is calling this tool (for interpretability)",
    )
class AgentObservation(Observation):
    """Snapshot handed back to the agent after every action.

    The ``Observation`` base class already contributes:
        - done: bool
        - reward: Optional[float]
        - metadata: Dict[str, Any]
    """

    # Partial observability: only the files discovered so far are listed.
    visible_files: List[str] = Field(
        description="Files the agent currently knows exist in the project",
        default_factory=list,
    )

    # Result of the most recent tool call; None when there is nothing to report.
    last_tool_result: Optional[str] = Field(
        None, description="Output string from the last tool call"
    )

    # Ordered log of the tool calls issued during this episode.
    action_history: List[str] = Field(
        description="e.g. ['Grep(pattern=timeout)', 'FileRead(config.json)']",
        default_factory=list,
    )

    step_count: int = Field(
        0,
        description="How many steps taken so far",
    )

    task_description: str = Field(
        "",
        description="The task the agent must solve",
    )

    # Environment's comment on the quality of the last action, if any.
    message: Optional[str] = Field(
        None, description="Environment feedback e.g. 'redundant call detected'"
    )
class AgentState(State):
    """Per-episode metadata aimed at training harnesses and curriculum schedulers.

    The ``State`` base class already contributes:
        - episode_id: Optional[str]
        - step_count: int
    """

    # --- Task identity and budget -------------------------------------------
    task_id: str = Field(
        "",
        description="Current task identifier",
    )
    task_description: str = Field(
        "",
        description="Human-readable task description",
    )
    difficulty: str = Field(
        "",
        description="easy / medium / hard",
    )
    max_steps: int = Field(
        10,
        description="Max steps allowed this episode",
    )

    # --- Episode progress ---------------------------------------------------
    # NOTE(review): how visible_files differs from discovered_files is not
    # evident from this module — confirm against the environment code.
    visible_files: List[str] = Field(default_factory=list)
    discovered_files: List[str] = Field(default_factory=list)
    action_history: List[str] = Field(default_factory=list)

    # --- Scoring ------------------------------------------------------------
    current_reward: float = Field(
        0.0,
        description="Cumulative reward so far",
    )
    completed: bool = Field(False)
    # Stays None until a final score is recorded.
    grader_score: Optional[float] = Field(
        None, description="Final grader score (0.0-1.0), set at end of episode"
    )