File size: 3,363 Bytes
e2eb9d7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
"""
AgentOps Gym — Pydantic models for Action, Observation, and State.

The agent operates on a simulated Python codebase by calling tools.
The environment is partially observable, stateful, and efficiency-aware.
Rewards shrink with wasteful or redundant tool calls.
"""

from typing import Optional, List, Dict, Any
from pydantic import Field
from openenv.core.env_server.types import Action, Observation, State


class ToolCall(Action):
    """Agent submits a tool call with a name and parameters.

    Open action space: any valid tool name from AVAILABLE_TOOLS with
    any parameters. This mirrors how real agents interact with tool-use
    environments — no artificial discretization.
    """

    # NOTE: the docstring above is kept verbatim on purpose; pydantic
    # publishes a model's docstring as its JSON-schema description.

    # Required field: no default is supplied, so every action must name a tool.
    tool: str = Field(
        default=...,
        description="Tool name (FileRead, FileWrite, Grep, Bash, WebSearch, TodoWrite)",
    )

    # Free-form arguments for the chosen tool. The factory produces a new
    # empty dict whenever the caller omits the field.
    parameters: Dict[str, Any] = Field(
        description="Tool parameters, e.g. {'filename': 'main.py'} or {'pattern': 'def fetch'}",
        default_factory=dict,
    )

    # Optional free-text rationale; None when the agent gives none.
    reasoning: Optional[str] = Field(
        None,
        description="Optional: why the agent is calling this tool (for interpretability)",
    )


class AgentObservation(Observation):
    """What the agent sees after each action.

    Inherits from Observation which provides:
        - done: bool
        - reward: Optional[float]
        - metadata: Dict[str, Any]
    """

    # NOTE: the docstring above is kept verbatim on purpose; pydantic
    # publishes a model's docstring as its JSON-schema description.

    # Partial observability: only the files discovered so far are listed.
    visible_files: List[str] = Field(
        description="Files the agent currently knows exist in the project",
        default_factory=list,
    )

    # Result text of the latest tool invocation; None when there is none.
    last_tool_result: Optional[str] = Field(
        None,
        description="Output string from the last tool call",
    )

    # Ordered record of the tool calls issued during the current episode.
    action_history: List[str] = Field(
        description="e.g. ['Grep(pattern=timeout)', 'FileRead(config.json)']",
        default_factory=list,
    )

    step_count: int = Field(
        0,
        description="How many steps taken so far",
    )
    task_description: str = Field(
        "",
        description="The task the agent must solve",
    )

    # Environment's comment on the quality of the most recent action.
    message: Optional[str] = Field(
        None,
        description="Environment feedback e.g. 'redundant call detected'",
    )


class AgentState(State):
    """Episode metadata for training harnesses and curriculum schedulers.

    Inherits from State which provides:
        - episode_id: Optional[str]
        - step_count: int
    """

    # NOTE: the docstring above is kept verbatim on purpose; pydantic
    # publishes a model's docstring as its JSON-schema description.

    # --- Task identity and limits ---
    task_id: str = Field(
        "",
        description="Current task identifier",
    )
    task_description: str = Field(
        "",
        description="Human-readable task description",
    )
    difficulty: str = Field(
        "",
        description="easy / medium / hard",
    )
    max_steps: int = Field(
        10,
        description="Max steps allowed this episode",
    )

    # --- Per-episode tracking lists (each defaults to a new empty list) ---
    visible_files: List[str] = Field(
        default_factory=list,
    )
    discovered_files: List[str] = Field(
        default_factory=list,
    )
    action_history: List[str] = Field(
        default_factory=list,
    )

    # --- Progress and scoring ---
    current_reward: float = Field(
        0.0,
        description="Cumulative reward so far",
    )
    completed: bool = Field(
        False,
    )
    # Stays None until a final grade has been assigned.
    grader_score: Optional[float] = Field(
        None,
        description="Final grader score (0.0-1.0), set at end of episode",
    )