File size: 4,215 Bytes
2930dae
5b64237
2930dae
5b64237
2930dae
 
 
 
 
 
 
 
5b64237
2930dae
 
5b64237
 
 
 
 
 
 
 
 
 
 
 
2930dae
 
 
 
 
 
 
 
5b64237
2930dae
 
5b64237
2930dae
 
 
 
5b64237
2930dae
 
5b64237
2930dae
 
 
 
 
 
 
5b64237
 
 
 
 
 
2930dae
 
 
 
 
 
 
 
5b64237
 
 
2930dae
 
 
5b64237
2930dae
 
 
 
 
 
 
 
 
 
5b64237
2930dae
 
 
 
 
 
 
 
 
5b64237
 
2930dae
 
 
5b64237
2930dae
 
 
 
 
 
5b64237
2930dae
 
 
 
5b64237
2930dae
 
 
 
 
 
5b64237
2930dae
 
 
 
5b64237
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Pydantic models for DevOpsEnv OpenEnv environment.

Domain: Linux DevOps & SRE Troubleshooting
"""
from __future__ import annotations

from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field


# ---------------------------------------------------------------------------
# System State Models
# ---------------------------------------------------------------------------

class SystemState(BaseModel):
    """Current state of the mock Linux server."""
    # Every field below is required: none of them declares a default value.

    # Identifier of the task this snapshot belongs to.
    task_id: str
    # Presumably the commands the mock server will accept -- confirm against
    # the environment implementation.
    available_commands: List[str]
    # Filesystem view carried as one string; its format is not defined here.
    filesystem_snapshot: str
    # One free-form dict per process; the keys are not fixed by this model.
    running_processes: List[Dict[str, Any]]
    # String-to-string mapping; presumably service name -> status text
    # (TODO confirm against the producer).
    service_status: Dict[str, str]
    # Log output carried as a single string.
    logs: str
    # Port numbers as integers.
    http_ports_open: List[int]
    # One str -> str dict per container; the keys are not fixed by this model.
    docker_containers: List[Dict[str, str]]
    # Unit/range (fraction vs. percent) is not specified here -- TODO confirm.
    cpu_usage: float
    # Integer amount; megabytes according to the field name.
    memory_usage_mb: int


# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------

class Observation(BaseModel):
    """Everything the agent sees at each step."""
    # Required: task_id, task_description, episode_id, system_state,
    # available_actions, step_number, max_steps.
    # Optional: thread_history (defaults to a new empty list) and hint
    # (defaults to None).
    task_id: str = Field(description="task1 | task2 | task3")
    task_description: str = Field(description="Human-readable task description")
    episode_id: str = Field(description="Unique episode UUID")
    # Nested SystemState model carrying the server snapshot.
    system_state: SystemState
    # default_factory builds a fresh list per instance, so instances never
    # share one mutable default.
    thread_history: List[Dict[str, str]] = Field(
        default_factory=list,
        description="Ordered list of {'role': 'agent'|'system', 'content': str}"
    )
    # Presumably the action_type values the agent may send -- confirm.
    available_actions: List[str]
    # Whether step_number is 0- or 1-based is not defined here -- TODO confirm.
    step_number: int
    max_steps: int
    # Optional free-text hint; None when no hint is supplied.
    hint: Optional[str] = Field(default=None)


# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------

class Action(BaseModel):
    """Agent action: run a bash command, edit a file, or submit."""
    # action_type is the only required field. It is annotated as a plain str,
    # so this model does not reject values outside the three named in its
    # description.
    action_type: str = Field(description="bash_cmd | file_edit | submit")
    # The remaining fields are all optional and default to None. Nothing in
    # this model checks that the populated fields match action_type; any such
    # check must live in the code that consumes the action.
    command: Optional[str] = Field(default=None, description="Bash command to execute")
    file_path: Optional[str] = Field(default=None, description="Absolute path to file to edit")
    file_content: Optional[str] = Field(default=None, description="New full content for the file")
    summary: Optional[str] = Field(default=None, description="Final summary of actions taken")


# ---------------------------------------------------------------------------
# Reward
# ---------------------------------------------------------------------------

class Reward(BaseModel):
    """Per-step reward signal."""
    # All three fields are required.

    # Reward attributed to the current step.
    step_reward: float
    # Presumably the running sum of step rewards for the episode -- confirm
    # against the environment's step logic.
    total_reward: float
    # Free-text explanation accompanying the reward.
    explanation: str


# ---------------------------------------------------------------------------
# Step Result
# ---------------------------------------------------------------------------

class StepResult(BaseModel):
    # Bundles an Observation, a Reward, a done flag and an info dict in one
    # model. Presumably what the environment's step call returns -- confirm
    # against the caller.
    observation: Observation
    reward: Reward
    # Boolean completion flag; the termination conditions are decided outside
    # this model.
    done: bool
    # Free-form extra data; defaults to a new empty dict per instance.
    info: Dict[str, Any] = Field(default_factory=dict)


# ---------------------------------------------------------------------------
# State
# ---------------------------------------------------------------------------

class State(BaseModel):
    # Episode bookkeeping record: identifiers, step counters, completion flag,
    # reward total, history and an optional final score.
    # Required: task_id through total_reward. Optional: history and
    # final_score.
    task_id: str
    episode_id: str
    step_number: int
    max_steps: int
    done: bool
    total_reward: float
    # Free-form dict entries whose schema is not fixed by this model; defaults
    # to a new empty list per instance.
    history: List[Dict[str, Any]] = Field(default_factory=list)
    # None by default; presumably set once the episode has been graded
    # (TODO confirm where it is assigned).
    final_score: Optional[float] = Field(default=None)


# ---------------------------------------------------------------------------
# Task Metadata
# ---------------------------------------------------------------------------

class TaskInfo(BaseModel):
    # Descriptive record for a single task; every field is required.
    task_id: str
    name: str
    description: str
    # Plain string: the set of allowed difficulty labels is not constrained
    # by this model.
    difficulty: str
    # Integer step limit associated with the task.
    max_steps: int


# ---------------------------------------------------------------------------
# Grader Response
# ---------------------------------------------------------------------------

class GraderResponse(BaseModel):
    # Grading outcome for one episode. Required: episode_id, task_id, score,
    # feedback. Optional: breakdown.
    episode_id: str
    task_id: str
    # The description states a 0.0-1.0 range, but no numeric bounds are
    # declared on the field, so this model does not enforce that range.
    score: float = Field(description="Final grader score 0.0–1.0")
    # String -> float mapping; presumably per-criterion partial scores
    # (confirm against the grader). Defaults to a new empty dict per instance.
    breakdown: Dict[str, float] = Field(default_factory=dict)
    # Free-text feedback string.
    feedback: str


class BaselineResult(BaseModel):
    """Result of running the baseline agent."""
    # Every field is required: none of them declares a default value.
    task_id: str
    episode_id: str
    final_score: float
    # Integer step count; presumably the number of steps the baseline took
    # (TODO confirm against the baseline runner).
    step_count: int
    total_reward: float
    # Free-form dict per action; the entry schema is not fixed by this model.
    actions: List[Dict[str, Any]]