File size: 6,484 Bytes
637f42c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efa2d2a
637f42c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
efa2d2a
 
637f42c
 
 
 
 
 
 
 
 
 
 
efa2d2a
 
 
 
 
 
637f42c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
"""
TeamForge Models
Strictly-typed Pydantic models for all observations, actions, and state.
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, Field


# ─────────────────────────────────────────────
# ENUMS
# ─────────────────────────────────────────────

class TaskDifficulty(str, Enum):
    """Difficulty tier attached to a task.

    Members subclass ``str``, so each one compares equal to its lowercase
    string value (for example ``TaskDifficulty.EASY == "easy"``).
    """

    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
    BONUS = "bonus"


class PhaseState(str, Enum):
    """Phase label for an episode.

    Members subclass ``str`` and compare equal to their lowercase value.
    They are declared in the order planning, coding, testing, reviewing,
    reflecting, done; this enum itself does not enforce any transition
    order between them.
    """

    PLANNING = "planning"
    CODING = "coding"
    TESTING = "testing"
    REVIEWING = "reviewing"
    REFLECTING = "reflecting"
    DONE = "done"


class ActionStatus(str, Enum):
    """Outcome label for an executed action.

    Members subclass ``str`` and compare equal to their lowercase value.
    """

    SUCCESS = "success"
    FAILURE = "failure"
    PARTIAL = "partial"


# ─────────────────────────────────────────────
# ACTIONS  (Union-typed, discriminated by `type`)
# ─────────────────────────────────────────────

class PlanStep(BaseModel):
    """Action describing one numbered step of a plan.

    ``step_number`` is required and must be at least 1; ``description`` is
    required and must be at least five characters long.  ``estimated_effort``
    defaults to ``"medium"`` and ``depends_on`` to a fresh empty list.
    """

    # Tag identifying this action kind; fixed to "plan_step".
    type: Literal["plan_step"] = "plan_step"
    step_number: int = Field(
        ...,
        ge=1,
        description="Sequential step number in the plan",
    )
    description: str = Field(
        ...,
        min_length=5,
        description="What this step accomplishes",
    )
    estimated_effort: Literal["low", "medium", "high"] = "medium"
    # The referenced step numbers are not cross-checked by this model.
    depends_on: List[int] = Field(
        default_factory=list,
        description="Step numbers this depends on",
    )


class EditFile(BaseModel):
    """Action that supplies the complete new content for one file.

    All three payload fields are required; ``reason`` must be at least five
    characters long.
    """

    # Tag identifying this action kind; fixed to "edit_file".
    type: Literal["edit_file"] = "edit_file"
    # Described as repo-relative; this model applies no path validation.
    file_path: str = Field(
        ...,
        description="Relative path inside the repo",
    )
    content: str = Field(
        ...,
        description="Full new content of the file",
    )
    reason: str = Field(
        ...,
        min_length=5,
        description="Why this edit is being made",
    )


class RunTests(BaseModel):
    """Action requesting a test run.

    ``test_path`` is optional (``None`` means all tests, per its field
    description).  ``timeout_seconds`` defaults to 30 and is constrained to
    the inclusive range 5..120.
    """

    # Tag identifying this action kind; fixed to "run_tests".
    type: Literal["run_tests"] = "run_tests"
    test_path: Optional[str] = Field(
        default=None,
        description="Specific test file or None for all tests",
    )
    timeout_seconds: int = Field(
        default=30,
        ge=5,
        le=120,
    )


class RunLint(BaseModel):
    """Action requesting a lint run.

    ``fix`` defaults to ``False``.  ``file_path`` is optional (``None`` means
    the entire repo, per its field description).
    """

    # Tag identifying this action kind; fixed to "run_lint".
    type: Literal["run_lint"] = "run_lint"
    fix: bool = Field(
        default=False,
        description="Whether to auto-fix lint issues",
    )
    file_path: Optional[str] = Field(
        default=None,
        description="Specific file or None for entire repo",
    )


class GenerateReview(BaseModel):
    """Action carrying a written review.

    ``review_text`` is required and must be at least 20 characters long.
    ``focus_areas`` defaults to a new
    ``["correctness", "style", "performance"]`` list for each instance.
    """

    # Tag identifying this action kind; fixed to "generate_review".
    type: Literal["generate_review"] = "generate_review"
    # A factory (not a shared literal) so instances never share one list.
    focus_areas: List[str] = Field(
        description="Areas to focus the review on",
        default_factory=lambda: ["correctness", "style", "performance"],
    )
    review_text: str = Field(
        ...,
        min_length=20,
        description="Detailed review of the current state",
    )


class Commit(BaseModel):
    """Action requesting a commit.

    ``message`` is required and must be at least 10 characters long.
    ``files`` defaults to a fresh empty list, which the field description
    defines as "all changed".
    """

    # Tag identifying this action kind; fixed to "commit".
    type: Literal["commit"] = "commit"
    # Only the length is validated; the conventional-commit format is not.
    message: str = Field(
        ...,
        min_length=10,
        description="Conventional-commit style message",
    )
    files: List[str] = Field(
        default_factory=list,
        description="Files to stage; empty = all changed",
    )


class SelfReflect(BaseModel):
    """Action carrying a self-reflection.

    ``what_went_well`` and ``what_to_improve`` are required and must each be
    at least 10 characters long; ``adjusted_plan`` is optional and defaults
    to ``None``.
    """

    # Tag identifying this action kind; fixed to "self_reflect".
    type: Literal["self_reflect"] = "self_reflect"
    what_went_well: str = Field(
        ...,
        min_length=10,
    )
    what_to_improve: str = Field(
        ...,
        min_length=10,
    )
    adjusted_plan: Optional[str] = Field(
        default=None,
        description="Updated plan if reflecting mid-task",
    )


class RequestIteration(BaseModel):
    """Action asking for another iteration.

    ``reason`` is required and must be at least 10 characters long;
    ``target_issues`` defaults to a fresh empty list.
    """

    # Tag identifying this action kind; fixed to "request_iteration".
    type: Literal["request_iteration"] = "request_iteration"
    reason: str = Field(
        ...,
        min_length=10,
        description="Why another iteration is needed",
    )
    target_issues: List[str] = Field(
        default_factory=list,
        description="Specific issues to address",
    )


# Union of every action model. Each member declares its own
# ``type: Literal[...]`` tag with a distinct value, so a payload's ``type``
# identifies which model it belongs to.
# NOTE(review): this is a plain ``typing.Union``; no explicit pydantic
# discriminator (e.g. ``Field(discriminator="type")``) is configured here, so
# how the tag is used during validation depends on the consumer -- confirm.
Action = Union[
    PlanStep,
    EditFile,
    RunTests,
    RunLint,
    GenerateReview,
    Commit,
    SelfReflect,
    RequestIteration,
]


# ─────────────────────────────────────────────
# OBSERVATION
# ─────────────────────────────────────────────

class FileSnapshot(BaseModel):
    """One file captured by its path and content.

    ``path`` and ``content`` are required.
    """

    path: str
    content: str
    # Defaults to 0; this model does not derive it from ``content``.
    size_bytes: int = 0


class TestResult(BaseModel):
    """Summary of a test run: outcome counters, output text, and duration.

    Every field has a default, so the model can be built with no arguments.
    """

    # Outcome counters; all start at zero.
    passed: int = 0
    failed: int = 0
    errors: int = 0
    # Output text of the run; empty by default.
    output: str = ""
    duration_seconds: float = 0.0


class LintResult(BaseModel):
    """Summary of a lint run: violation count, output text, and a score.

    Every field has a default, so the model can be built with no arguments.
    """

    violations: int = 0
    output: str = ""
    # NOTE(review): the default is documented as the "clean" value; confirm
    # that the score scale intentionally tops out at 0.9 rather than 1.0.
    score: float = 0.9  # 0.9 = clean


class ReviewArtifact(BaseModel):
    """A stored review: who wrote it, what it focused on, and its text.

    Every field has a default, so the model can be built with no arguments.
    """

    reviewer: str = "agent"
    # Fresh empty list per instance.
    focus_areas: List[str] = Field(default_factory=list)
    text: str = ""
    # Presumably the step number at which the review was produced -- confirm
    # against the code that populates it.
    timestamp_step: int = 0


class ReflectionArtifact(BaseModel):
    """A stored reflection tied to a step number.

    The text fields share their names with those of ``SelfReflect`` but carry
    no length constraints here.  ``step``, ``what_went_well`` and
    ``what_to_improve`` are required.
    """

    step: int
    what_went_well: str
    what_to_improve: str
    # Optional; ``None`` when no adjusted plan was provided.
    adjusted_plan: Optional[str] = None


class Observation(BaseModel):
    """Full environment observation returned after every step.

    The six task-context fields are required; every other field has a
    default.  List and dict fields use ``default_factory`` so each instance
    gets its own container.
    """
    # Task context (all required)
    task_id: str
    task_description: str
    difficulty: TaskDifficulty
    step_number: int
    max_steps: int
    phase: PhaseState

    # Repository state
    repo_files: List[FileSnapshot] = Field(default_factory=list)
    git_log: List[str] = Field(default_factory=list)

    # Execution results of the most recent action; ``last_action_type`` is
    # ``None`` by default and the status defaults to SUCCESS.
    last_action_type: Optional[str] = None
    last_action_status: ActionStatus = ActionStatus.SUCCESS
    last_action_output: str = ""

    # Test & lint summaries; ``None`` by default.
    test_results: Optional[TestResult] = None
    lint_results: Optional[LintResult] = None

    # Team artifacts
    plan: List[PlanStep] = Field(default_factory=list)
    reviews: List[ReviewArtifact] = Field(default_factory=list)
    reflections: List[ReflectionArtifact] = Field(default_factory=list)

    # Signals
    # NOTE(review): both reward fields default to 0.1 rather than 0.0 --
    # confirm this baseline is intentional.
    reward: float = 0.1
    cumulative_reward: float = 0.1
    done: bool = False
    # Free-form extra data; values are untyped (``Any``).
    info: Dict[str, Any] = Field(default_factory=dict)


# ─────────────────────────────────────────────
# EPISODE RESULT (for grader)
# ─────────────────────────────────────────────

class EpisodeResult(BaseModel):
    """Per-episode summary of scores and outcome (used by the grader, per the
    section header above this class).

    ``task_id`` and ``total_steps`` are required; every score defaults to
    0.1, ``passed`` to ``False`` and ``log`` to a fresh empty list.
    """

    task_id: str
    total_steps: int
    # NOTE(review): all score fields default to 0.1 rather than 0.0 and no
    # range constraint is declared -- confirm the intended floor and scale.
    test_pass_rate: float = 0.1
    lint_score: float = 0.1
    efficiency_score: float = 0.1
    review_quality: float = 0.1
    reflection_quality: float = 0.1
    final_score: float = 0.1
    passed: bool = False
    log: List[str] = Field(default_factory=list)