"""
TeamForge Models
Strictly-typed Pydantic models for all observations, actions, and state.
"""
from __future__ import annotations
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, Field
# ─────────────────────────────────────────────
# ENUMS
# ─────────────────────────────────────────────
class TaskDifficulty(str, Enum):
    """Difficulty tier of a task; serializes to its lowercase string value."""

    EASY = "easy"        # serialized as "easy"
    MEDIUM = "medium"    # serialized as "medium"
    HARD = "hard"        # serialized as "hard"
    BONUS = "bonus"      # serialized as "bonus"
class PhaseState(str, Enum):
    """Phase of an episode; serializes to its lowercase string value."""

    PLANNING = "planning"      # serialized as "planning"
    CODING = "coding"          # serialized as "coding"
    TESTING = "testing"        # serialized as "testing"
    REVIEWING = "reviewing"    # serialized as "reviewing"
    REFLECTING = "reflecting"  # serialized as "reflecting"
    DONE = "done"              # serialized as "done"
class ActionStatus(str, Enum):
    """Outcome of the last executed action; serializes to its string value."""

    SUCCESS = "success"  # serialized as "success"
    FAILURE = "failure"  # serialized as "failure"
    PARTIAL = "partial"  # serialized as "partial"
# ─────────────────────────────────────────────
# ACTIONS (Union-typed, discriminated by `type`)
# ─────────────────────────────────────────────
class PlanStep(BaseModel):
    """One step of the agent's plan; `type` tags it within the Action union."""
    # Literal tag distinguishing this action type in the Action union.
    type: Literal["plan_step"] = "plan_step"
    step_number: int = Field(..., ge=1, description="Sequential step number in the plan")
    description: str = Field(..., min_length=5, description="What this step accomplishes")
    # Coarse effort estimate; defaults to "medium".
    estimated_effort: Literal["low", "medium", "high"] = "medium"
    depends_on: List[int] = Field(default_factory=list, description="Step numbers this depends on")
class EditFile(BaseModel):
    """Action that replaces a file's entire content with new content."""
    # Literal tag distinguishing this action type in the Action union.
    type: Literal["edit_file"] = "edit_file"
    file_path: str = Field(..., description="Relative path inside the repo")
    # Full replacement, not a diff — the whole file body is supplied.
    content: str = Field(..., description="Full new content of the file")
    reason: str = Field(..., min_length=5, description="Why this edit is being made")
class RunTests(BaseModel):
    """Action that runs the test suite (or one test file)."""
    # Literal tag distinguishing this action type in the Action union.
    type: Literal["run_tests"] = "run_tests"
    test_path: Optional[str] = Field(None, description="Specific test file or None for all tests")
    # Bounded to 5..120 seconds; defaults to 30.
    timeout_seconds: int = Field(default=30, ge=5, le=120)
class RunLint(BaseModel):
    """Action that runs the linter, optionally auto-fixing issues."""
    # Literal tag distinguishing this action type in the Action union.
    type: Literal["run_lint"] = "run_lint"
    fix: bool = Field(default=False, description="Whether to auto-fix lint issues")
    file_path: Optional[str] = Field(None, description="Specific file or None for entire repo")
class GenerateReview(BaseModel):
    """Action that records a code review of the current repository state."""
    # Literal tag distinguishing this action type in the Action union.
    type: Literal["generate_review"] = "generate_review"
    # default_factory returns a fresh list per instance (avoids shared mutable default).
    focus_areas: List[str] = Field(
        default_factory=lambda: ["correctness", "style", "performance"],
        description="Areas to focus the review on"
    )
    review_text: str = Field(..., min_length=20, description="Detailed review of the current state")
class Commit(BaseModel):
    """Action that commits staged changes with a message."""
    # Literal tag distinguishing this action type in the Action union.
    type: Literal["commit"] = "commit"
    message: str = Field(..., min_length=10, description="Conventional-commit style message")
    # Empty list means "stage all changed files" (see description).
    files: List[str] = Field(default_factory=list, description="Files to stage; empty = all changed")
class SelfReflect(BaseModel):
    """Action that records a self-reflection on progress so far."""
    # Literal tag distinguishing this action type in the Action union.
    type: Literal["self_reflect"] = "self_reflect"
    what_went_well: str = Field(..., min_length=10)
    what_to_improve: str = Field(..., min_length=10)
    adjusted_plan: Optional[str] = Field(None, description="Updated plan if reflecting mid-task")
class RequestIteration(BaseModel):
    """Action that asks the environment for another iteration on the task."""
    # Literal tag distinguishing this action type in the Action union.
    type: Literal["request_iteration"] = "request_iteration"
    reason: str = Field(..., min_length=10, description="Why another iteration is needed")
    target_issues: List[str] = Field(default_factory=list, description="Specific issues to address")
# Discriminated union of all actions
# NOTE(review): this is a plain typing.Union — pydantic only performs
# discriminated-union dispatch on the `type` tag when the field using it is
# declared with Field(discriminator="type"); confirm at the usage sites.
Action = Union[
    PlanStep,
    EditFile,
    RunTests,
    RunLint,
    GenerateReview,
    Commit,
    SelfReflect,
    RequestIteration,
]
# ─────────────────────────────────────────────
# OBSERVATION
# ─────────────────────────────────────────────
class FileSnapshot(BaseModel):
    """Point-in-time copy of one repository file."""
    # Relative path of the file (presumably within the repo — confirm with producer).
    path: str
    # Full text content of the file.
    content: str
    # Size in bytes; defaults to 0 when not measured.
    size_bytes: int = 0
class TestResult(BaseModel):
    """Aggregate counts and output from a test run."""
    # Number of passing tests.
    passed: int = 0
    # Number of failing tests.
    failed: int = 0
    # Number of tests that errored (as opposed to failing an assertion).
    errors: int = 0
    # Raw runner output.
    output: str = ""
    # Wall-clock duration of the run.
    duration_seconds: float = 0.0
class LintResult(BaseModel):
    """Violation count, raw output, and score from a lint run."""
    # Total number of lint violations found.
    violations: int = 0
    # Raw linter output.
    output: str = ""
    # NOTE(review): a clean run scores 0.9, not 1.0 — presumably a deliberate
    # cap in the grading scheme; confirm with the grader.
    score: float = 0.9 # 0.9 = clean
class ReviewArtifact(BaseModel):
    """A stored code review produced during the episode."""
    # Identifier of who produced the review; defaults to "agent".
    reviewer: str = "agent"
    # Areas the review focused on (mirrors GenerateReview.focus_areas).
    focus_areas: List[str] = Field(default_factory=list)
    # The review text itself.
    text: str = ""
    # Step number at which the review was produced.
    timestamp_step: int = 0
class ReflectionArtifact(BaseModel):
    """A stored self-reflection produced during the episode."""
    # Step number at which the reflection was produced (required).
    step: int
    what_went_well: str
    what_to_improve: str
    # Updated plan text, if the agent revised its plan while reflecting.
    adjusted_plan: Optional[str] = None
class Observation(BaseModel):
    """Full environment observation returned after every step.

    Bundles task context, repository state, execution results, team
    artifacts, and reward signals into a single typed payload.
    """
    # Task context
    task_id: str
    task_description: str
    difficulty: TaskDifficulty
    # Current step vs. the episode's step budget.
    step_number: int
    max_steps: int
    phase: PhaseState
    # Repository state
    repo_files: List[FileSnapshot] = Field(default_factory=list)
    # One entry per commit line — format set by the environment, not shown here.
    git_log: List[str] = Field(default_factory=list)
    # Execution results
    last_action_type: Optional[str] = None
    last_action_status: ActionStatus = ActionStatus.SUCCESS
    last_action_output: str = ""
    # Test & lint (None until the corresponding action has been run)
    test_results: Optional[TestResult] = None
    lint_results: Optional[LintResult] = None
    # Team artifacts
    plan: List[PlanStep] = Field(default_factory=list)
    reviews: List[ReviewArtifact] = Field(default_factory=list)
    reflections: List[ReflectionArtifact] = Field(default_factory=list)
    # Signals
    # NOTE(review): reward defaults of 0.1 (not 0.0) look deliberate but
    # unusual — confirm the environment intends a nonzero baseline reward.
    reward: float = 0.1
    cumulative_reward: float = 0.1
    done: bool = False
    info: Dict[str, Any] = Field(default_factory=dict)
# ─────────────────────────────────────────────
# EPISODE RESULT (for grader)
# ─────────────────────────────────────────────
class EpisodeResult(BaseModel):
    """Final per-episode summary consumed by the grader."""
    task_id: str
    total_steps: int
    # NOTE(review): every score field defaults to 0.1 rather than 0.0 —
    # presumably a floor score in the grading scheme; confirm with the grader.
    test_pass_rate: float = 0.1
    lint_score: float = 0.1
    efficiency_score: float = 0.1
    review_quality: float = 0.1
    reflection_quality: float = 0.1
    final_score: float = 0.1
    # Whether the episode met the pass threshold.
    passed: bool = False
    # Free-form per-step log lines.
    log: List[str] = Field(default_factory=list)