teamforge / models.py
Your Name
fix(OpenEnv): implement system-wide [0.1, 0.9] boundary scrub for Phase 2 compliance
efa2d2a
"""
TeamForge Models
Strictly-typed Pydantic models for all observations, actions, and state.
"""
from __future__ import annotations
from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, Field
# ─────────────────────────────────────────────
# ENUMS
# ─────────────────────────────────────────────
class TaskDifficulty(str, Enum):
    """Difficulty tier a task is labeled with; values double as wire strings."""

    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
    BONUS = "bonus"
class PhaseState(str, Enum):
    """Lifecycle phase of an episode; values double as wire strings."""

    PLANNING = "planning"
    CODING = "coding"
    TESTING = "testing"
    REVIEWING = "reviewing"
    REFLECTING = "reflecting"
    DONE = "done"
class ActionStatus(str, Enum):
    """Coarse outcome of the most recent action; values double as wire strings."""

    SUCCESS = "success"
    FAILURE = "failure"
    PARTIAL = "partial"
# ─────────────────────────────────────────────
# ACTIONS (Union-typed, discriminated by `type`)
# ─────────────────────────────────────────────
class PlanStep(BaseModel):
    """One numbered step of the agent's plan, with optional dependencies."""

    # Discriminator value for the Action union.
    type: Literal["plan_step"] = "plan_step"
    step_number: int = Field(..., ge=1, description="Sequential step number in the plan")
    description: str = Field(..., min_length=5, description="What this step accomplishes")
    estimated_effort: Literal["low", "medium", "high"] = "medium"
    depends_on: List[int] = Field(
        default_factory=list,
        description="Step numbers this depends on",
    )
class EditFile(BaseModel):
    """Replace a repository file's contents wholesale, with a stated rationale."""

    # Discriminator value for the Action union.
    type: Literal["edit_file"] = "edit_file"
    file_path: str = Field(..., description="Relative path inside the repo")
    content: str = Field(..., description="Full new content of the file")
    reason: str = Field(..., min_length=5, description="Why this edit is being made")
class RunTests(BaseModel):
    """Execute the test suite, or a single test file, under a bounded timeout."""

    # Discriminator value for the Action union.
    type: Literal["run_tests"] = "run_tests"
    test_path: Optional[str] = Field(
        default=None,
        description="Specific test file or None for all tests",
    )
    # Bounded so a runaway suite cannot stall the episode.
    timeout_seconds: int = Field(default=30, ge=5, le=120)
class RunLint(BaseModel):
    """Run the linter over one file or the whole repo, optionally auto-fixing."""

    # Discriminator value for the Action union.
    type: Literal["run_lint"] = "run_lint"
    fix: bool = Field(default=False, description="Whether to auto-fix lint issues")
    file_path: Optional[str] = Field(
        default=None,
        description="Specific file or None for entire repo",
    )
class GenerateReview(BaseModel):
    """Produce a code-review artifact covering the chosen focus areas."""

    # Discriminator value for the Action union.
    type: Literal["generate_review"] = "generate_review"
    # Lambda factory so each instance gets its own fresh list.
    focus_areas: List[str] = Field(
        default_factory=lambda: ["correctness", "style", "performance"],
        description="Areas to focus the review on",
    )
    review_text: str = Field(
        ...,
        min_length=20,
        description="Detailed review of the current state",
    )
class Commit(BaseModel):
    """Stage the given files (or everything changed) and commit them."""

    # Discriminator value for the Action union.
    type: Literal["commit"] = "commit"
    message: str = Field(..., min_length=10, description="Conventional-commit style message")
    files: List[str] = Field(
        default_factory=list,
        description="Files to stage; empty = all changed",
    )
class SelfReflect(BaseModel):
    """Structured retrospective emitted by the agent, optionally revising the plan."""

    # Discriminator value for the Action union.
    type: Literal["self_reflect"] = "self_reflect"
    what_went_well: str = Field(..., min_length=10)
    what_to_improve: str = Field(..., min_length=10)
    adjusted_plan: Optional[str] = Field(
        default=None,
        description="Updated plan if reflecting mid-task",
    )
class RequestIteration(BaseModel):
    """Ask the environment for another improvement pass over the task."""

    # Discriminator value for the Action union.
    type: Literal["request_iteration"] = "request_iteration"
    reason: str = Field(..., min_length=10, description="Why another iteration is needed")
    target_issues: List[str] = Field(
        default_factory=list,
        description="Specific issues to address",
    )
# Union of every action model; each member carries a unique `type` Literal,
# so the union can be discriminated on that field. Member order is preserved
# from the original definition (left-to-right order matters for plain-Union
# validation in older pydantic versions).
Action = Union[PlanStep, EditFile, RunTests, RunLint, GenerateReview, Commit, SelfReflect, RequestIteration]
# ─────────────────────────────────────────────
# OBSERVATION
# ─────────────────────────────────────────────
class FileSnapshot(BaseModel):
    """Point-in-time copy of a single repository file."""

    path: str
    content: str
    size_bytes: int = 0
class TestResult(BaseModel):
    """Aggregate counts and raw output from one test run."""

    passed: int = 0
    failed: int = 0
    errors: int = 0
    output: str = ""
    duration_seconds: float = 0.0
class LintResult(BaseModel):
    """Violation count, raw output, and normalized score from one lint run."""

    violations: int = 0
    output: str = ""
    # 0.9 means "clean": per the boundary-scrub commit this codebase pins
    # scores to a [0.1, 0.9] band rather than [0.0, 1.0] — do not "fix" to 1.0.
    score: float = 0.9
class ReviewArtifact(BaseModel):
    """A stored code review, tagged with the step it was produced at."""

    reviewer: str = "agent"
    focus_areas: List[str] = Field(default_factory=list)
    text: str = ""
    timestamp_step: int = 0
class ReflectionArtifact(BaseModel):
    """A stored self-reflection, keyed by the step it was made at."""

    step: int
    what_went_well: str
    what_to_improve: str
    adjusted_plan: Optional[str] = None
class Observation(BaseModel):
    """Full environment observation returned after every step.

    Bundles task context, repo state, execution results, team artifacts,
    and reward signals into one snapshot for the policy.
    """

    # ── Task context ──
    task_id: str
    task_description: str
    difficulty: TaskDifficulty
    step_number: int
    max_steps: int
    phase: PhaseState

    # ── Repository state ──
    repo_files: List[FileSnapshot] = Field(default_factory=list)
    git_log: List[str] = Field(default_factory=list)

    # ── Result of the last action ──
    last_action_type: Optional[str] = None
    last_action_status: ActionStatus = ActionStatus.SUCCESS
    last_action_output: str = ""

    # ── Latest test / lint outcomes (None until first run) ──
    test_results: Optional[TestResult] = None
    lint_results: Optional[LintResult] = None

    # ── Accumulated team artifacts ──
    plan: List[PlanStep] = Field(default_factory=list)
    reviews: List[ReviewArtifact] = Field(default_factory=list)
    reflections: List[ReflectionArtifact] = Field(default_factory=list)

    # ── Reward signals ──
    # 0.1 is the floor of the [0.1, 0.9] band used throughout this module
    # (see the boundary-scrub commit) — intentionally not 0.0.
    reward: float = 0.1
    cumulative_reward: float = 0.1
    done: bool = False
    info: Dict[str, Any] = Field(default_factory=dict)
# ─────────────────────────────────────────────
# EPISODE RESULT (for grader)
# ─────────────────────────────────────────────
class EpisodeResult(BaseModel):
    """Per-episode summary consumed by the grader."""

    task_id: str
    total_steps: int
    # Component scores default to 0.1 — the floor of the [0.1, 0.9] band
    # established by the boundary-scrub commit (intentionally not 0.0).
    test_pass_rate: float = 0.1
    lint_score: float = 0.1
    efficiency_score: float = 0.1
    review_quality: float = 0.1
    reflection_quality: float = 0.1
    final_score: float = 0.1
    passed: bool = False
    log: List[str] = Field(default_factory=list)