Spaces:
Sleeping
Sleeping
Your Name
fix(OpenEnv): implement system-wide [0.1, 0.9] boundary scrub for Phase 2 compliance
"""
TeamForge Models
Strictly-typed Pydantic models for all observations, actions, and state.
"""
| from __future__ import annotations | |
| from enum import Enum | |
| from typing import Any, Dict, List, Literal, Optional, Union | |
| from pydantic import BaseModel, Field | |
# ─────────────────────────────────────────────
# ENUMS
# ─────────────────────────────────────────────
class TaskDifficulty(str, Enum):
    """Difficulty tier of a task.

    Mixes in ``str`` so members compare equal to their plain string values.
    """

    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
    BONUS = "bonus"
class PhaseState(str, Enum):
    """Named phases an episode can be in.

    Mixes in ``str`` so members compare equal to their plain string values.
    """

    PLANNING = "planning"
    CODING = "coding"
    TESTING = "testing"
    REVIEWING = "reviewing"
    REFLECTING = "reflecting"
    DONE = "done"
class ActionStatus(str, Enum):
    """Outcome reported for an executed action.

    Mixes in ``str`` so members compare equal to their plain string values.
    """

    SUCCESS = "success"
    FAILURE = "failure"
    PARTIAL = "partial"
# ─────────────────────────────────────────────
# ACTIONS (Union-typed, discriminated by `type`)
# ─────────────────────────────────────────────
class PlanStep(BaseModel):
    """One numbered step of a plan, tagged ``type == "plan_step"``."""

    type: Literal["plan_step"] = "plan_step"
    # Required; must be >= 1.
    step_number: int = Field(..., ge=1, description="Sequential step number in the plan")
    # Required; at least 5 characters.
    description: str = Field(..., min_length=5, description="What this step accomplishes")
    estimated_effort: Literal["low", "medium", "high"] = "medium"
    # Fresh empty list per instance.
    depends_on: List[int] = Field(default_factory=list, description="Step numbers this depends on")
class EditFile(BaseModel):
    """Action carrying the full new content for one file, tagged ``type == "edit_file"``."""

    type: Literal["edit_file"] = "edit_file"
    file_path: str = Field(..., description="Relative path inside the repo")
    content: str = Field(..., description="Full new content of the file")
    # Required; at least 5 characters.
    reason: str = Field(..., min_length=5, description="Why this edit is being made")
class RunTests(BaseModel):
    """Action requesting a test run, tagged ``type == "run_tests"``."""

    type: Literal["run_tests"] = "run_tests"
    test_path: Optional[str] = Field(None, description="Specific test file or None for all tests")
    # Validated to the inclusive range 5..120; defaults to 30.
    timeout_seconds: int = Field(default=30, ge=5, le=120)
class RunLint(BaseModel):
    """Action requesting a lint run, tagged ``type == "run_lint"``."""

    type: Literal["run_lint"] = "run_lint"
    fix: bool = Field(default=False, description="Whether to auto-fix lint issues")
    file_path: Optional[str] = Field(None, description="Specific file or None for entire repo")
class GenerateReview(BaseModel):
    """Action submitting a written review, tagged ``type == "generate_review"``."""

    type: Literal["generate_review"] = "generate_review"
    # The factory builds a new default list for every instance, so the
    # default is never shared between models.
    focus_areas: List[str] = Field(
        default_factory=lambda: ["correctness", "style", "performance"],
        description="Areas to focus the review on",
    )
    # Required; at least 20 characters.
    review_text: str = Field(..., min_length=20, description="Detailed review of the current state")
class Commit(BaseModel):
    """Action requesting a commit, tagged ``type == "commit"``."""

    type: Literal["commit"] = "commit"
    # Required; at least 10 characters.
    message: str = Field(..., min_length=10, description="Conventional-commit style message")
    # Fresh empty list per instance.
    files: List[str] = Field(default_factory=list, description="Files to stage; empty = all changed")
class SelfReflect(BaseModel):
    """Action recording a self-reflection, tagged ``type == "self_reflect"``."""

    type: Literal["self_reflect"] = "self_reflect"
    # Both narrative fields are required and need at least 10 characters.
    what_went_well: str = Field(..., min_length=10)
    what_to_improve: str = Field(..., min_length=10)
    adjusted_plan: Optional[str] = Field(None, description="Updated plan if reflecting mid-task")
class RequestIteration(BaseModel):
    """Action asking for another iteration, tagged ``type == "request_iteration"``."""

    type: Literal["request_iteration"] = "request_iteration"
    # Required; at least 10 characters.
    reason: str = Field(..., min_length=10, description="Why another iteration is needed")
    # Fresh empty list per instance.
    target_issues: List[str] = Field(default_factory=list, description="Specific issues to address")
# Union of all action models. Each member declares a distinct
# ``Literal`` default for its ``type`` field, which is what tells them apart.
# NOTE(review): this is a plain ``typing.Union``; no explicit pydantic
# discriminator is configured here — confirm how consumers resolve the union.
Action = Union[
    PlanStep,
    EditFile,
    RunTests,
    RunLint,
    GenerateReview,
    Commit,
    SelfReflect,
    RequestIteration,
]
# ─────────────────────────────────────────────
# OBSERVATION
# ─────────────────────────────────────────────
class FileSnapshot(BaseModel):
    """Path and content of a single file, plus a byte-size field."""

    path: str
    content: str
    # Defaults to 0; nothing in this model derives it from ``content``.
    size_bytes: int = 0
class TestResult(BaseModel):
    """Counts, captured output, and duration of a test run.

    Every field has a zero/empty default.
    """

    passed: int = 0
    failed: int = 0
    errors: int = 0
    output: str = ""
    duration_seconds: float = 0.0
class LintResult(BaseModel):
    """Violation count, captured output, and score of a lint run."""

    violations: int = 0
    output: str = ""
    score: float = 0.9  # 0.9 = clean
class ReviewArtifact(BaseModel):
    """A stored review: who wrote it, what it focused on, its text, and a step index."""

    reviewer: str = "agent"
    # Fresh empty list per instance.
    focus_areas: List[str] = Field(default_factory=list)
    text: str = ""
    timestamp_step: int = 0
class ReflectionArtifact(BaseModel):
    """A stored reflection tied to a step number.

    ``step``, ``what_went_well`` and ``what_to_improve`` are required;
    ``adjusted_plan`` defaults to ``None``.
    """

    step: int
    what_went_well: str
    what_to_improve: str
    adjusted_plan: Optional[str] = None
class Observation(BaseModel):
    """Full environment observation returned after every step."""

    # Task context (all required)
    task_id: str
    task_description: str
    difficulty: TaskDifficulty
    step_number: int
    max_steps: int
    phase: PhaseState

    # Repository state
    repo_files: List[FileSnapshot] = Field(default_factory=list)
    git_log: List[str] = Field(default_factory=list)

    # Execution results
    last_action_type: Optional[str] = None
    last_action_status: ActionStatus = ActionStatus.SUCCESS
    last_action_output: str = ""

    # Test & lint (None until set)
    test_results: Optional[TestResult] = None
    lint_results: Optional[LintResult] = None

    # Team artifacts
    plan: List[PlanStep] = Field(default_factory=list)
    reviews: List[ReviewArtifact] = Field(default_factory=list)
    reflections: List[ReflectionArtifact] = Field(default_factory=list)

    # Signals
    reward: float = 0.1
    cumulative_reward: float = 0.1
    done: bool = False
    info: Dict[str, Any] = Field(default_factory=dict)
# ─────────────────────────────────────────────
# EPISODE RESULT (for grader)
# ─────────────────────────────────────────────
class EpisodeResult(BaseModel):
    """Per-episode summary: component scores, final score, pass flag, and log.

    ``task_id`` and ``total_steps`` are required; everything else has a default.
    """

    task_id: str
    total_steps: int

    # Score fields all default to 0.1.
    # NOTE(review): presumably the lower bound of a [0.1, 0.9] score range —
    # no range validation is declared on these fields; confirm against the grader.
    test_pass_rate: float = 0.1
    lint_score: float = 0.1
    efficiency_score: float = 0.1
    review_quality: float = 0.1
    reflection_quality: float = 0.1
    final_score: float = 0.1

    passed: bool = False
    # Fresh empty list per instance.
    log: List[str] = Field(default_factory=list)