# Spaces: PrakashCider/teamforge (Sleeping) - File size: 6,484 Bytes

"""
TeamForge Models
Strictly-typed Pydantic models for all observations, actions, and state.
"""

from __future__ import annotations

from enum import Enum
from typing import Any, Dict, List, Literal, Optional, Union
from pydantic import BaseModel, Field


# ─────────────────────────────────────────────
# ENUMS
# ─────────────────────────────────────────────

class TaskDifficulty(str, Enum):
    # Difficulty tier attached to a task. Because the enum mixes in ``str``,
    # every member is a real string equal to its lowercase value
    # (e.g. ``TaskDifficulty.EASY == "easy"``).
    EASY = "easy"
    MEDIUM = "medium"
    HARD = "hard"
    BONUS = "bonus"


class PhaseState(str, Enum):
    # Named phases an episode can be in. Members are ``str`` instances equal
    # to their lowercase value. The declaration order below is the iteration
    # order of the enum; nothing in this file enforces transitions between
    # phases.
    PLANNING = "planning"
    CODING = "coding"
    TESTING = "testing"
    REVIEWING = "reviewing"
    REFLECTING = "reflecting"
    DONE = "done"


class ActionStatus(str, Enum):
    # Outcome label for an executed action. Members are ``str`` instances
    # equal to their lowercase value.
    SUCCESS = "success"
    FAILURE = "failure"
    PARTIAL = "partial"


# ─────────────────────────────────────────────
# ACTIONS  (Union-typed, discriminated by `type`)
# ─────────────────────────────────────────────

class PlanStep(BaseModel):
    # Action payload describing one numbered step of a plan.
    # ``type`` is pinned to the literal "plan_step" so the payload is
    # self-identifying.
    type: Literal["plan_step"] = "plan_step"
    # Required; validated to be >= 1.
    step_number: int = Field(
        default=...,
        ge=1,
        description="Sequential step number in the plan",
    )
    # Required; at least 5 characters long.
    description: str = Field(
        default=...,
        min_length=5,
        description="What this step accomplishes",
    )
    estimated_effort: Literal["low", "medium", "high"] = "medium"
    # A fresh empty list is created per instance when omitted.
    depends_on: List[int] = Field(
        default_factory=list,
        description="Step numbers this depends on",
    )


class EditFile(BaseModel):
    # Action payload that replaces a file's content wholesale.
    # All three payload fields are required.
    type: Literal["edit_file"] = "edit_file"
    file_path: str = Field(
        default=...,
        description="Relative path inside the repo",
    )
    content: str = Field(
        default=...,
        description="Full new content of the file",
    )
    # Justification text; must be at least 5 characters.
    reason: str = Field(
        default=...,
        min_length=5,
        description="Why this edit is being made",
    )


class RunTests(BaseModel):
    # Action payload requesting a test run.
    type: Literal["run_tests"] = "run_tests"
    # Optional target; defaults to None.
    test_path: Optional[str] = Field(
        default=None,
        description="Specific test file or None for all tests",
    )
    # Defaults to 30 and is validated to lie within [5, 120].
    timeout_seconds: int = Field(30, ge=5, le=120)


class RunLint(BaseModel):
    # Action payload requesting a lint run.
    type: Literal["run_lint"] = "run_lint"
    # Off by default.
    fix: bool = Field(
        False,
        description="Whether to auto-fix lint issues",
    )
    # Optional target; defaults to None.
    file_path: Optional[str] = Field(
        default=None,
        description="Specific file or None for entire repo",
    )


class GenerateReview(BaseModel):
    # Action payload carrying a written review.
    type: Literal["generate_review"] = "generate_review"
    # When omitted, a new three-item list is built for each instance by the
    # factory below (never shared between instances).
    focus_areas: List[str] = Field(
        description="Areas to focus the review on",
        default_factory=lambda: ["correctness", "style", "performance"],
    )
    # Required; at least 20 characters long.
    review_text: str = Field(
        default=...,
        min_length=20,
        description="Detailed review of the current state",
    )


class Commit(BaseModel):
    # Action payload requesting a commit.
    type: Literal["commit"] = "commit"
    # Required; at least 10 characters long.
    message: str = Field(
        default=...,
        min_length=10,
        description="Conventional-commit style message",
    )
    # Defaults to a fresh empty list per instance.
    files: List[str] = Field(
        description="Files to stage; empty = all changed",
        default_factory=list,
    )


class SelfReflect(BaseModel):
    # Action payload carrying a self-reflection.
    type: Literal["self_reflect"] = "self_reflect"
    # Both free-text fields are required and need at least 10 characters.
    what_went_well: str = Field(default=..., min_length=10)
    what_to_improve: str = Field(default=..., min_length=10)
    # Optional; defaults to None.
    adjusted_plan: Optional[str] = Field(
        default=None,
        description="Updated plan if reflecting mid-task",
    )


class RequestIteration(BaseModel):
    # Action payload asking for another iteration.
    type: Literal["request_iteration"] = "request_iteration"
    # Required; at least 10 characters long.
    reason: str = Field(
        default=...,
        min_length=10,
        description="Why another iteration is needed",
    )
    # Defaults to a fresh empty list per instance.
    target_issues: List[str] = Field(
        description="Specific issues to address",
        default_factory=list,
    )


# Union of every action model. Each member declares its own Literal ``type``
# tag; the alias itself is a plain ``typing.Union`` (no discriminator is
# declared here), so the member order below is kept exactly as declared.
Action = Union[
    PlanStep, EditFile, RunTests, RunLint,
    GenerateReview, Commit, SelfReflect, RequestIteration,
]


# ─────────────────────────────────────────────
# OBSERVATION
# ─────────────────────────────────────────────

class FileSnapshot(BaseModel):
    # One file captured as a path plus its text content.
    # ``path`` and ``content`` are required.
    path: str = Field(default=...)
    content: str = Field(default=...)
    # Defaults to 0; this model does not compute it from ``content``.
    size_bytes: int = Field(default=0)


class TestResult(BaseModel):
    # Summary of a test run; every field is optional and starts at a
    # zero/empty default.
    passed: int = Field(default=0)
    failed: int = Field(default=0)
    errors: int = Field(default=0)
    output: str = Field(default="")
    duration_seconds: float = Field(default=0.0)


class LintResult(BaseModel):
    # Summary of a lint run.
    violations: int = Field(default=0)
    output: str = Field(default="")
    # Default is 0.9; per the original author's note, 0.9 = clean.
    score: float = Field(default=0.9)


class ReviewArtifact(BaseModel):
    # Stored record of a review. All fields have defaults.
    reviewer: str = Field(default="agent")
    # Fresh empty list per instance when omitted.
    focus_areas: List[str] = Field(default_factory=list)
    text: str = Field(default="")
    timestamp_step: int = Field(default=0)


class ReflectionArtifact(BaseModel):
    # Stored record of a reflection. The first three fields are required;
    # unlike SelfReflect, no minimum length is enforced on the text here.
    step: int = Field(default=...)
    what_went_well: str = Field(default=...)
    what_to_improve: str = Field(default=...)
    adjusted_plan: Optional[str] = Field(default=None)


class Observation(BaseModel):
    """Full environment observation returned after every step."""

    # --- Task context (all six fields are required) ---
    task_id: str = Field(default=...)
    task_description: str = Field(default=...)
    difficulty: TaskDifficulty = Field(default=...)
    step_number: int = Field(default=...)
    max_steps: int = Field(default=...)
    phase: PhaseState = Field(default=...)

    # --- Repository state (fresh empty lists when omitted) ---
    repo_files: List[FileSnapshot] = Field(default_factory=list)
    git_log: List[str] = Field(default_factory=list)

    # --- Result of the most recent action ---
    last_action_type: Optional[str] = Field(default=None)
    # Note: the status defaults to SUCCESS even when no action type is set.
    last_action_status: ActionStatus = Field(default=ActionStatus.SUCCESS)
    last_action_output: str = Field(default="")

    # --- Latest test and lint summaries; None when not provided ---
    test_results: Optional[TestResult] = Field(default=None)
    lint_results: Optional[LintResult] = Field(default=None)

    # --- Accumulated team artifacts (fresh empty lists when omitted) ---
    plan: List[PlanStep] = Field(default_factory=list)
    reviews: List[ReviewArtifact] = Field(default_factory=list)
    reflections: List[ReflectionArtifact] = Field(default_factory=list)

    # --- Signals ---
    # Both reward fields default to 0.1 (not 0.0).
    reward: float = Field(default=0.1)
    cumulative_reward: float = Field(default=0.1)
    done: bool = Field(default=False)
    # Free-form extra data; a fresh empty dict per instance when omitted.
    info: Dict[str, Any] = Field(default_factory=dict)


# ─────────────────────────────────────────────
# EPISODE RESULT (for grader)
# ─────────────────────────────────────────────

class EpisodeResult(BaseModel):
    # Per-episode summary record (the section banner marks it "for grader").
    # ``task_id`` and ``total_steps`` are required.
    task_id: str = Field(default=...)
    total_steps: int = Field(default=...)
    # Every score component, and the final score, defaults to 0.1 (not 0.0).
    test_pass_rate: float = Field(default=0.1)
    lint_score: float = Field(default=0.1)
    efficiency_score: float = Field(default=0.1)
    review_quality: float = Field(default=0.1)
    reflection_quality: float = Field(default=0.1)
    final_score: float = Field(default=0.1)
    passed: bool = Field(default=False)
    # Fresh empty list per instance when omitted.
    log: List[str] = Field(default_factory=list)