Spaces:

PrakashCider
/

teamforge

Sleeping

Your Name

fix(OpenEnv): implement system-wide [0.1, 0.9] boundary scrub for Phase 2 compliance

efa2d2a about 1 month ago

6.48 kB

	"""
	TeamForge Models
	Strictly-typed Pydantic models for all observations, actions, and state.
	"""

	from __future__ import annotations

	from enum import Enum
	from typing import Any, Dict, List, Literal, Optional, Union
	from pydantic import BaseModel, Field


	# ─────────────────────────────────────────────
	# ENUMS
	# ─────────────────────────────────────────────

	class TaskDifficulty(str, Enum):
	    """Difficulty tier assigned to a task.

	    Members are also ``str`` instances, so they compare equal to their
	    lowercase string values. ``Observation.difficulty`` is typed with this
	    enum.
	    """

	    EASY = "easy"
	    MEDIUM = "medium"
	    HARD = "hard"
	    BONUS = "bonus"


	class PhaseState(str, Enum):
	    """Named phase an episode can be in.

	    Members are also ``str`` instances, so they compare equal to their
	    lowercase string values. ``Observation.phase`` is typed with this enum.
	    """

	    PLANNING = "planning"
	    CODING = "coding"
	    TESTING = "testing"
	    REVIEWING = "reviewing"
	    REFLECTING = "reflecting"
	    DONE = "done"


	class ActionStatus(str, Enum):
	    """Outcome label for an executed action.

	    Members are also ``str`` instances, so they compare equal to their
	    lowercase string values. ``Observation.last_action_status`` is typed
	    with this enum and defaults to ``SUCCESS``.
	    """

	    SUCCESS = "success"
	    FAILURE = "failure"
	    PARTIAL = "partial"


	# ─────────────────────────────────────────────
	# ACTIONS (Union-typed, discriminated by `type`)
	# ─────────────────────────────────────────────

	class PlanStep(BaseModel):
	    """Action payload describing a single step of a plan.

	    ``step_number`` and ``description`` are required; the remaining fields
	    have defaults. Instances are also stored in ``Observation.plan``.
	    """

	    # Fixed tag that identifies this action kind in serialized payloads.
	    type: Literal["plan_step"] = "plan_step"
	    # Required; validation rejects values below 1.
	    step_number: int = Field(..., ge=1, description="Sequential step number in the plan")
	    # Required; must be at least 5 characters long.
	    description: str = Field(..., min_length=5, description="What this step accomplishes")
	    estimated_effort: Literal["low", "medium", "high"] = "medium"
	    # default_factory gives every instance its own empty list.
	    depends_on: List[int] = Field(default_factory=list, description="Step numbers this depends on")


	class EditFile(BaseModel):
	    """Action payload that supplies the full new content for one file.

	    All three data fields are required. ``content`` carries the complete
	    file body rather than a diff, per its field description.
	    """

	    # Fixed tag that identifies this action kind in serialized payloads.
	    type: Literal["edit_file"] = "edit_file"
	    file_path: str = Field(..., description="Relative path inside the repo")
	    content: str = Field(..., description="Full new content of the file")
	    # Required justification; must be at least 5 characters long.
	    reason: str = Field(..., min_length=5, description="Why this edit is being made")


	class RunTests(BaseModel):
	    """Action payload requesting a test run.

	    Every field has a default, so the model can be built with no arguments.
	    """

	    # Fixed tag that identifies this action kind in serialized payloads.
	    type: Literal["run_tests"] = "run_tests"
	    # None selects all tests, per the field description.
	    test_path: Optional[str] = Field(None, description="Specific test file or None for all tests")
	    # Validation accepts 5 through 120 inclusive; defaults to 30.
	    timeout_seconds: int = Field(default=30, ge=5, le=120)


	class RunLint(BaseModel):
	    """Action payload requesting a lint run.

	    Every field has a default, so the model can be built with no arguments.
	    """

	    # Fixed tag that identifies this action kind in serialized payloads.
	    type: Literal["run_lint"] = "run_lint"
	    # Auto-fixing is opt-in: defaults to False.
	    fix: bool = Field(default=False, description="Whether to auto-fix lint issues")
	    # None selects the entire repo, per the field description.
	    file_path: Optional[str] = Field(None, description="Specific file or None for entire repo")


	class GenerateReview(BaseModel):
	    """Action payload carrying a written review.

	    ``review_text`` is required; ``focus_areas`` falls back to a fresh
	    three-item list when omitted.
	    """

	    # Fixed tag that identifies this action kind in serialized payloads.
	    type: Literal["generate_review"] = "generate_review"
	    # The lambda builds a new list per instance so the default is never shared.
	    focus_areas: List[str] = Field(
	        default_factory=lambda: ["correctness", "style", "performance"],
	        description="Areas to focus the review on",
	    )
	    # Required; must be at least 20 characters long.
	    review_text: str = Field(..., min_length=20, description="Detailed review of the current state")


	class Commit(BaseModel):
	    """Action payload requesting a commit.

	    ``message`` is required; ``files`` defaults to an empty list.
	    """

	    # Fixed tag that identifies this action kind in serialized payloads.
	    type: Literal["commit"] = "commit"
	    # Required; must be at least 10 characters long. The conventional-commit
	    # format named in the description is not enforced by this model.
	    message: str = Field(..., min_length=10, description="Conventional-commit style message")
	    # An empty list means "all changed files", per the field description.
	    files: List[str] = Field(default_factory=list, description="Files to stage; empty = all changed")


	class SelfReflect(BaseModel):
	    """Action payload carrying a self-reflection.

	    Both free-text fields are required and must be at least 10 characters
	    long; ``adjusted_plan`` is optional.
	    """

	    # Fixed tag that identifies this action kind in serialized payloads.
	    type: Literal["self_reflect"] = "self_reflect"
	    what_went_well: str = Field(..., min_length=10)
	    what_to_improve: str = Field(..., min_length=10)
	    # Defaults to None when no plan update accompanies the reflection.
	    adjusted_plan: Optional[str] = Field(None, description="Updated plan if reflecting mid-task")


	class RequestIteration(BaseModel):
	    """Action payload requesting another iteration.

	    ``reason`` is required; ``target_issues`` defaults to an empty list.
	    """

	    # Fixed tag that identifies this action kind in serialized payloads.
	    type: Literal["request_iteration"] = "request_iteration"
	    # Required; must be at least 10 characters long.
	    reason: str = Field(..., min_length=10, description="Why another iteration is needed")
	    # default_factory gives every instance its own empty list.
	    target_issues: List[str] = Field(default_factory=list, description="Specific issues to address")


	# Union of every action model in this module. Each member pins its own
	# `type` literal, which is what tells serialized payloads apart; note that
	# no explicit pydantic discriminator is configured on this alias itself.
	Action = Union[
	    PlanStep, EditFile,
	    RunTests, RunLint,
	    GenerateReview, Commit,
	    SelfReflect, RequestIteration,
	]


	# ─────────────────────────────────────────────
	# OBSERVATION
	# ─────────────────────────────────────────────

	class FileSnapshot(BaseModel):
	    """Path and content of one file.

	    ``path`` and ``content`` are required. Instances populate
	    ``Observation.repo_files``.
	    """

	    path: str
	    content: str
	    # Defaults to 0; this model does not derive it from `content`.
	    size_bytes: int = 0


	class TestResult(BaseModel):
	    """Counts and raw output from a test run.

	    Every field has a zero or empty default. Stored in
	    ``Observation.test_results``.
	    """

	    passed: int = 0
	    failed: int = 0
	    errors: int = 0
	    # Output text from the run; empty by default.
	    output: str = ""
	    duration_seconds: float = 0.0


	class LintResult(BaseModel):
	    """Violation count, raw output and score from a lint run.

	    Every field has a default. Stored in ``Observation.lint_results``.
	    """

	    violations: int = 0
	    # Output text from the run; empty by default.
	    output: str = ""
	    # No range constraint is enforced on this field by the model.
	    score: float = 0.9  # 0.9 = clean


	class ReviewArtifact(BaseModel):
	    """Stored record of one review.

	    Every field has a default. Instances populate ``Observation.reviews``.
	    """

	    # Defaults to the literal string "agent".
	    reviewer: str = "agent"
	    # default_factory gives every instance its own empty list.
	    focus_areas: List[str] = Field(default_factory=list)
	    text: str = ""
	    # Presumably the step number at which the review was recorded; confirm
	    # against the code that builds this artifact.
	    timestamp_step: int = 0


	class ReflectionArtifact(BaseModel):
	    """Stored record of one reflection.

	    ``step``, ``what_went_well`` and ``what_to_improve`` are required. The
	    text fields share their names with ``SelfReflect`` but carry no length
	    constraints here. Instances populate ``Observation.reflections``.
	    """

	    step: int
	    what_went_well: str
	    what_to_improve: str
	    adjusted_plan: Optional[str] = None


	class Observation(BaseModel):
	    """Full environment observation returned after every step.

	    The six task-context fields are required; everything else has a
	    default, with list and dict fields built per instance via
	    ``default_factory``.
	    """

	    # Task context (all required)
	    task_id: str
	    task_description: str
	    difficulty: TaskDifficulty
	    step_number: int
	    max_steps: int
	    phase: PhaseState

	    # Repository state
	    repo_files: List[FileSnapshot] = Field(default_factory=list)
	    git_log: List[str] = Field(default_factory=list)

	    # Execution results
	    last_action_type: Optional[str] = None
	    last_action_status: ActionStatus = ActionStatus.SUCCESS
	    last_action_output: str = ""

	    # Test & lint (None until a result is attached)
	    test_results: Optional[TestResult] = None
	    lint_results: Optional[LintResult] = None

	    # Team artifacts
	    plan: List[PlanStep] = Field(default_factory=list)
	    reviews: List[ReviewArtifact] = Field(default_factory=list)
	    reflections: List[ReflectionArtifact] = Field(default_factory=list)

	    # Signals
	    # Both rewards default to 0.1; no range constraint is enforced here.
	    reward: float = 0.1
	    cumulative_reward: float = 0.1
	    done: bool = False
	    info: Dict[str, Any] = Field(default_factory=dict)


	# ─────────────────────────────────────────────
	# EPISODE RESULT (for grader)
	# ─────────────────────────────────────────────

	class EpisodeResult(BaseModel):
	    """Summary scores for one finished episode.

	    ``task_id`` and ``total_steps`` are required. Every score field
	    defaults to 0.1 and the model enforces no range on them.
	    """

	    task_id: str
	    total_steps: int
	    # NOTE(review): 0.1 is presumably the floor of the score range used by
	    # the grader; confirm against the scoring code.
	    test_pass_rate: float = 0.1
	    lint_score: float = 0.1
	    efficiency_score: float = 0.1
	    review_quality: float = 0.1
	    reflection_quality: float = 0.1
	    final_score: float = 0.1
	    passed: bool = False
	    # default_factory gives every instance its own empty list.
	    log: List[str] = Field(default_factory=list)