# doosaganesh's picture
# Upload 13 files
# 74f8904 verified
from pydantic import BaseModel, Field
from typing import Optional, List, Any, Dict
class ConflictObservation(BaseModel):
    """What the agent sees at each step."""

    # --- Task identity (required) ---
    task_name: str = Field(description="Name of the current task")
    task_description: str = Field(description="What the agent must do")

    # --- File under resolution (required) ---
    filename: str = Field(description="Name of the file being resolved")
    file_language: str = Field(
        description="Programming language of the file (python, text, etc.)",
    )
    conflicted_content: str = Field(
        description="Full file content with Git conflict markers",
    )
    branch_ours: str = Field(description="Name of the HEAD (ours) branch")
    branch_theirs: str = Field(description="Name of the incoming (theirs) branch")
    num_conflicts: int = Field(description="Number of conflict blocks in the file")

    # --- Feedback from the previous attempt; both default to None ---
    last_attempt: Optional[str] = Field(
        default=None,
        description="Agent's previous resolution attempt",
    )
    last_error: Optional[str] = Field(
        default=None,
        description="Feedback from last grading",
    )

    # --- Episode progress (plain defaults, no extra field metadata) ---
    step: int = 0
    max_steps: int = 10
    done: bool = False
class ConflictAction(BaseModel):
    """The action the agent takes — the resolved file content."""

    # Required. The "no conflict markers" rule is stated only in the
    # description text; this model declares no validator that enforces it.
    resolved_content: str = Field(
        description=(
            "The full file content after resolving all conflict markers. "
            "Must contain NO <<<<<<<, =======, or >>>>>>> lines."
        ),
    )
class ConflictReward(BaseModel):
    """Reward signal returned after each step."""

    # ge/le bound the accepted value to the closed interval [0.0, 1.0].
    value: float = Field(
        ge=0.0,
        le=1.0,
        description="Reward value between 0.0 and 1.0",
    )
    reason: str = Field(description="Human-readable explanation of the reward")
    # default_factory builds a fresh empty dict when the field is omitted.
    partial_scores: Dict[str, float] = Field(default_factory=dict)
class StepResponse(BaseModel):
    """Full response from a step."""

    # Nested models declared earlier in this module.
    observation: ConflictObservation
    reward: ConflictReward

    # Top-level flag; ConflictObservation also declares its own `done` field.
    done: bool

    # Free-form extras; a fresh empty dict is created when omitted.
    info: Dict[str, Any] = Field(default_factory=dict)
class ResetRequest(BaseModel):
    """Request body for /reset."""

    # Typed as a plain str: the task names listed in the description are
    # documentation only and are not enforced by this model.
    task: str = Field(
        default="single_conflict",
        description="Task name: single_conflict | multi_conflict | logic_conflict",
    )
class StateResponse(BaseModel):
    """Current internal state of the environment."""

    # Explicit default. A bare `Optional[str]` annotation implicitly defaults
    # to None under pydantic v1 but is a *required* field under pydantic v2;
    # spelling out `= None` gives the same behaviour on both versions.
    task_name: Optional[str] = None

    # Required fields.
    step: int
    done: bool
    total_reward: float
    # One dict per history entry; the entry schema is not defined by this
    # model (NOTE(review): confirm the shape against the code that builds it).
    history: List[Dict[str, Any]]