| from pydantic import BaseModel, Field |
| from typing import Optional, List, Any, Dict |
|
|
|
|
class ConflictObservation(BaseModel):
    """What the agent sees at each step."""

    # NOTE: the class docstring and every ``description`` below are surfaced
    # by pydantic in the generated JSON schema, so that text is kept verbatim.

    # --- Task identification (required) ---
    task_name: str = Field(
        description="Name of the current task",
    )
    task_description: str = Field(
        description="What the agent must do",
    )

    # --- The file under resolution (required) ---
    filename: str = Field(
        description="Name of the file being resolved",
    )
    file_language: str = Field(
        description="Programming language of the file (python, text, etc.)",
    )
    conflicted_content: str = Field(
        description="Full file content with Git conflict markers",
    )

    # --- Branch names and conflict count (required) ---
    branch_ours: str = Field(
        description="Name of the HEAD (ours) branch",
    )
    branch_theirs: str = Field(
        description="Name of the incoming (theirs) branch",
    )
    num_conflicts: int = Field(
        description="Number of conflict blocks in the file",
    )

    # --- Feedback fields; both default to None when not supplied ---
    last_attempt: Optional[str] = Field(
        None,
        description="Agent's previous resolution attempt",
    )
    last_error: Optional[str] = Field(
        None,
        description="Feedback from last grading",
    )

    # --- Progress fields; these carry defaults but no schema description ---
    step: int = 0
    max_steps: int = 10
    done: bool = False
|
|
|
|
class ConflictAction(BaseModel):
    """The action the agent takes — the resolved file content."""

    # The only field, and it is required. The description asks for
    # marker-free content, but nothing in this class validates that; the
    # model itself only checks that the value is a string.
    resolved_content: str = Field(
        description=(
            "The full file content after resolving all conflict markers. "
            "Must contain NO <<<<<<<, =======, or >>>>>>> lines."
        ),
    )
|
|
|
|
class ConflictReward(BaseModel):
    """Reward signal returned after each step."""

    # ``ge``/``le`` bound the accepted value to the closed range [0.0, 1.0];
    # anything outside it fails validation.
    value: float = Field(
        description="Reward value between 0.0 and 1.0",
        ge=0.0,
        le=1.0,
    )

    # Required free-text justification for the value above.
    reason: str = Field(
        description="Human-readable explanation of the reward",
    )

    # Optional name -> score mapping. ``default_factory`` builds a new empty
    # dict for each instance instead of sharing one default object.
    partial_scores: Dict[str, float] = Field(
        default_factory=dict,
    )
|
|
|
|
class StepResponse(BaseModel):
    """Full response from a step."""

    # Nested models declared earlier in this module; both are required.
    observation: ConflictObservation
    reward: ConflictReward

    # NOTE(review): ConflictObservation also declares a ``done`` field, and
    # nothing in this model ties the two values together — confirm that the
    # code building the response keeps them consistent.
    done: bool

    # Free-form extra data; a new empty dict is created when it is omitted.
    info: Dict[str, Any] = Field(
        default_factory=dict,
    )
|
|
|
|
class ResetRequest(BaseModel):
    """Request body for /reset."""

    # Falls back to "single_conflict" when the field is omitted. The
    # description lists three task names, but the annotation is a plain
    # ``str``, so this model accepts any string value.
    task: str = Field(
        "single_conflict",
        description="Task name: single_conflict | multi_conflict | logic_conflict",
    )
|
|
|
|
class StateResponse(BaseModel):
    """Current internal state of the environment."""

    # None is an accepted value for the task name.
    # NOTE(review): there is no explicit default here; whether the field may
    # be omitted entirely depends on the pydantic major version in use
    # (v1 treats it as defaulting to None, v2 treats it as required) — confirm.
    task_name: Optional[str]

    # Required scalar fields; none of them declares a default.
    step: int
    done: bool
    total_reward: float

    # Required list of string-keyed dicts; the shape of each entry is not
    # constrained by this model.
    history: List[Dict[str, Any]]
|
|