from pydantic import BaseModel, Field
from typing import Optional, List, Any, Dict


class ConflictObservation(BaseModel):
    """What the agent sees at each step."""

    # NOTE: the class docstring is kept verbatim on purpose; pydantic emits it
    # as the model's schema description, so it is effectively runtime text.

    # --- Task and file metadata (all required) ---
    task_name: str = Field(
        description="Name of the current task",
    )
    task_description: str = Field(
        description="What the agent must do",
    )
    filename: str = Field(
        description="Name of the file being resolved",
    )
    file_language: str = Field(
        description="Programming language of the file (python, text, etc.)",
    )

    # --- The conflict itself (all required) ---
    conflicted_content: str = Field(
        description="Full file content with Git conflict markers",
    )
    branch_ours: str = Field(
        description="Name of the HEAD (ours) branch",
    )
    branch_theirs: str = Field(
        description="Name of the incoming (theirs) branch",
    )
    num_conflicts: int = Field(
        description="Number of conflict blocks in the file",
    )

    # --- Feedback from the previous step; both default to None ---
    last_attempt: Optional[str] = Field(
        default=None,
        description="Agent's previous resolution attempt",
    )
    last_error: Optional[str] = Field(
        default=None,
        description="Feedback from last grading",
    )

    # --- Episode progress; plain defaults, no extra validation attached ---
    step: int = 0
    max_steps: int = 10
    done: bool = False


class ConflictAction(BaseModel):
    """The action the agent takes — the resolved file content."""

    # Single required field. The "no conflict markers" rule below is stated
    # only in the description text; this model attaches no validator for it.
    resolved_content: str = Field(
        description=(
            "The full file content after resolving all conflict markers. "
            "Must contain NO <<<<<<<, =======, or >>>>>>> lines."
        ),
    )


class ConflictReward(BaseModel):
    """Reward signal returned after each step."""

    # Required; validated to lie in the closed interval [0.0, 1.0].
    value: float = Field(
        ge=0.0,
        le=1.0,
        description="Reward value between 0.0 and 1.0",
    )

    # Required free-text explanation accompanying the value.
    reason: str = Field(
        description="Human-readable explanation of the reward",
    )

    # Optional breakdown keyed by name; default_factory builds a new empty
    # dict for each instance when the field is omitted.
    partial_scores: Dict[str, float] = Field(
        default_factory=dict,
    )


class StepResponse(BaseModel):
    """Full response from a step."""

    # Nested models: the observation and the reward for this step.
    observation: ConflictObservation
    reward: ConflictReward

    # Required flag. ConflictObservation also carries a `done` field; nothing
    # in this model checks that the two values agree.
    done: bool

    # Free-form extra data; a new empty dict is created when omitted.
    info: Dict[str, Any] = Field(
        default_factory=dict,
    )


class ResetRequest(BaseModel):
    """Request body for /reset."""

    # Falls back to "single_conflict" when the request omits it. The field is
    # typed as a plain str, so the names listed in the description are
    # documentation only and are not enforced by this model.
    task: str = Field(
        default="single_conflict",
        description=(
            "Task name: single_conflict | multi_conflict | logic_conflict"
        ),
    )


class StateResponse(BaseModel):
    """Current internal state of the environment."""

    # Explicit None default. A bare `Optional[str]` annotation is implicitly
    # optional under pydantic v1 but *required* under pydantic v2; spelling
    # out the default makes the field behave the same on both and matches how
    # the Optional fields on ConflictObservation are declared.
    task_name: Optional[str] = None

    # The remaining fields are required and carry no extra validation.
    step: int
    done: bool
    total_reward: float

    # One free-form dict per entry; the entry schema is not defined here.
    history: List[Dict[str, Any]]