from pydantic import BaseModel, Field
from typing import Optional, List, Literal, Dict, Any


class CodeComment(BaseModel):
    # One review comment attached to a single line of the code under review.
    #
    # Documentation lives in `#` comments rather than a class docstring on
    # purpose: pydantic copies a model's docstring into the generated JSON
    # schema as its "description", which would alter the schema output.

    # `ge=1` enforces the 1-indexed contract stated in the description, so a
    # line number of 0 or below is rejected at validation time instead of
    # flowing on to whatever matches comments against lines.
    line_number: int = Field(
        ..., ge=1, description="Line number being commented on (1-indexed)"
    )
    # Closed set of categories; any other string fails validation.
    issue_type: Literal["bug", "security", "performance", "style", "logic"] = Field(
        ..., description="Type of issue found"
    )
    # Closed set of severity labels.
    severity: Literal["critical", "major", "minor"] = Field(
        ..., description="Severity level of the issue"
    )
    description: str = Field(..., description="Detailed description of the issue found")
    # Optional: a comment may flag a problem without proposing a remedy.
    suggested_fix: Optional[str] = Field(None, description="Suggested fix for the issue")


class Action(BaseModel):
    # A complete review submission: zero or more line comments, a mandatory
    # verdict, and an optional free-text summary.

    # Defaults to a fresh empty list per instance (via default_factory).
    comments: List[CodeComment] = Field(
        default_factory=list,
        description="List of code review comments on specific lines",
    )

    # Required; must be one of the three literal verdict strings.
    verdict: Literal["approve", "request_changes", "comment"] = Field(
        default=...,
        description="Final verdict on the pull request",
    )

    summary: Optional[str] = Field(
        default=None,
        description="Overall summary of the review",
    )


class Observation(BaseModel):
    # What is presented for review at a given step: the diff plus pull-request
    # metadata, episode progress counters, and the task being attempted.
    # Every field is required.

    # --- material under review ---
    diff: str = Field(
        default=...,
        description="The code diff/patch to review",
    )
    file_name: str = Field(
        default=...,
        description="Name of the file being reviewed",
    )
    pr_title: str = Field(
        default=...,
        description="Title of the pull request",
    )
    pr_description: str = Field(
        default=...,
        description="Description of the pull request",
    )

    # --- episode progress ---
    step_number: int = Field(
        default=...,
        description="Current step number in the episode",
    )
    max_steps: int = Field(
        default=...,
        description="Maximum steps allowed in this episode",
    )

    # --- task context ---
    # Typed as a plain string; the description lists easy/medium/hard but the
    # model itself does not restrict the value.
    task_id: str = Field(
        default=...,
        description="Current task identifier (easy/medium/hard)",
    )
    task_description: str = Field(
        default=...,
        description="Description of the task objective",
    )


class Reward(BaseModel):
    # Reward for a review step: a scalar value, an optional per-component
    # breakdown, a human-readable message, and counts of found / missed /
    # false-positive issues.
    #
    # Documentation lives in `#` comments rather than a class docstring on
    # purpose: pydantic copies a model's docstring into the generated JSON
    # schema as its "description", which would alter the schema output.

    # The bounds enforce the range promised by the description, in the same
    # way GraderOutput.score enforces its 0.0..1.0 range. A value outside
    # [-1.0, 1.0] now raises a validation error instead of being accepted.
    value: float = Field(
        ..., ge=-1.0, le=1.0, description="Reward value between -1.0 and 1.0"
    )
    # Maps a component name to its contribution; empty dict when omitted.
    breakdown: Dict[str, float] = Field(
        default_factory=dict, description="Breakdown of reward components"
    )
    message: str = Field(..., description="Human-readable explanation of the reward")
    # The three counters below default to 0 when not supplied.
    issues_found: int = Field(0, description="Number of issues correctly identified")
    issues_missed: int = Field(0, description="Number of known issues missed")
    false_positives: int = Field(0, description="Number of false positive comments")


class EnvironmentState(BaseModel):
    # Snapshot of an environment's state. All fields are required; none
    # carries a default, a constraint, or a schema description.
    # NOTE(review): the per-field notes below are inferred from the field
    # names only -- confirm against the code that builds this model.

    # Presumably the same task identifier carried by Observation.task_id.
    task_id: str = Field(...)

    # Progress through the episode and its step budget.
    step_number: int = Field(...)
    max_steps: int = Field(...)

    # Presumably true once the episode has finished.
    done: bool = Field(...)

    # Presumably the reward accumulated so far in the episode.
    total_reward: float = Field(...)

    current_diff: str = Field(...)

    # Presumably the number of seeded issues and of agent comments so far.
    known_issue_count: int = Field(...)
    agent_comment_count: int = Field(...)

    # One free-form dict per history entry; the key schema is not defined here.
    episode_history: List[Dict[str, Any]] = Field(...)


class TaskInfo(BaseModel):
    # Descriptor for one task. All fields are required; none carries a
    # default or a schema description.

    id: str = Field(...)
    name: str = Field(...)
    description: str = Field(...)

    # Restricted to exactly these three difficulty labels.
    difficulty: Literal["easy", "medium", "hard"] = Field(...)

    max_steps: int = Field(...)

    # Free-form dict; presumably the JSON schema of the Action model --
    # verify against the code that populates it.
    action_schema: Dict[str, Any] = Field(...)


class GraderInput(BaseModel):
    # Input for a grading request: the task id, the episode history, and an
    # optional final action.

    task_id: str = Field(...)

    # One free-form dict per history entry; the key schema is not defined here.
    episode_history: List[Dict[str, Any]] = Field(...)

    # May be omitted, in which case it is None. Held as a raw dict here
    # rather than as a validated Action instance.
    final_action: Optional[Dict[str, Any]] = Field(default=None)


class GraderOutput(BaseModel):
    # Result of grading: a bounded score plus its breakdown, textual
    # feedback, and issue counters. All fields are required.

    # Validation rejects any score outside the closed interval [0.0, 1.0].
    score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Score between 0.0 and 1.0",
    )

    task_id: str = Field(...)

    # Maps a component name to a float value.
    breakdown: Dict[str, float] = Field(...)

    feedback: str = Field(...)

    # Same counter names as on Reward, but required here (no defaults).
    issues_found: int = Field(...)
    issues_missed: int = Field(...)
    false_positives: int = Field(...)