"""
models.py — SpectraQual Typed Pydantic Models
OpenEnv spec requires: typed Observation, Action, Reward models.
"""

from __future__ import annotations
from typing import List, Literal, Optional, Dict, Any
from pydantic import BaseModel, Field


# ---------------------------
# PCB OBSERVATION
# ---------------------------
class PCBObservation(BaseModel):
    """Observation returned after each reset() or step()."""

    # --- Board under inspection ---
    board_id: str = Field(
        default=...,
        description="Unique board identifier, e.g. SQ-4321",
    )
    defect_type: Literal[
        "none",
        "missing_component",
        "solder_bridge",
        "short_circuit",
    ] = Field(default=..., description="Type of defect detected on the PCB")
    component_cost: float = Field(
        default=...,
        ge=10.0,
        le=200.0,
        description="Replacement cost of damaged component in ₹",
    )
    criticality: float = Field(
        default=...,
        ge=0.1,
        le=1.0,
        description="Risk score — higher means more critical circuit",
    )

    # --- Soldering slot availability ---
    # Only slots_free is bounded (>= 0); entries of slots_state are not
    # range-checked by this model.
    slots_free: int = Field(
        default=...,
        ge=0,
        description="Number of soldering slots currently available",
    )
    slots_state: List[int] = Field(
        default=...,
        description="Remaining time units for each soldering slot (0=free)",
    )

    # --- Anomaly signal (optional; defaults describe a normal board) ---
    is_anomaly: bool = Field(
        default=False,
        description="True if this board exhibits rare/unusual characteristics",
    )
    anomaly_score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Anomaly confidence (0=normal, 1=highly anomalous)",
    )

    # --- Episode bookkeeping ---
    step: int = Field(
        default=...,
        ge=0,
        description="Current step number in the episode",
    )
    task_id: str = Field(default=..., description="ID of the active task")
    # Plain strings: entries are not validated against any fixed action set.
    valid_actions: List[str] = Field(
        default=...,
        description="List of valid actions for this observation",
    )

    # --- Real-time metrics (all optional, starting at 0.0) ---
    rolling_accuracy: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Fraction of correct decisions so far",
    )
    throughput: float = Field(
        default=0.0,
        ge=0.0,
        description="Boards processed per step so far",
    )
    # Unbounded on purpose in the model: no ge/le is declared for this field.
    cumulative_reward: float = Field(
        default=0.0,
        description="Cumulative normalized reward so far in this episode",
    )


# ---------------------------
# PCB ACTION
# ---------------------------
class PCBAction(BaseModel):
    """Action submitted by an agent to the environment."""

    # Required field restricted to a closed set of six decision strings;
    # the Literal annotation is what limits the accepted values.
    action: Literal[
        "PASS", "SCRAP",
        "ROUTE_COMPONENT_REPLACEMENT", "ROUTE_SOLDERING", "ROUTE_DIAGNOSTICS",
        "WAIT",
    ] = Field(default=..., description="Decision made for the current PCB")


# ---------------------------
# REWARD COMPONENTS
# ---------------------------
class RewardComponents(BaseModel):
    """Decomposed reward signal for transparency and debugging."""

    # NOTE: only `normalized` declares ge/le bounds. The 0.0–1.0 ranges quoted
    # in the other descriptions are documentation, not validation.

    # --- Individual components ---
    defect_reward: float = Field(
        default=...,
        description="Score for handling the defect correctly (0.0–1.0)",
    )
    cost_efficiency: float = Field(
        default=...,
        description="Economic value retained vs. lost (0.0–1.0)",
    )
    queue_penalty: float = Field(
        default=...,
        description="Penalty for creating factory bottlenecks (0.0–1.0, lower is worse)",
    )
    criticality_factor: float = Field(
        default=...,
        description="Risk-adjusted modifier based on criticality (0.0–1.0)",
    )
    # The only component with a default: omitted means no bonus (0.0).
    anomaly_bonus: float = Field(
        default=0.0,
        description="Bonus for correctly flagging/handling anomalous board (0.0–1.0)",
    )

    # --- Aggregates ---
    total_raw: float = Field(
        default=...,
        description="Weighted sum of all components before normalization",
    )
    normalized: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Final normalized reward in [0.0, 1.0]",
    )

    # --- Human-facing rationale ---
    explanation: str = Field(
        default=...,
        description="Human-readable explanation of why this reward was given",
    )


# ---------------------------
# STEP RESULT
# ---------------------------
class StepResult(BaseModel):
    """Full result returned by step() and reset()."""

    # Required: the observation carried by this result.
    observation: PCBObservation

    # Scalar reward, validated to lie in [0.0, 1.0]; 0.0 when not supplied.
    reward: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Normalized reward for this step [0.0, 1.0]",
    )

    # Optional breakdown; stays None unless the caller provides one.
    reward_components: Optional[RewardComponents] = Field(
        default=None,
        description="Detailed breakdown of reward components",
    )

    # Required episode-termination flag.
    done: bool = Field(default=..., description="True if the episode has ended")

    # Free-form extras; default_factory builds a new dict for each instance.
    info: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional diagnostic info",
    )


# ---------------------------
# TASK RESULT (for graders)
# ---------------------------
class TaskResult(BaseModel):
    """Summary of a completed task run, consumed by graders.

    Every field except ``final_score`` is required. Field documentation is
    attached via ``Field(description=...)`` so it is carried in the model's
    generated schema rather than living only in source comments.
    No range constraints are declared on any field.
    """

    # --- Run identity and length ---
    task_id: str = Field(..., description="ID of the task this result summarizes")
    total_steps: int = Field(..., description="Total number of steps in the run")

    # --- Per-step signal ---
    rewards: List[float] = Field(..., description="Per-step normalized rewards")

    # --- Decision counts ---
    correct_decisions: int = Field(..., description="Number of correct decisions")
    total_decisions: int = Field(..., description="Total number of decisions")

    # --- Queue and anomaly counters ---
    bottleneck_count: int = Field(
        ..., description="Number of times the queue was maxed out"
    )
    anomaly_total: int = Field(
        ..., description="Number of anomaly boards that appeared"
    )
    anomaly_flagged: int = Field(
        ..., description="Number of anomaly boards the agent correctly flagged"
    )

    # --- Raw reward totals ---
    cumulative_raw_reward: float = Field(..., description="Cumulative raw reward")
    max_possible_raw: float = Field(..., description="Maximum possible raw reward")

    # Defaults to 0.0 as a placeholder; the grader fills in the real value.
    final_score: float = Field(0.0, description="Final score; filled by grader")