"""
Typed Pydantic models for the Retail Inventory & Expiry Management OpenEnv.
Defines Observation, Action, Reward, and supporting data structures.
"""

from __future__ import annotations
from enum import Enum
from typing import Dict, List, Optional
from pydantic import BaseModel, Field


# ---------------------------------------------------------------------------
# Action space
# ---------------------------------------------------------------------------

class ActionType(str, Enum):
    """Closed set of action kinds an agent may choose from in a time-step.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase string value.
    """

    # Apply a percentage discount to a product.
    DISCOUNT = "discount"
    # Place a restock order for a product.
    REORDER = "reorder"
    # Remove expired / unsellable product from the shelf.
    REMOVE = "remove"
    # Take no action this step.
    DO_NOTHING = "do_nothing"


class Action(BaseModel):
    """
    A single decision submitted by the agent for one time-step.

    Fields
    ------
    action_type : ActionType
        The kind of operation to perform.  Mandatory.
    product_id : Optional[str]
        Identifier of the product to act on.  Defaults to None; expected to
        be supplied for every action other than DO_NOTHING (this model does
        not enforce that requirement itself).
    discount_pct : float
        Discount percentage, validated to lie within 0–80.  Only used when
        action_type == DISCOUNT.
    reorder_qty : int
        Non-negative number of units to reorder.  Only used when
        action_type == REORDER.
    """

    action_type: ActionType = Field(..., description="Type of action")
    product_id: Optional[str] = Field(None, description="Target product ID")
    discount_pct: float = Field(
        0.0,
        ge=0.0,
        le=80.0,
        description="Discount percentage (0-80)",
    )
    reorder_qty: int = Field(0, ge=0, description="Units to reorder")


# ---------------------------------------------------------------------------
# Product snapshot (inside observation)
# ---------------------------------------------------------------------------

class ProductState(BaseModel):
    """Agent-facing snapshot of one product, as embedded in an observation."""

    product_id: str
    name: str
    category: str

    # Stock level; validated to be non-negative.
    stock: int = Field(..., ge=0)

    # Both price and cost are validated to be strictly positive.
    price: float = Field(..., gt=0)
    cost: float = Field(..., gt=0)

    days_to_expiry: int = Field(
        ...,
        description="Days until expiry; -1 = non-perishable",
    )

    # Discount currently in effect; validated to lie within 0–80.
    current_discount: float = Field(0.0, ge=0.0, le=80.0)

    demand_estimate: float = Field(
        ...,
        description="Estimated daily demand (units)",
    )

    # Defaults to False when the producer does not set it.
    is_expired: bool = False


# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------

class Observation(BaseModel):
    """
    The full view of the environment handed to the agent at each time-step.

    Fields
    ------
    day                   : Current simulation day (1-indexed).
    total_days            : Episode length in days.
    products              : List of product snapshots.
    daily_revenue         : Revenue earned on the current day.
    daily_waste_cost      : Cost of items wasted (expired / removed) today.
    cumulative_revenue    : Total revenue so far this episode.
    cumulative_waste_cost : Total waste cost so far this episode.
    stockout_events       : Number of times any product ran out of stock today.
    budget_remaining      : Remaining budget for reorder operations.
    reward                : Step reward carried on the observation; defaults
                            to 0.001.

    Only ``day``, ``total_days`` and ``products`` are mandatory; every other
    field has a default.
    """

    # Mandatory fields.
    day: int
    total_days: int
    products: List[ProductState]

    # Per-day figures.
    daily_revenue: float = 0.0
    daily_waste_cost: float = 0.0

    # Running totals for the episode.
    cumulative_revenue: float = 0.0
    cumulative_waste_cost: float = 0.0

    stockout_events: int = 0
    budget_remaining: float = 0.0

    # clamped step reward for OpenEnv standard
    reward: float = 0.001


# ---------------------------------------------------------------------------
# Reward
# ---------------------------------------------------------------------------

class Reward(BaseModel):
    """
    Per-step reward broken down into its individual contributions.

    Components
    ----------
    sales_revenue    : +ve reward for units sold.
    waste_penalty    : -ve penalty for expired / removed units.
    stockout_penalty : -ve penalty for stockout events.
    reorder_cost     : -ve cost of placing a reorder.
    total            : Sum of all components; refreshed only when
                       ``compute_total`` is called.
    """

    sales_revenue: float = 0.0
    waste_penalty: float = 0.0
    stockout_penalty: float = 0.0
    reorder_cost: float = 0.0
    total: float = 0.0

    def compute_total(self) -> "Reward":
        """Store the sum of the four components in ``total`` and return self.

        The components are added as-is (penalties are not negated here), and
        the instance is mutated in place so calls can be chained.
        """
        # Accumulate strictly left to right, starting from sales_revenue, so
        # the floating-point result does not depend on regrouping.
        running = self.sales_revenue
        for component in (
            self.waste_penalty,
            self.stockout_penalty,
            self.reorder_cost,
        ):
            running = running + component
        self.total = running
        return self


# ---------------------------------------------------------------------------
# Internal full product state (not exposed directly to agent)
# ---------------------------------------------------------------------------

class Product(BaseModel):
    """Complete per-product record held internally by the simulation engine."""

    product_id: str
    name: str
    category: str
    stock: int
    price: float
    cost: float
    # Absolute day when product expires (-1 = never).
    expiry_day: int
    # Mean daily demand at full price.
    base_demand: float
    current_discount: float = 0.0
    # Days until reorder arrives.
    reorder_lead: int = 1
    # Units in transit.
    pending_reorder: int = 0

    def days_to_expiry(self, current_day: int) -> int:
        """Return the days left before expiry as seen from ``current_day``.

        Returns -1 when ``expiry_day`` is the -1 "never expires" sentinel;
        otherwise the difference ``expiry_day - current_day`` floored at 0.
        """
        never_expires = self.expiry_day == -1
        return -1 if never_expires else max(0, self.expiry_day - current_day)

    def to_product_state(self, current_day: int, demand_estimate: float) -> ProductState:
        """Build the agent-visible ``ProductState`` for ``current_day``.

        ``demand_estimate`` is rounded to two decimals.  The snapshot is
        flagged as expired when the product has an expiry day (not -1) and
        zero days remain.
        """
        remaining = self.days_to_expiry(current_day)
        perishable = self.expiry_day != -1
        expired = perishable and remaining == 0
        rounded_demand = round(demand_estimate, 2)

        return ProductState(
            product_id=self.product_id,
            name=self.name,
            category=self.category,
            stock=self.stock,
            price=self.price,
            cost=self.cost,
            days_to_expiry=remaining,
            current_discount=self.current_discount,
            demand_estimate=rounded_demand,
            is_expired=expired,
        )