"""
Data models for the Creative Friction Gym Environment.

An ad-creative RL environment where the agent manages a portfolio of
ad creatives, choosing the messaging angle, the amount of "friction"
(qualifying text), and how much budget to allocate each step.

5 creative angles:
  FOMO, Social Proof, Benefit-Driven, Curiosity, Value

Friction ranges from 0.0 (no qualifier — "Get it free!") to
1.0 (maximum qualifier — "Enterprise only, schedule a demo").

Budget multiplier ranges from 0.5x (conserve spend) to 2.0x
(double down on a winning creative).

Conversions in tasks 2 & 3 (maximize_roas and survive_fatigue) are
delayed: users click now but convert 1-5 steps later, so the agent
must make decisions with incomplete revenue information.
"""

from typing import Any, Dict, List, Optional

from openenv.core.env_server.types import Action, Observation
from pydantic import Field


# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Creative angles. Each ID is also the angle's position in ANGLE_NAMES and
# its row index in ANGLE_SEGMENT_MATRIX.
(
    ANGLE_FOMO,          # "Only 3 left!" urgency messaging
    ANGLE_SOCIAL_PROOF,  # "Join 10,000+ customers"
    ANGLE_BENEFIT,       # "Save 2 hours every day"
    ANGLE_CURIOSITY,     # "The secret top performers use"
    ANGLE_VALUE,         # "Premium quality, fair price"
) = range(5)

ANGLE_NAMES = ["fomo", "social_proof", "benefit", "curiosity", "value"]
NUM_ANGLES = len(ANGLE_NAMES)

# Inclusive bounds for the friction level and the budget (spend) multiplier.
MIN_FRICTION, MAX_FRICTION = 0.0, 1.0
MIN_SPEND_MULTIPLIER, MAX_SPEND_MULTIPLIER = 0.5, 2.0

# Conversion delay parameters (tasks 2 & 3 only).
# Delay is sampled from a geometric distribution with this mean, in steps.
CONVERSION_DELAY_MEAN = 3

# User segments (internal to the simulation). Each ID is also the segment's
# position in SEGMENT_NAMES and its index into every per-segment list below.
(
    SEGMENT_BOT,
    SEGMENT_WINDOW_SHOPPER,
    SEGMENT_BARGAIN_HUNTER,
    SEGMENT_HIGH_INTENT,
) = range(4)

SEGMENT_NAMES = ["bot", "window_shopper", "bargain_hunter", "high_intent"]
NUM_SEGMENTS = len(SEGMENT_NAMES)

# Per-segment parameters, ordered: bot, window shopper, bargain hunter,
# high intent.
DEFAULT_SEGMENT_WEIGHTS = [0.15, 0.35, 0.30, 0.20]  # share of impressions
SEGMENT_BASE_CLICK = [0.08, 0.04, 0.03, 0.015]      # base click probability
SEGMENT_CONVERSION_RATE = [0.00, 0.02, 0.08, 0.35]  # conversion rate given a click
SEGMENT_REVENUE = [0.0, 5.0, 15.0, 80.0]            # revenue per conversion ($)

# Effectiveness multiplier of each angle on each segment, indexed as
# ANGLE_SEGMENT_MATRIX[angle][segment].
#    bot  window  bargain  high-intent
ANGLE_SEGMENT_MATRIX = [
    [1.2, 1.3, 1.1, 0.8],   # FOMO
    [0.9, 1.1, 1.0, 1.3],   # Social Proof
    [1.0, 0.9, 0.8, 1.4],   # Benefit
    [1.3, 1.2, 1.0, 0.7],   # Curiosity
    [0.8, 1.0, 1.4, 1.1],   # Value
]

# Baseline impressions served per step, i.e. at a spend multiplier of 1.0.
IMPRESSIONS_PER_STEP = 1000

# Cost per mille (CPM): dollars per 1000 impressions.
CPM = 12.0

# Tasks, and the episode length (in steps) of each one.
TASK_NAMES = ["maximize_ctr", "maximize_roas", "survive_fatigue"]
MAX_STEPS_PER_TASK = dict(zip(TASK_NAMES, [30, 40, 50]))


# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------

class CreativeFrictionAction(Action):
    """A single step's decision: which creative to run, how much friction
    to add, and how hard to push the budget.

    Fields:
        angle: Required integer ID of the creative angle.
            0 = FOMO ("Only 3 left!")
            1 = Social Proof ("Join 10,000+ customers")
            2 = Benefit ("Save 2 hours every day")
            3 = Curiosity ("The secret top performers use")
            4 = Value ("Premium quality, fair price")
        friction: Required float in [0.0, 1.0]; the amount of qualifying
            text on the ad.
            0.0 = no qualifying text (maximizes clicks, attracts everyone)
            0.5 = moderate qualifier ("Starting at $99/mo")
            1.0 = heavy qualifier ("Enterprise only, schedule a demo")
        spend_multiplier: Optional float in [0.5, 2.0], defaulting to 1.0.
            0.5 = conserve budget (500 impressions)
            1.0 = standard budget (1000 impressions)
            2.0 = double down (2000 impressions)

    Trade-offs: raising friction lowers total clicks but filters out
    low-intent users and bots, which can improve conversion quality and
    ROAS. Raising spend increases impressions and cost proportionally.
    """

    # Required; must be a valid angle ID (0 .. NUM_ANGLES - 1).
    angle: int = Field(
        ...,
        ge=0,
        le=NUM_ANGLES - 1,
        description="Creative angle: 0=FOMO, 1=Social Proof, 2=Benefit, 3=Curiosity, 4=Value",
    )

    # Required; bounded by the module-level friction limits.
    friction: float = Field(
        ...,
        ge=MIN_FRICTION,
        le=MAX_FRICTION,
        description=(
            "Friction level 0.0-1.0: amount of qualifying text. "
            "0.0=no qualifier (max clicks), "
            "1.0=max qualifier (filters low-intent)"
        ),
    )

    # Optional; omitting it leaves spend at the standard 1.0x level.
    spend_multiplier: float = Field(
        default=1.0,
        ge=MIN_SPEND_MULTIPLIER,
        le=MAX_SPEND_MULTIPLIER,
        description=(
            "Budget multiplier 0.5-2.0: "
            "scales impressions and cost. "
            "0.5=conserve, 1.0=standard, 2.0=double down"
        ),
    )


# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------

class CreativeFrictionObservation(Observation):
    """What the agent sees after each step of the ad-creative environment.

    Every field has a default, so an observation can be built with only the
    values that are known. Fields are grouped as: task info, last-step
    performance, rolling 5-step history, account health, market signals,
    budget, delayed conversions, cumulative totals, and final grading.
    """

    # --- Task info -------------------------------------------------------
    task_name: str = Field(
        description="Current task/scenario name",
        default="maximize_ctr",
    )
    step_number: int = Field(
        description="Current simulation step",
        default=0,
    )
    max_steps: int = Field(
        description="Total steps for this task",
        default=30,
    )

    # --- Performance of the most recent step -----------------------------
    last_ctr: float = Field(
        description="Click-through rate of last ad (0.0-1.0)",
        default=0.0,
    )
    last_conversion_rate: float = Field(
        description="Conversion rate among clickers (0.0-1.0)",
        default=0.0,
    )
    last_revenue: float = Field(
        description="Revenue generated last step ($)",
        default=0.0,
    )
    last_cost: float = Field(
        description="Ad spend last step ($)",
        default=0.0,
    )
    last_roas: float = Field(
        description="Return on ad spend (revenue/cost)",
        default=0.0,
    )

    # --- Rolling history (last 5 steps) ----------------------------------
    recent_ctrs: List[float] = Field(
        description="CTR for last 5 steps",
        default_factory=list,
    )
    recent_roas_values: List[float] = Field(
        description="ROAS for last 5 steps",
        default_factory=list,
    )
    recent_angles: List[int] = Field(
        description="Angles chosen in last 5 steps",
        default_factory=list,
    )
    recent_frictions: List[float] = Field(
        description="Friction levels in last 5 steps",
        default_factory=list,
    )

    # --- Account health --------------------------------------------------
    account_poison_score: float = Field(
        description=(
            "0.0-1.0: how polluted the audience is with "
            "low-intent users. Higher = more bots/window shoppers "
            "in future impressions."
        ),
        default=0.0,
    )
    audience_quality_index: float = Field(
        description="0.0-1.0: overall quality of audience being reached",
        default=1.0,
    )

    # --- Market signals (partially observable) ---------------------------
    market_trend: str = Field(
        description="Market regime hint: 'stable', 'shifting', or 'volatile'",
        default="stable",
    )
    # The factory builds a fresh list per instance: one zero per angle.
    angle_saturation: List[float] = Field(
        description="Per-angle fatigue/saturation level (0.0-1.0), 5 values",
        default_factory=lambda: [0.0] * NUM_ANGLES,
    )

    # --- Budget info -----------------------------------------------------
    last_spend_multiplier: float = Field(
        description="Spend multiplier used last step",
        default=1.0,
    )
    total_budget_used: float = Field(
        description="Cumulative budget spent ($)",
        default=0.0,
    )

    # --- Delayed conversions (tasks 2 & 3) -------------------------------
    pending_conversions: int = Field(
        description=(
            "Conversions not yet realized — users clicked but "
            "haven't converted yet. Revenue from these is unknown."
        ),
        default=0,
    )
    pending_revenue_estimate: float = Field(
        description=(
            "Estimated revenue from pending conversions "
            "based on historical conversion value. May be inaccurate."
        ),
        default=0.0,
    )
    realized_revenue_this_step: float = Field(
        description="Revenue from delayed conversions that arrived this step ($)",
        default=0.0,
    )

    # --- Cumulative stats ------------------------------------------------
    total_revenue: float = Field(
        description="Cumulative realized revenue ($)",
        default=0.0,
    )
    total_cost: float = Field(
        description="Cumulative ad spend ($)",
        default=0.0,
    )
    total_clicks: int = Field(
        description="Cumulative clicks",
        default=0,
    )
    total_conversions: int = Field(
        description="Cumulative realized conversions",
        default=0,
    )
    cumulative_roas: float = Field(
        description="Cumulative ROAS (realized_revenue/total_cost)",
        default=0.0,
    )

    # --- Grading (populated on final step when done=True) ----------------
    # Both stay None on every non-terminal observation.
    grade_score: Optional[float] = Field(
        description="Final grade 0.0-1.0 (only set on terminal observation)",
        default=None,
    )
    grade_details: Optional[Dict[str, Any]] = Field(
        description="Breakdown of grading components (only set on terminal observation)",
        default=None,
    )