""" Data models for the Creative Friction Gym Environment. An ad-creative RL environment where the agent manages a portfolio of ad creatives, choosing the messaging angle, the amount of "friction" (qualifying text), and how much budget to allocate each step. 5 creative angles: FOMO, Social Proof, Benefit-Driven, Curiosity, Value Friction ranges from 0.0 (no qualifier — "Get it free!") to 1.0 (maximum qualifier — "Enterprise only, schedule a demo"). Budget multiplier ranges from 0.5x (conserve spend) to 2.0x (double down on a winning creative). Conversions in tasks 2 & 3 are delayed: users click now but convert 1-5 steps later. The agent must make decisions with incomplete revenue information. """ from typing import Any, Dict, List, Optional from openenv.core.env_server.types import Action, Observation from pydantic import Field # --------------------------------------------------------------------------- # Constants # --------------------------------------------------------------------------- # Creative Angles ANGLE_FOMO = 0 # "Only 3 left!" urgency messaging ANGLE_SOCIAL_PROOF = 1 # "Join 10,000+ customers" ANGLE_BENEFIT = 2 # "Save 2 hours every day" ANGLE_CURIOSITY = 3 # "The secret top performers use" ANGLE_VALUE = 4 # "Premium quality, fair price" NUM_ANGLES = 5 ANGLE_NAMES = ["fomo", "social_proof", "benefit", "curiosity", "value"] # Friction bounds MIN_FRICTION = 0.0 MAX_FRICTION = 1.0 # Budget multiplier bounds MIN_SPEND_MULTIPLIER = 0.5 MAX_SPEND_MULTIPLIER = 2.0 # Conversion delay parameters (tasks 2 & 3 only) # Delay is sampled from geometric distribution with this mean CONVERSION_DELAY_MEAN = 3 # steps # User segments (internal to simulation) SEGMENT_BOT = 0 SEGMENT_WINDOW_SHOPPER = 1 SEGMENT_BARGAIN_HUNTER = 2 SEGMENT_HIGH_INTENT = 3 NUM_SEGMENTS = 4 SEGMENT_NAMES = ["bot", "window_shopper", "bargain_hunter", "high_intent"] # Default segment weights (proportion of impressions) DEFAULT_SEGMENT_WEIGHTS = [0.15, 0.35, 0.30, 0.20] # Per-segment base click probability SEGMENT_BASE_CLICK = [0.08, 0.04, 0.03, 0.015] # Per-segment conversion rate (given a click) SEGMENT_CONVERSION_RATE = [0.00, 0.02, 0.08, 0.35] # Per-segment revenue per conversion ($) SEGMENT_REVENUE = [0.0, 5.0, 15.0, 80.0] # Per-angle effectiveness multiplier per segment [angle][segment] # Rows: FOMO, Social Proof, Benefit, Curiosity, Value # Cols: Bot, Window Shopper, Bargain Hunter, High Intent ANGLE_SEGMENT_MATRIX = [ [1.2, 1.3, 1.1, 0.8], # FOMO [0.9, 1.1, 1.0, 1.3], # Social Proof [1.0, 0.9, 0.8, 1.4], # Benefit [1.3, 1.2, 1.0, 0.7], # Curiosity [0.8, 1.0, 1.4, 1.1], # Value ] # Impressions per step (fixed budget) IMPRESSIONS_PER_STEP = 1000 # Cost per mille (CPM) in dollars CPM = 12.0 # Tasks TASK_NAMES = ["maximize_ctr", "maximize_roas", "survive_fatigue"] MAX_STEPS_PER_TASK = { "maximize_ctr": 30, "maximize_roas": 40, "survive_fatigue": 50, } # --------------------------------------------------------------------------- # Action # --------------------------------------------------------------------------- class CreativeFrictionAction(Action): """Action: choose a creative angle, friction level, and budget allocation. angle: 0 = FOMO ("Only 3 left!") 1 = Social Proof ("Join 10,000+ customers") 2 = Benefit ("Save 2 hours every day") 3 = Curiosity ("The secret top performers use") 4 = Value ("Premium quality, fair price") friction: 0.0 = No qualifying text (maximizes clicks, attracts everyone) 0.5 = Moderate qualifier ("Starting at $99/mo") 1.0 = Heavy qualifier ("Enterprise only, schedule a demo") spend_multiplier: 0.5 = Conserve budget (500 impressions) 1.0 = Standard budget (1000 impressions) 2.0 = Double down (2000 impressions) Higher friction reduces total clicks but filters out low-intent users and bots, potentially improving conversion quality and ROAS. Higher spend increases impressions and cost proportionally. """ angle: int = Field( ..., description=( "Creative angle: " "0=FOMO, 1=Social Proof, 2=Benefit, 3=Curiosity, 4=Value" ), ge=0, le=NUM_ANGLES - 1, ) friction: float = Field( ..., description=( "Friction level 0.0-1.0: amount of qualifying text. " "0.0=no qualifier (max clicks), 1.0=max qualifier (filters low-intent)" ), ge=MIN_FRICTION, le=MAX_FRICTION, ) spend_multiplier: float = Field( default=1.0, description=( "Budget multiplier 0.5-2.0: scales impressions and cost. " "0.5=conserve, 1.0=standard, 2.0=double down" ), ge=MIN_SPEND_MULTIPLIER, le=MAX_SPEND_MULTIPLIER, ) # --------------------------------------------------------------------------- # Observation # --------------------------------------------------------------------------- class CreativeFrictionObservation(Observation): """Observation from the ad-creative environment.""" # Task info task_name: str = Field(default="maximize_ctr", description="Current task/scenario name") step_number: int = Field(default=0, description="Current simulation step") max_steps: int = Field(default=30, description="Total steps for this task") # Last step performance last_ctr: float = Field(default=0.0, description="Click-through rate of last ad (0.0-1.0)") last_conversion_rate: float = Field( default=0.0, description="Conversion rate among clickers (0.0-1.0)" ) last_revenue: float = Field(default=0.0, description="Revenue generated last step ($)") last_cost: float = Field(default=0.0, description="Ad spend last step ($)") last_roas: float = Field(default=0.0, description="Return on ad spend (revenue/cost)") # Rolling history (last 5 steps) recent_ctrs: List[float] = Field( default_factory=list, description="CTR for last 5 steps" ) recent_roas_values: List[float] = Field( default_factory=list, description="ROAS for last 5 steps" ) recent_angles: List[int] = Field( default_factory=list, description="Angles chosen in last 5 steps" ) recent_frictions: List[float] = Field( default_factory=list, description="Friction levels in last 5 steps" ) # Account health account_poison_score: float = Field( default=0.0, description=( "0.0-1.0: how polluted the audience is with low-intent users. " "Higher = more bots/window shoppers in future impressions." ), ) audience_quality_index: float = Field( default=1.0, description="0.0-1.0: overall quality of audience being reached", ) # Market signals (partially observable) market_trend: str = Field( default="stable", description="Market regime hint: 'stable', 'shifting', or 'volatile'", ) angle_saturation: List[float] = Field( default_factory=lambda: [0.0] * NUM_ANGLES, description="Per-angle fatigue/saturation level (0.0-1.0), 5 values", ) # Budget info last_spend_multiplier: float = Field( default=1.0, description="Spend multiplier used last step" ) total_budget_used: float = Field( default=0.0, description="Cumulative budget spent ($)" ) # Delayed conversions (tasks 2 & 3) pending_conversions: int = Field( default=0, description=( "Conversions not yet realized — users clicked but haven't " "converted yet. Revenue from these is unknown." ), ) pending_revenue_estimate: float = Field( default=0.0, description=( "Estimated revenue from pending conversions based on " "historical conversion value. May be inaccurate." ), ) realized_revenue_this_step: float = Field( default=0.0, description="Revenue from delayed conversions that arrived this step ($)", ) # Cumulative stats total_revenue: float = Field(default=0.0, description="Cumulative realized revenue ($)") total_cost: float = Field(default=0.0, description="Cumulative ad spend ($)") total_clicks: int = Field(default=0, description="Cumulative clicks") total_conversions: int = Field(default=0, description="Cumulative realized conversions") cumulative_roas: float = Field(default=0.0, description="Cumulative ROAS (realized_revenue/total_cost)") # Grading (populated on final step when done=True) grade_score: Optional[float] = Field( default=None, description="Final grade 0.0-1.0 (only set on terminal observation)", ) grade_details: Optional[Dict[str, Any]] = Field( default=None, description="Breakdown of grading components (only set on terminal observation)", )