rishabh16196's picture
Upload folder using huggingface_hub
3950a3e verified
"""
Data models for the Creative Friction Gym Environment.
An ad-creative RL environment where the agent manages a portfolio of
ad creatives, choosing the messaging angle, the amount of "friction"
(qualifying text), and how much budget to allocate each step.
5 creative angles:
FOMO, Social Proof, Benefit-Driven, Curiosity, Value
Friction ranges from 0.0 (no qualifier — "Get it free!") to
1.0 (maximum qualifier — "Enterprise only, schedule a demo").
Budget multiplier ranges from 0.5x (conserve spend) to 2.0x
(double down on a winning creative).
Conversions in tasks 2 & 3 ("maximize_roas" and "survive_fatigue") are
delayed: users click now but convert 1-5 steps later. The agent must
make decisions with incomplete revenue information.
"""
from typing import Any, Dict, List, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Creative angles.
# Each ID is the integer carried by the action's `angle` field; it is also the
# index of that angle's entry in ANGLE_NAMES.
ANGLE_FOMO = 0  # "Only 3 left!" urgency messaging
ANGLE_SOCIAL_PROOF = 1  # "Join 10,000+ customers"
ANGLE_BENEFIT = 2  # "Save 2 hours every day"
ANGLE_CURIOSITY = 3  # "The secret top performers use"
ANGLE_VALUE = 4  # "Premium quality, fair price"
# Short machine-readable names, ordered by angle ID.
ANGLE_NAMES = ["fomo", "social_proof", "benefit", "curiosity", "value"]
# Derived from the name list (rather than hard-coded) so the count cannot
# drift out of sync when an angle is added or removed.
NUM_ANGLES = len(ANGLE_NAMES)
# Friction bounds: 0.0 is an ad with no qualifying text, 1.0 the heaviest
# qualifier. Used as the inclusive limits of the action's `friction` field.
MIN_FRICTION, MAX_FRICTION = 0.0, 1.0
# Budget multiplier bounds: 0.5x conserves spend, 2.0x doubles down. Used as
# the inclusive limits of the action's `spend_multiplier` field.
MIN_SPEND_MULTIPLIER, MAX_SPEND_MULTIPLIER = 0.5, 2.0
# Conversion delay parameters (tasks 2 & 3 only).
# The click-to-conversion delay is sampled from a geometric distribution
# with this mean.
CONVERSION_DELAY_MEAN = 3  # steps
# User segments (internal to simulation).
# Each ID is the index of that segment in SEGMENT_NAMES and in every
# per-segment table below.
SEGMENT_BOT = 0
SEGMENT_WINDOW_SHOPPER = 1
SEGMENT_BARGAIN_HUNTER = 2
SEGMENT_HIGH_INTENT = 3
# Short machine-readable names, ordered by segment ID.
SEGMENT_NAMES = ["bot", "window_shopper", "bargain_hunter", "high_intent"]
# Derived from the name list (rather than hard-coded) so the count cannot
# drift out of sync when a segment is added or removed.
NUM_SEGMENTS = len(SEGMENT_NAMES)

# All tables below share one column order:
#   bot, window_shopper, bargain_hunter, high_intent

# Default segment weights (proportion of impressions); the values sum to 1.0.
DEFAULT_SEGMENT_WEIGHTS = [0.15, 0.35, 0.30, 0.20]
# Per-segment base click probability. Bots click most often and high-intent
# users least often.
SEGMENT_BASE_CLICK = [0.08, 0.04, 0.03, 0.015]
# Per-segment conversion rate (given a click). Bots never convert.
SEGMENT_CONVERSION_RATE = [0.00, 0.02, 0.08, 0.35]
# Per-segment revenue per conversion ($).
SEGMENT_REVENUE = [0.0, 5.0, 15.0, 80.0]
# Effectiveness multiplier of every creative angle on every user segment,
# indexed as ANGLE_SEGMENT_MATRIX[angle][segment].
#   Rows (angle):     FOMO, Social Proof, Benefit, Curiosity, Value
#   Columns (segment): Bot, Window Shopper, Bargain Hunter, High Intent
# NOTE(review): which rate the simulation scales by these factors is not
# visible in this file -- confirm against the environment code.
ANGLE_SEGMENT_MATRIX = [
    # Bot, Window Shopper, Bargain Hunter, High Intent
    [1.2, 1.3, 1.1, 0.8],  # row 0: FOMO
    [0.9, 1.1, 1.0, 1.3],  # row 1: Social Proof
    [1.0, 0.9, 0.8, 1.4],  # row 2: Benefit
    [1.3, 1.2, 1.0, 0.7],  # row 3: Curiosity
    [0.8, 1.0, 1.4, 1.1],  # row 4: Value
]
# Impressions bought per step at the standard budget (spend_multiplier = 1.0).
# The action's spend_multiplier scales this figure (0.5 -> 500, 2.0 -> 2000).
IMPRESSIONS_PER_STEP = 1_000
# CPM ("cost per mille"): price in dollars of 1,000 impressions.
CPM = 12.0
# Task (scenario) identifiers, in task order.
TASK_NAMES = ["maximize_ctr", "maximize_roas", "survive_fatigue"]
# Step limit of each task, keyed by task name. The limits are listed in the
# same order as TASK_NAMES.
MAX_STEPS_PER_TASK = dict(zip(TASK_NAMES, (30, 40, 50)))
# ---------------------------------------------------------------------------
# Action
# ---------------------------------------------------------------------------
class CreativeFrictionAction(Action):
    """Action: choose a creative angle, friction level, and budget allocation.

    Fields:
        angle: which messaging angle the ad uses.
            0 = FOMO ("Only 3 left!")
            1 = Social Proof ("Join 10,000+ customers")
            2 = Benefit ("Save 2 hours every day")
            3 = Curiosity ("The secret top performers use")
            4 = Value ("Premium quality, fair price")
        friction: how much qualifying text the ad carries.
            0.0 = No qualifying text (maximizes clicks, attracts everyone)
            0.5 = Moderate qualifier ("Starting at $99/mo")
            1.0 = Heavy qualifier ("Enterprise only, schedule a demo")
        spend_multiplier: how much budget to commit this step.
            0.5 = Conserve budget (500 impressions)
            1.0 = Standard budget (1000 impressions)
            2.0 = Double down (2000 impressions)

    Higher friction reduces total clicks but filters out low-intent
    users and bots, potentially improving conversion quality and ROAS.
    Higher spend increases impressions and cost proportionally.
    """

    # Required. Integer angle ID, constrained to 0 .. NUM_ANGLES - 1 inclusive.
    angle: int = Field(
        default=...,
        ge=0,
        le=NUM_ANGLES - 1,
        description=(
            "Creative angle: 0=FOMO, 1=Social Proof, "
            "2=Benefit, 3=Curiosity, 4=Value"
        ),
    )

    # Required. Constrained to MIN_FRICTION .. MAX_FRICTION inclusive.
    friction: float = Field(
        default=...,
        ge=MIN_FRICTION,
        le=MAX_FRICTION,
        description=(
            "Friction level 0.0-1.0: amount of qualifying text. 0.0=no qualifier "
            "(max clicks), 1.0=max qualifier (filters low-intent)"
        ),
    )

    # Optional (standard budget when omitted). Constrained to
    # MIN_SPEND_MULTIPLIER .. MAX_SPEND_MULTIPLIER inclusive.
    spend_multiplier: float = Field(
        default=1.0,
        ge=MIN_SPEND_MULTIPLIER,
        le=MAX_SPEND_MULTIPLIER,
        description=(
            "Budget multiplier 0.5-2.0: scales impressions and cost. 0.5=conserve, "
            "1.0=standard, 2.0=double down"
        ),
    )
# ---------------------------------------------------------------------------
# Observation
# ---------------------------------------------------------------------------
class CreativeFrictionObservation(Observation):
    """Observation from the ad-creative environment.

    Every field has a default, so an instance can be built without any
    arguments. The fields fall into these groups: task info, last-step
    performance, rolling history, account health, market signals, budget,
    delayed conversions, cumulative statistics, and final grading.
    """

    # --- Task info ---------------------------------------------------------
    task_name: str = Field("maximize_ctr", description="Current task/scenario name")
    step_number: int = Field(0, description="Current simulation step")
    max_steps: int = Field(30, description="Total steps for this task")

    # --- Last step performance ---------------------------------------------
    last_ctr: float = Field(
        0.0,
        description="Click-through rate of last ad (0.0-1.0)",
    )
    last_conversion_rate: float = Field(
        0.0,
        description="Conversion rate among clickers (0.0-1.0)",
    )
    last_revenue: float = Field(0.0, description="Revenue generated last step ($)")
    last_cost: float = Field(0.0, description="Ad spend last step ($)")
    last_roas: float = Field(0.0, description="Return on ad spend (revenue/cost)")

    # --- Rolling history (last 5 steps) ------------------------------------
    # Each list starts out empty; default_factory gives every instance its
    # own list object.
    recent_ctrs: List[float] = Field(
        description="CTR for last 5 steps",
        default_factory=list,
    )
    recent_roas_values: List[float] = Field(
        description="ROAS for last 5 steps",
        default_factory=list,
    )
    recent_angles: List[int] = Field(
        description="Angles chosen in last 5 steps",
        default_factory=list,
    )
    recent_frictions: List[float] = Field(
        description="Friction levels in last 5 steps",
        default_factory=list,
    )

    # --- Account health ----------------------------------------------------
    account_poison_score: float = Field(
        0.0,
        description=(
            "0.0-1.0: how polluted the audience is with low-intent users. Higher = "
            "more bots/window shoppers in future impressions."
        ),
    )
    audience_quality_index: float = Field(
        1.0,
        description="0.0-1.0: overall quality of audience being reached",
    )

    # --- Market signals (partially observable) -----------------------------
    market_trend: str = Field(
        "stable",
        description="Market regime hint: 'stable', 'shifting', or 'volatile'",
    )
    # One saturation value per creative angle, all zero by default.
    angle_saturation: List[float] = Field(
        description="Per-angle fatigue/saturation level (0.0-1.0), 5 values",
        default_factory=lambda: [0.0 for _ in range(NUM_ANGLES)],
    )

    # --- Budget info -------------------------------------------------------
    last_spend_multiplier: float = Field(
        1.0,
        description="Spend multiplier used last step",
    )
    total_budget_used: float = Field(
        0.0,
        description="Cumulative budget spent ($)",
    )

    # --- Delayed conversions (tasks 2 & 3) ---------------------------------
    pending_conversions: int = Field(
        0,
        description=(
            "Conversions not yet realized — users clicked but haven't converted yet. "
            "Revenue from these is unknown."
        ),
    )
    pending_revenue_estimate: float = Field(
        0.0,
        description=(
            "Estimated revenue from pending conversions based on historical "
            "conversion value. May be inaccurate."
        ),
    )
    realized_revenue_this_step: float = Field(
        0.0,
        description="Revenue from delayed conversions that arrived this step ($)",
    )

    # --- Cumulative stats --------------------------------------------------
    total_revenue: float = Field(0.0, description="Cumulative realized revenue ($)")
    total_cost: float = Field(0.0, description="Cumulative ad spend ($)")
    total_clicks: int = Field(0, description="Cumulative clicks")
    total_conversions: int = Field(
        0,
        description="Cumulative realized conversions",
    )
    cumulative_roas: float = Field(
        0.0,
        description="Cumulative ROAS (realized_revenue/total_cost)",
    )

    # --- Grading (populated on final step when done=True) ------------------
    # Both fields stay None on every non-terminal observation.
    grade_score: Optional[float] = Field(
        None,
        description="Final grade 0.0-1.0 (only set on terminal observation)",
    )
    grade_details: Optional[Dict[str, Any]] = Field(
        None,
        description="Breakdown of grading components (only set on terminal observation)",
    )