Spaces:

rishabh16196
/

creative-friction-env

Sleeping

App Files Files Community

creative-friction-env / models.py

rishabh16196

Upload folder using huggingface_hub

3950a3e verified about 2 months ago

raw

history blame contribute delete

8.97 kB

	"""
	Data models for the Creative Friction Gym Environment.

	An ad-creative RL environment where the agent manages a portfolio of
	ad creatives, choosing the messaging angle, the amount of "friction"
	(qualifying text), and how much budget to allocate each step.

	5 creative angles:
	FOMO, Social Proof, Benefit-Driven, Curiosity, Value

	Friction ranges from 0.0 (no qualifier — "Get it free!") to
	1.0 (maximum qualifier — "Enterprise only, schedule a demo").

	Budget multiplier ranges from 0.5x (conserve spend) to 2.0x
	(double down on a winning creative).

	Conversions in tasks 2 & 3 are delayed: users click now but
	convert 1-5 steps later. The agent must make decisions with
	incomplete revenue information.
	"""

	from typing import Any, Dict, List, Optional

	from openenv.core.env_server.types import Action, Observation
	from pydantic import Field


	# ---------------------------------------------------------------------------
	# Constants
	# ---------------------------------------------------------------------------

	# Creative angles. The integer id of an angle is its position in ANGLE_NAMES:
	#   0  fomo          "Only 3 left!" urgency messaging
	#   1  social_proof  "Join 10,000+ customers"
	#   2  benefit       "Save 2 hours every day"
	#   3  curiosity     "The secret top performers use"
	#   4  value         "Premium quality, fair price"
	ANGLE_NAMES = ["fomo", "social_proof", "benefit", "curiosity", "value"]
	NUM_ANGLES = len(ANGLE_NAMES)
	(
	    ANGLE_FOMO,
	    ANGLE_SOCIAL_PROOF,
	    ANGLE_BENEFIT,
	    ANGLE_CURIOSITY,
	    ANGLE_VALUE,
	) = range(NUM_ANGLES)

	# Inclusive limits for the friction level of an action.
	MIN_FRICTION, MAX_FRICTION = 0.0, 1.0

	# Inclusive limits for the budget (spend) multiplier of an action.
	MIN_SPEND_MULTIPLIER, MAX_SPEND_MULTIPLIER = 0.5, 2.0

	# Conversion delay (tasks 2 & 3 only): mean, in steps, of the geometric
	# distribution the delay is sampled from.
	CONVERSION_DELAY_MEAN = 3

	# User segments (internal to the simulation). The integer id of a segment
	# is its position in SEGMENT_NAMES; the per-segment tables below use the
	# same ordering.
	SEGMENT_NAMES = ["bot", "window_shopper", "bargain_hunter", "high_intent"]
	NUM_SEGMENTS = len(SEGMENT_NAMES)
	(
	    SEGMENT_BOT,
	    SEGMENT_WINDOW_SHOPPER,
	    SEGMENT_BARGAIN_HUNTER,
	    SEGMENT_HIGH_INTENT,
	) = range(NUM_SEGMENTS)

	# Per-segment tables, ordered: bot, window shopper, bargain hunter, high intent.
	DEFAULT_SEGMENT_WEIGHTS = [0.15, 0.35, 0.30, 0.20]  # default share of impressions
	SEGMENT_BASE_CLICK = [0.08, 0.04, 0.03, 0.015]  # base click probability
	SEGMENT_CONVERSION_RATE = [0.00, 0.02, 0.08, 0.35]  # conversion rate given a click
	SEGMENT_REVENUE = [0.0, 5.0, 15.0, 80.0]  # revenue per conversion ($)

	# Effectiveness multiplier of each angle on each segment, indexed
	# [angle][segment].
	ANGLE_SEGMENT_MATRIX = [
	    # bot, window shopper, bargain hunter, high intent
	    [1.2, 1.3, 1.1, 0.8],  # fomo
	    [0.9, 1.1, 1.0, 1.3],  # social_proof
	    [1.0, 0.9, 0.8, 1.4],  # benefit
	    [1.3, 1.2, 1.0, 0.7],  # curiosity
	    [0.8, 1.0, 1.4, 1.1],  # value
	]

	# Baseline impressions served per step (the amount bought at a spend
	# multiplier of 1.0).
	IMPRESSIONS_PER_STEP = 1000

	# Cost per mille (CPM), in dollars.
	CPM = 12.0

	# Tasks, and the episode length (in steps) of each one. The step limits are
	# listed in the same order as TASK_NAMES.
	TASK_NAMES = ["maximize_ctr", "maximize_roas", "survive_fatigue"]
	MAX_STEPS_PER_TASK = dict(zip(TASK_NAMES, (30, 40, 50)))


	# ---------------------------------------------------------------------------
	# Action
	# ---------------------------------------------------------------------------

	class CreativeFrictionAction(Action):
	    """Per-step decision submitted by the agent.

	    Three knobs are set on every step:

	    angle (int, required) -- which creative angle to run:
	        0 = FOMO ("Only 3 left!")
	        1 = Social Proof ("Join 10,000+ customers")
	        2 = Benefit ("Save 2 hours every day")
	        3 = Curiosity ("The secret top performers use")
	        4 = Value ("Premium quality, fair price")

	    friction (float, required) -- how much qualifying text the ad carries:
	        0.0 = no qualifier (maximizes clicks, attracts everyone)
	        0.5 = moderate qualifier ("Starting at $99/mo")
	        1.0 = heavy qualifier ("Enterprise only, schedule a demo")

	    spend_multiplier (float, defaults to 1.0) -- budget scaling for the step:
	        0.5 = conserve budget (500 impressions)
	        1.0 = standard budget (1000 impressions)
	        2.0 = double down (2000 impressions)

	    More friction means fewer total clicks, but it screens out bots and
	    low-intent users, which can raise conversion quality and ROAS. More
	    spend raises impressions and cost in proportion.

	    Each field is range-checked through its ``ge``/``le`` bounds.
	    """

	    # Required; must be a valid angle id in [0, NUM_ANGLES - 1].
	    angle: int = Field(
	        ...,
	        ge=0,
	        le=NUM_ANGLES - 1,
	        description="Creative angle: 0=FOMO, 1=Social Proof, 2=Benefit, 3=Curiosity, 4=Value",
	    )

	    # Required; bounded by MIN_FRICTION and MAX_FRICTION (inclusive).
	    friction: float = Field(
	        ...,
	        ge=MIN_FRICTION,
	        le=MAX_FRICTION,
	        description=(
	            "Friction level 0.0-1.0: amount of qualifying text. 0.0=no qualifier "
	            "(max clicks), 1.0=max qualifier (filters low-intent)"
	        ),
	    )

	    # Optional; bounded by MIN_SPEND_MULTIPLIER and MAX_SPEND_MULTIPLIER
	    # (inclusive). Omitting it means standard spend.
	    spend_multiplier: float = Field(
	        default=1.0,
	        ge=MIN_SPEND_MULTIPLIER,
	        le=MAX_SPEND_MULTIPLIER,
	        description=(
	            "Budget multiplier 0.5-2.0: scales impressions and cost. 0.5=conserve, "
	            "1.0=standard, 2.0=double down"
	        ),
	    )


	# ---------------------------------------------------------------------------
	# Observation
	# ---------------------------------------------------------------------------

	class CreativeFrictionObservation(Observation):
	    """What the agent sees after each step of the ad-creative environment.

	    Every field has a default. The fields fall into these groups, in
	    declaration order: task info, last-step performance, rolling 5-step
	    history, account health, market signals, budget info, delayed
	    conversions, cumulative stats, and the final grade. The two grade
	    fields stay ``None`` except on the terminal observation.
	    """

	    # --- Task info -------------------------------------------------------
	    task_name: str = Field(
	        default="maximize_ctr",
	        description="Current task/scenario name",
	    )
	    step_number: int = Field(
	        default=0,
	        description="Current simulation step",
	    )
	    # The default of 30 equals the step limit of the "maximize_ctr" task.
	    max_steps: int = Field(
	        default=30,
	        description="Total steps for this task",
	    )

	    # --- Performance of the most recent step -----------------------------
	    last_ctr: float = Field(
	        default=0.0,
	        description="Click-through rate of last ad (0.0-1.0)",
	    )
	    last_conversion_rate: float = Field(
	        default=0.0,
	        description="Conversion rate among clickers (0.0-1.0)",
	    )
	    last_revenue: float = Field(
	        default=0.0,
	        description="Revenue generated last step ($)",
	    )
	    last_cost: float = Field(
	        default=0.0,
	        description="Ad spend last step ($)",
	    )
	    last_roas: float = Field(
	        default=0.0,
	        description="Return on ad spend (revenue/cost)",
	    )

	    # --- Rolling history (last 5 steps); each starts as an empty list ----
	    recent_ctrs: List[float] = Field(default_factory=list, description="CTR for last 5 steps")
	    recent_roas_values: List[float] = Field(default_factory=list, description="ROAS for last 5 steps")
	    recent_angles: List[int] = Field(default_factory=list, description="Angles chosen in last 5 steps")
	    recent_frictions: List[float] = Field(default_factory=list, description="Friction levels in last 5 steps")

	    # --- Account health --------------------------------------------------
	    account_poison_score: float = Field(
	        default=0.0,
	        description=(
	            "0.0-1.0: how polluted the audience is with low-intent users. Higher = "
	            "more bots/window shoppers in future impressions."
	        ),
	    )
	    audience_quality_index: float = Field(
	        default=1.0,
	        description="0.0-1.0: overall quality of audience being reached",
	    )

	    # --- Market signals (partially observable) ---------------------------
	    market_trend: str = Field(
	        default="stable",
	        description="Market regime hint: 'stable', 'shifting', or 'volatile'",
	    )
	    # A fresh list with one 0.0 entry per angle is built for each instance.
	    angle_saturation: List[float] = Field(
	        default_factory=lambda: [0.0 for _ in range(NUM_ANGLES)],
	        description="Per-angle fatigue/saturation level (0.0-1.0), 5 values",
	    )

	    # --- Budget info -----------------------------------------------------
	    last_spend_multiplier: float = Field(default=1.0, description="Spend multiplier used last step")
	    total_budget_used: float = Field(default=0.0, description="Cumulative budget spent ($)")

	    # --- Delayed conversions (tasks 2 & 3) -------------------------------
	    pending_conversions: int = Field(
	        default=0,
	        description=(
	            "Conversions not yet realized — users clicked but haven't converted yet. "
	            "Revenue from these is unknown."
	        ),
	    )
	    pending_revenue_estimate: float = Field(
	        default=0.0,
	        description=(
	            "Estimated revenue from pending conversions based on historical "
	            "conversion value. May be inaccurate."
	        ),
	    )
	    realized_revenue_this_step: float = Field(
	        default=0.0,
	        description="Revenue from delayed conversions that arrived this step ($)",
	    )

	    # --- Cumulative stats ------------------------------------------------
	    total_revenue: float = Field(
	        default=0.0,
	        description="Cumulative realized revenue ($)",
	    )
	    total_cost: float = Field(
	        default=0.0,
	        description="Cumulative ad spend ($)",
	    )
	    total_clicks: int = Field(
	        default=0,
	        description="Cumulative clicks",
	    )
	    total_conversions: int = Field(
	        default=0,
	        description="Cumulative realized conversions",
	    )
	    cumulative_roas: float = Field(
	        default=0.0,
	        description="Cumulative ROAS (realized_revenue/total_cost)",
	    )

	    # --- Grading (filled in on the final step only) ----------------------
	    grade_score: Optional[float] = Field(
	        default=None,
	        description="Final grade 0.0-1.0 (only set on terminal observation)",
	    )
	    grade_details: Optional[Dict[str, Any]] = Field(
	        default=None,
	        description="Breakdown of grading components (only set on terminal observation)",
	    )