OpenEnv-Auctioneer / models.py
Ikshitha Janarthanan
feat:task 3 enhanced
3e3afc7
"""
models.py — Typed data contracts for the OpenEnv Creative Auctioneer.
All tensors / vectors are represented as plain Python types so the environment
stays framework-agnostic (no hard dependency on PyTorch at this layer).
Dataset provenance (v0.4):
CTR calibration → MIND (Microsoft News Dataset) behaviors.tsv + news.tsv
Market engine → iPinYou Global RTB logs (Lognormal per hour)
Persona bank → Vogue Dialogue Dataset
Ad+Caption pool → MS-COCO Captions OR Google Conceptual Captions CC3M
Viral hashtags → Pytrends / Hashtagify / static fallback table
"""
from typing import List, Optional
from pydantic import BaseModel, Field
# ---------------------------------------------------------------------------
# Reward (typed wrapper so step() signature is explicit)
# ---------------------------------------------------------------------------
class Reward(BaseModel):
    # Typed wrapper around the scalar reward produced for one step.
    # Documented with comments rather than a class docstring because pydantic
    # copies a class docstring into the generated JSON schema; using comments
    # keeps that schema exactly as it was.

    # Required: declared with `...`, so no default exists.
    value: float = Field(
        ...,
        description=(
            "Scalar step reward. Positive = profitable auction win; "
            "negative = missed bid penalty."
        ),
    )
# ---------------------------------------------------------------------------
# Observation (what the agent *sees* each step)
# ---------------------------------------------------------------------------
class Observation(BaseModel):
    # Per-step observation handed to the agent.
    # Fields declared with `...` are required; every other field carries the
    # default shown. `ge` / `le` are pydantic bounds checked at validation.
    # Documented with comments rather than a class docstring because pydantic
    # copies a class docstring into the generated JSON schema; using comments
    # keeps that schema exactly as it was.

    # --- Temporal ---------------------------------------------------------
    hour_of_day: int = Field(
        ...,
        ge=0,
        le=23,
        description="Current hour of the 24-hour campaign cycle (0–23).",
    )

    # --- Budget / pacing --------------------------------------------------
    remaining_budget: float = Field(
        ...,
        description="Remaining daily budget in USD.",
    )
    spent_so_far: float = Field(
        default=0.0,
        description="Cumulative spend in USD up to this step.",
    )

    # --- Contextual signals (privacy-native: no user IDs) -----------------
    current_context: str = Field(
        ...,
        description=(
            "Content category derived from MIND news.tsv taxonomy "
            "(e.g. 'Fitness', 'Tech', 'Fashion', 'Gaming')."
        ),
    )
    news_category: str = Field(
        default="",
        description=(
            "Fine-grained MIND subcategory (e.g. 'nfl', 'gadgets'). "
            "Provides richer signal than coarse context alone."
        ),
    )
    viral_trend: str = Field(
        ...,
        description=(
            "Current cultural viral token surfaced from Reels "
            "(e.g. 'Quiet Luxury', 'Eco-Friendly', 'Cyberpunk', 'Minimalism')."
        ),
    )

    # --- hard_assembly: live scraped hashtags + source creative -----------
    live_hashtags: List[str] = Field(
        default_factory=list,  # fresh empty list per instance
        description=(
            "[hard_assembly] Real-time scraped viral hashtags from "
            "Google Trends / Reddit. The agent selects which to use "
            "and weaves them into generated_caption. "
            "Example: ['#QuietLuxury', '#OOTD', '#SlowFashion']."
        ),
    )
    image_description: str = Field(
        default="",
        description=(
            "[hard_assembly] Text description of the source ad image "
            "from AdCaptionDataset (COCO or seed pool). "
            "Agent caption must stay coherent with this."
        ),
    )
    base_caption: str = Field(
        default="",
        description=(
            "[hard_assembly] Base caption from AdCaptionDataset. "
            "Agent rewrites this to incorporate viral hashtags."
        ),
    )

    # --- Market signals ---------------------------------------------------
    market_pressure: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description=(
            "Normalised indicator of how competitive the auction is "
            "this hour (0 = cheap, 1 = very expensive)."
        ),
    )

    # --- Session state ----------------------------------------------------
    ads_shown_this_session: int = Field(
        default=0,
        description="Number of ads already shown; drives the fatigue penalty.",
    )
    fatigue_level: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description=(
            "Accumulated user-fatigue penalty (0 = fresh, 1 = fully fatigued)."
        ),
    )
    carryover_boost: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description=(
            "[hard_sequencing] Carry-over CTR boost from winning prior auctions."
        ),
    )

    # --- Performance feedback (delayed by 1 step) -------------------------
    last_ctr: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="CTR returned by the User Simulator on the previous step.",
    )
    cumulative_revenue: float = Field(
        default=0.0,
        description="Total revenue earned so far.",
    )
# ---------------------------------------------------------------------------
# Action (what the agent *does* each step)
# ---------------------------------------------------------------------------
class Action(BaseModel):
    # Per-step action submitted by the agent: one continuous bid, two discrete
    # catalog indices, and two optional hard_assembly outputs.
    # Fields declared with `...` are required; `ge` / `le` are pydantic bounds
    # checked at validation.
    # Documented with comments rather than a class docstring because pydantic
    # copies a class docstring into the generated JSON schema.

    # --- Continuous: auction bid ------------------------------------------
    bid_price: float = Field(
        ...,
        ge=0.0,
        description="Bid submitted to the RTB auction in USD.",
    )

    # --- Discrete: creative selection -------------------------------------
    headline_id: int = Field(
        ...,
        ge=0,
        le=5,
        description="Index into the Headlines Catalog (0–5).",
    )
    creative_id: int = Field(
        ...,
        ge=0,
        le=5,
        description="Index into the Creatives Catalog (0–5).",
    )

    # --- hard_assembly fields ---------------------------------------------
    # The description below previously contained a mis-decoded byte sequence
    # where an em dash was intended; it is restored here so the text exposed
    # through the model schema is readable.
    generated_caption: Optional[str] = Field(
        default=None,
        description=(
            "[hard_assembly] Final assembled caption — should incorporate "
            "viral hashtags and remain coherent with the source image. "
            "Leave None for easy/medium tasks."
        ),
    )
    generated_hashtags: Optional[List[str]] = Field(
        default=None,
        description=(
            "[hard_assembly] List of hashtag strings (with #) that the agent "
            "chose to include. The agent must scrape these from ViralHashtagScraper "
            "and select which ones to weave into generated_caption. "
            "Example: ['#QuietLuxury', '#OOTD', '#SlowFashion']. "
            "Leave None for easy/medium/sequencing tasks."
        ),
    )
# ---------------------------------------------------------------------------
# Per-step Info (returned alongside reward; not part of observation)
# ---------------------------------------------------------------------------
class Info(BaseModel):
    # Per-step diagnostics returned alongside the reward; not part of the
    # observation. Holds auction outcome, CTR values and per-task grader scores.
    # Fields without a default (and those declared with `...`) are required;
    # `ge` / `le` are pydantic bounds checked at validation.
    # Documented with comments rather than a class docstring because pydantic
    # copies a class docstring into the generated JSON schema.
    #
    # Three descriptions below previously contained mis-decoded byte sequences
    # where the multiplication sign and the greater-or-equal sign were
    # intended; they are restored so the schema text is readable.

    # --- Episode bookkeeping (all required) -------------------------------
    task_id: str
    current_step: int
    total_revenue: float

    # --- Auction outcome --------------------------------------------------
    clearing_price: float = Field(
        default=0.0,
        description="The winning competitor bid price this step.",
    )
    auction_won: bool = Field(
        default=False,
        description="Whether the agent won the auction this step.",
    )
    raw_ctr: float = Field(
        default=0.0,
        description="CTR before fatigue penalty applied.",
    )
    adjusted_ctr: float = Field(
        default=0.0,
        description="CTR after fatigue penalty.",
    )

    # --- Per-task grader scores -------------------------------------------
    task_score: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Final 0.0–1.0 task-completion score.",
    )

    # Level 1 sub-score
    headline_alignment_score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="[easy_headline] CTR_selected / CTR_best for this context.",
    )

    # Level 2 sub-score
    pacing_score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description=(
            "[medium_pacing] Budget-smoothness and peak-hour survival bonus."
        ),
    )

    # Level 3 sub-scores (all three axes)
    clip_similarity_score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description=(
            "[hard_assembly] Composite grader score "
            "(0.35×hashtag + 0.35×align + 0.30×coherence)."
        ),
    )
    hashtag_relevance_score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description=(
            "[hard_assembly] Mean cosine_sim(chosen_hashtag, viral_trend)."
        ),
    )
    caption_trend_alignment: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="[hard_assembly] cosine_sim(final_caption, viral_trend).",
    )
    caption_image_coherence: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description=(
            "[hard_assembly] cosine_sim(final_caption, image_description)."
        ),
    )
    chosen_hashtags: List[str] = Field(
        default_factory=list,  # fresh empty list per instance
        description="[hard_assembly] Hashtags the agent chose this step.",
    )
    assembly_reward_bonus: float = Field(
        default=0.0,
        description=(
            "[hard_assembly] Extra reward granted for viral alignment quality."
        ),
    )

    # Level 4 sub-scores
    sequencing_score: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description=(
            "[hard_sequencing] agent_conversions / oracle_conversions × diversity."
        ),
    )
    contexts_covered: int = Field(
        default=0,
        description=(
            "[hard_sequencing] Number of distinct contexts won at least once."
        ),
    )
    diversity_multiplier: float = Field(
        default=1.0,
        description=(
            "[hard_sequencing] Bonus multiplier for covering ≥3 contexts."
        ),
    )