Spaces:

sanjayvk21
/

OpenEnv-Auctioneer

Sleeping

Ikshitha Janarthanan

feat:task 3 enhanced

3e3afc7 about 1 month ago

9.39 kB

	"""
	models.py — Typed data contracts for the OpenEnv Creative Auctioneer.

	All tensors / vectors are represented as plain Python types so the environment
	stays framework-agnostic (no hard dependency on PyTorch at this layer).

	Dataset provenance (v0.4):
	CTR calibration → MIND (Microsoft News Dataset) behaviors.tsv + news.tsv
	Market engine → iPinYou Global RTB logs (Lognormal per hour)
	Persona bank → Vogue Dialogue Dataset
	Ad+Caption pool → MS-COCO Captions OR Google Conceptual Captions CC3M
	Viral hashtags → Pytrends / Hashtagify / static fallback table
	"""

	from typing import List, Optional
	from pydantic import BaseModel, Field


	# ---------------------------------------------------------------------------
	# Reward (typed wrapper so step() signature is explicit)
	# ---------------------------------------------------------------------------

	class Reward(BaseModel):
	    """Typed wrapper around the scalar reward attached to a single step.

	    Attributes:
	        value: Required float; the reward for the step.
	    """

	    value: float = Field(
	        ...,
	        description=(
	            "Scalar step reward. "
	            "Positive = profitable auction win; negative = missed bid penalty."
	        ),
	    )


	# ---------------------------------------------------------------------------
	# Observation (what the agent sees each step)
	# ---------------------------------------------------------------------------

	class Observation(BaseModel):
	    """Per-step observation: everything the agent sees before acting.

	    Required fields are ``hour_of_day``, ``remaining_budget``,
	    ``current_context`` and ``viral_trend``; every other field has a default.
	    Fields declared with ``ge``/``le`` are range-checked by pydantic when the
	    model is constructed.

	    ``ads_shown_this_session`` is a count, so it is constrained to be
	    non-negative, in line with the other bounded fields of this model.
	    """

	    # ── Temporal ──────────────────────────────────────────────────────────
	    hour_of_day: int = Field(
	        ...,
	        ge=0,
	        le=23,
	        description="Current hour of the 24-hour campaign cycle (0–23).",
	    )

	    # ── Budget / Pacing ────────────────────────────────────────────────────
	    remaining_budget: float = Field(
	        ...,
	        description="Remaining daily budget in USD.",
	    )
	    spent_so_far: float = Field(
	        default=0.0,
	        description="Cumulative spend in USD up to this step.",
	    )

	    # ── Contextual Signals (Privacy-Native — no user IDs) ──────────────────
	    current_context: str = Field(
	        ...,
	        description=(
	            "Content category derived from MIND news.tsv taxonomy "
	            "(e.g. 'Fitness', 'Tech', 'Fashion', 'Gaming')."
	        ),
	    )
	    news_category: str = Field(
	        default="",
	        description=(
	            "Fine-grained MIND subcategory (e.g. 'nfl', 'gadgets'). "
	            "Provides richer signal than coarse context alone."
	        ),
	    )
	    viral_trend: str = Field(
	        ...,
	        description=(
	            "Current cultural viral token surfaced from Reels "
	            "(e.g. 'Quiet Luxury', 'Eco-Friendly', 'Cyberpunk', 'Minimalism')."
	        ),
	    )

	    # ── hard_assembly: live scraped hashtags + source creative ─────────────
	    live_hashtags: List[str] = Field(
	        default_factory=list,
	        description=(
	            "[hard_assembly] Real-time scraped viral hashtags from "
	            "Google Trends / Reddit. The agent selects which to use "
	            "and weaves them into generated_caption. "
	            "Example: ['#QuietLuxury', '#OOTD', '#SlowFashion']."
	        ),
	    )
	    image_description: str = Field(
	        default="",
	        description=(
	            "[hard_assembly] Text description of the source ad image "
	            "from AdCaptionDataset (COCO or seed pool). "
	            "Agent caption must stay coherent with this."
	        ),
	    )
	    base_caption: str = Field(
	        default="",
	        description=(
	            "[hard_assembly] Base caption from AdCaptionDataset. "
	            "Agent rewrites this to incorporate viral hashtags."
	        ),
	    )

	    # ── Market Signals ─────────────────────────────────────────────────────
	    market_pressure: float = Field(
	        default=0.5,
	        ge=0.0,
	        le=1.0,
	        description=(
	            "Normalised indicator of how competitive the auction is "
	            "this hour (0 = cheap, 1 = very expensive)."
	        ),
	    )

	    # ── Session State ──────────────────────────────────────────────────────
	    # A count of ads cannot be negative; reject such values at construction.
	    ads_shown_this_session: int = Field(
	        default=0,
	        ge=0,
	        description="Number of ads already shown; drives the fatigue penalty.",
	    )
	    fatigue_level: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="Accumulated user-fatigue penalty (0 = fresh, 1 = fully fatigued).",
	    )
	    carryover_boost: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="[hard_sequencing] Carry-over CTR boost from winning prior auctions.",
	    )

	    # ── Performance Feedback (delayed by 1 step) ───────────────────────────
	    last_ctr: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="CTR returned by the User Simulator on the previous step.",
	    )
	    cumulative_revenue: float = Field(
	        default=0.0,
	        description="Total revenue earned so far.",
	    )


	# ---------------------------------------------------------------------------
	# Action (what the agent does each step)
	# ---------------------------------------------------------------------------

	class Action(BaseModel):
	    """Per-step action: what the agent submits each step.

	    ``bid_price``, ``headline_id`` and ``creative_id`` are required.
	    ``generated_caption`` and ``generated_hashtags`` are optional and default
	    to ``None``; both are tagged ``[hard_assembly]`` and their descriptions
	    give the same guidance on when to leave them unset.
	    """

	    # Continuous: auction bid
	    bid_price: float = Field(
	        ...,
	        ge=0.0,
	        description="Bid submitted to the RTB auction in USD.",
	    )

	    # Discrete: creative selection
	    headline_id: int = Field(
	        ...,
	        ge=0,
	        le=5,
	        description="Index into the Headlines Catalog (0–5).",
	    )
	    creative_id: int = Field(
	        ...,
	        ge=0,
	        le=5,
	        description="Index into the Creatives Catalog (0–5).",
	    )

	    # ── hard_assembly fields ────────────────────────────────────────────────
	    # The "Leave None" guidance matches generated_hashtags below: both fields
	    # are hard_assembly-only, so sequencing tasks leave them unset as well.
	    generated_caption: Optional[str] = Field(
	        default=None,
	        description=(
	            "[hard_assembly] Final assembled caption — should incorporate "
	            "viral hashtags and remain coherent with the source image. "
	            "Leave None for easy/medium/sequencing tasks."
	        ),
	    )

	    generated_hashtags: Optional[List[str]] = Field(
	        default=None,
	        description=(
	            "[hard_assembly] List of hashtag strings (with #) that the agent "
	            "chose to include. The agent must scrape these from ViralHashtagScraper "
	            "and select which ones to weave into generated_caption. "
	            "Example: ['#QuietLuxury', '#OOTD', '#SlowFashion']. "
	            "Leave None for easy/medium/sequencing tasks."
	        ),
	    )


	# ---------------------------------------------------------------------------
	# Per-step Info (returned alongside reward; not part of observation)
	# ---------------------------------------------------------------------------

	class Info(BaseModel):
	    """Per-step diagnostics returned alongside the reward.

	    Not part of the observation. ``task_id``, ``current_step``,
	    ``total_revenue`` and ``task_score`` are required; all other fields have
	    defaults. Score fields declared with ``ge=0.0, le=1.0`` are range-checked
	    by pydantic when the model is constructed.

	    ``contexts_covered`` is a count, so it is constrained to be non-negative.
	    """

	    task_id: str
	    current_step: int
	    total_revenue: float
	    clearing_price: float = Field(
	        default=0.0,
	        description="The winning competitor bid price this step.",
	    )
	    auction_won: bool = Field(
	        default=False,
	        description="Whether the agent won the auction this step.",
	    )
	    raw_ctr: float = Field(
	        default=0.0,
	        description="CTR before fatigue penalty applied.",
	    )
	    adjusted_ctr: float = Field(
	        default=0.0,
	        description="CTR after fatigue penalty.",
	    )

	    # ── Per-task grader scores ────────────────────────────────────────────
	    task_score: float = Field(
	        ...,
	        ge=0.0,
	        le=1.0,
	        description="Final 0.0–1.0 task-completion score.",
	    )

	    # Level 1 sub-score
	    headline_alignment_score: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="[easy_headline] CTR_selected / CTR_best for this context.",
	    )

	    # Level 2 sub-score
	    pacing_score: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="[medium_pacing] Budget-smoothness and peak-hour survival bonus.",
	    )

	    # Level 3 sub-scores (all three axes)
	    clip_similarity_score: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="[hard_assembly] Composite grader score (0.35×hashtag + 0.35×align + 0.30×coherence).",
	    )
	    hashtag_relevance_score: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="[hard_assembly] Mean cosine_sim(chosen_hashtag, viral_trend).",
	    )
	    caption_trend_alignment: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="[hard_assembly] cosine_sim(final_caption, viral_trend).",
	    )
	    caption_image_coherence: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="[hard_assembly] cosine_sim(final_caption, image_description).",
	    )
	    chosen_hashtags: List[str] = Field(
	        default_factory=list,
	        description="[hard_assembly] Hashtags the agent chose this step.",
	    )
	    assembly_reward_bonus: float = Field(
	        default=0.0,
	        description="[hard_assembly] Extra reward granted for viral alignment quality.",
	    )

	    # Level 4 sub-scores
	    sequencing_score: float = Field(
	        default=0.0,
	        ge=0.0,
	        le=1.0,
	        description="[hard_sequencing] agent_conversions / oracle_conversions × diversity.",
	    )
	    # A count of distinct contexts cannot be negative; reject such values.
	    contexts_covered: int = Field(
	        default=0,
	        ge=0,
	        description="[hard_sequencing] Number of distinct contexts won at least once.",
	    )
	    diversity_multiplier: float = Field(
	        default=1.0,
	        description="[hard_sequencing] Bonus multiplier for covering ≥3 contexts.",
	    )