File size: 9,392 Bytes
1d4a5a2
 
 
 
 
be2e30d
3e3afc7
be2e30d
 
 
3e3afc7
 
1d4a5a2
 
3e3afc7
2c3f1fb
 
1d4a5a2
be2e30d
 
 
 
 
 
 
 
 
 
1d4a5a2
 
 
 
2c3f1fb
1d4a5a2
 
 
 
 
 
 
 
 
 
 
 
be2e30d
3e3afc7
be2e30d
 
 
1d4a5a2
 
 
 
3e3afc7
 
 
 
 
 
 
 
 
 
 
 
 
 
1d4a5a2
 
 
 
 
 
 
 
 
 
3e3afc7
 
1d4a5a2
 
 
 
 
 
 
 
 
 
 
2c3f1fb
 
1d4a5a2
 
 
 
 
 
 
 
 
 
3e3afc7
1d4a5a2
3e3afc7
 
 
 
 
 
 
 
 
 
1d4a5a2
 
 
 
 
2c3f1fb
 
 
 
 
1d4a5a2
 
 
 
 
 
 
 
 
 
 
 
 
3e3afc7
1d4a5a2
 
3e3afc7
 
1d4a5a2
 
3e3afc7
 
1d4a5a2
3e3afc7
 
 
 
 
 
 
 
 
 
 
 
 
ce3d6c7
3e3afc7
 
 
ce3d6c7
3e3afc7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
"""
models.py β€” Typed data contracts for the OpenEnv Creative Auctioneer.

All tensors / vectors are represented as plain Python types so the environment
stays framework-agnostic (no hard dependency on PyTorch at this layer).

Dataset provenance (v0.4):
  CTR calibration  β†’ MIND (Microsoft News Dataset)  behaviours.tsv + news.tsv
  Market engine    β†’ iPinYou Global RTB logs         (Lognormal per hour)
  Persona bank     β†’ Vogue Dialogue Dataset
  Ad+Caption pool  β†’ MS-COCO Captions  OR  Google Conceptual Captions CC3M
  Viral hashtags   β†’ Pytrends / Hashtagify / static fallback table
"""

from typing import List, Optional
from pydantic import BaseModel, Field


# ---------------------------------------------------------------------------
# Reward  (typed wrapper so step() signature is explicit)
# ---------------------------------------------------------------------------

class Reward(BaseModel):
    # Typed wrapper around the scalar reward produced by one step, so the
    # reward travels as a validated model instead of a bare float.
    # Documented with `#` comments rather than a class docstring so the
    # generated schema for this model is left exactly as it was.

    # Required field: no default is supplied, so callers must pass `value`.
    value: float = Field(
        ...,
        description=(
            "Scalar step reward. Positive = profitable auction win; "
            "negative = missed bid penalty."
        ),
    )


# ---------------------------------------------------------------------------
# Observation  (what the agent *sees* each step)
# ---------------------------------------------------------------------------

class Observation(BaseModel):
    # Per-step view of the environment that is handed to the agent.
    #
    # Required fields (declared with `...`): hour_of_day, remaining_budget,
    # current_context and viral_trend. Every other field carries a default,
    # so it may be omitted at construction time.
    # Fields whose description starts with a [task] tag are specific to that
    # task (hard_assembly / hard_sequencing).
    # Bounded fields are validated by pydantic via the ge/le constraints.

    # ── Temporal ──────────────────────────────────────────────────────────
    hour_of_day: int = Field(..., ge=0, le=23,
        description="Current hour of the 24-hour campaign cycle (0–23).")

    # ── Budget / Pacing ────────────────────────────────────────────────────
    # Neither budget field is range-constrained.
    remaining_budget: float = Field(...,
        description="Remaining daily budget in USD.")
    spent_so_far: float = Field(default=0.0,
        description="Cumulative spend in USD up to this step.")

    # ── Contextual Signals (Privacy-Native — no user IDs) ──────────────────
    current_context: str = Field(...,
        description="Content category derived from MIND news.tsv taxonomy "
                    "(e.g. 'Fitness', 'Tech', 'Fashion', 'Gaming').")
    news_category: str = Field(default="",
        description="Fine-grained MIND subcategory (e.g. 'nfl', 'gadgets'). "
                    "Provides richer signal than coarse context alone.")
    viral_trend: str = Field(...,
        description="Current cultural viral token surfaced from Reels "
                    "(e.g. 'Quiet Luxury', 'Eco-Friendly', 'Cyberpunk', 'Minimalism').")

    # ── hard_assembly: live scraped hashtags + source creative ─────────────
    # default_factory gives each instance its own fresh list.
    live_hashtags: List[str] = Field(default_factory=list,
        description="[hard_assembly] Real-time scraped viral hashtags from "
                    "Google Trends / Reddit.  The agent selects which to use "
                    "and weaves them into generated_caption. "
                    "Example: ['#QuietLuxury', '#OOTD', '#SlowFashion'].")
    image_description: str = Field(default="",
        description="[hard_assembly] Text description of the source ad image "
                    "from AdCaptionDataset (COCO or seed pool). "
                    "Agent caption must stay coherent with this.")
    base_caption: str = Field(default="",
        description="[hard_assembly] Base caption from AdCaptionDataset. "
                    "Agent rewrites this to incorporate viral hashtags.")

    # ── Market Signals ─────────────────────────────────────────────────────
    market_pressure: float = Field(default=0.5, ge=0.0, le=1.0,
        description="Normalised indicator of how competitive the auction is "
                    "this hour (0 = cheap, 1 = very expensive).")

    # ── Session State ──────────────────────────────────────────────────────
    ads_shown_this_session: int = Field(default=0,
        description="Number of ads already shown; drives the fatigue penalty.")
    fatigue_level: float = Field(default=0.0, ge=0.0, le=1.0,
        description="Accumulated user-fatigue penalty (0 = fresh, 1 = fully fatigued).")
    carryover_boost: float = Field(default=0.0, ge=0.0, le=1.0,
        description="[hard_sequencing] Carry-over CTR boost from winning prior auctions.")

    # ── Performance Feedback (delayed by 1 step) ───────────────────────────
    last_ctr: float = Field(default=0.0, ge=0.0, le=1.0,
        description="CTR returned by the User Simulator on the previous step.")
    cumulative_revenue: float = Field(default=0.0,
        description="Total revenue earned so far.")


# ---------------------------------------------------------------------------
# Action  (what the agent *does* each step)
# ---------------------------------------------------------------------------

class Action(BaseModel):
    # What the agent submits each step: a bid plus a creative selection.
    #
    # bid_price, headline_id and creative_id are required. The two
    # generated_* fields default to None and are only filled in for the
    # tasks named in their descriptions.

    # Continuous: auction bid (validated as non-negative)
    bid_price: float = Field(..., ge=0.0,
        description="Bid submitted to the RTB auction in USD.")

    # Discrete: creative selection (both indices validated to lie in 0..5)
    headline_id: int = Field(..., ge=0, le=5,
        description="Index into the Headlines Catalog (0–5).")
    creative_id: int = Field(..., ge=0, le=5,
        description="Index into the Creatives Catalog (0–5).")

    # ── hard_assembly fields ────────────────────────────────────────────────
    generated_caption: Optional[str] = Field(default=None,
        description="[hard_assembly] Final assembled caption — should incorporate "
                    "viral hashtags and remain coherent with the source image. "
                    "Leave None for easy/medium tasks.")

    generated_hashtags: Optional[List[str]] = Field(default=None,
        description="[hard_assembly] List of hashtag strings (with #) that the agent "
                    "chose to include. The agent must scrape these from ViralHashtagScraper "
                    "and select which ones to weave into generated_caption. "
                    "Example: ['#QuietLuxury', '#OOTD', '#SlowFashion']. "
                    "Leave None for easy/medium/sequencing tasks.")


# ---------------------------------------------------------------------------
# Per-step Info  (returned alongside reward; not part of observation)
# ---------------------------------------------------------------------------

class Info(BaseModel):
    # Per-step diagnostics returned alongside the reward; not part of the
    # observation.
    #
    # Required fields: task_id, current_step, total_revenue and task_score.
    # All remaining fields have defaults, so a grader only needs to populate
    # the sub-scores relevant to the task it is scoring.
    # Score fields declared with ge=0.0 / le=1.0 are validated to that range.

    task_id: str
    current_step: int
    total_revenue: float
    clearing_price: float = Field(default=0.0,
        description="The winning competitor bid price this step.")
    auction_won: bool = Field(default=False,
        description="Whether the agent won the auction this step.")
    raw_ctr: float = Field(default=0.0,
        description="CTR before fatigue penalty applied.")
    adjusted_ctr: float = Field(default=0.0,
        description="CTR after fatigue penalty.")

    # ── Per-task grader scores ────────────────────────────────────────────
    task_score: float = Field(..., ge=0.0, le=1.0,
        description="Final 0.0–1.0 task-completion score.")

    # Level 1 sub-score
    headline_alignment_score: float = Field(default=0.0, ge=0.0, le=1.0,
        description="[easy_headline] CTR_selected / CTR_best for this context.")

    # Level 2 sub-score
    pacing_score: float = Field(default=0.0, ge=0.0, le=1.0,
        description="[medium_pacing] Budget-smoothness and peak-hour survival bonus.")

    # Level 3 sub-scores (all three axes)
    clip_similarity_score: float = Field(default=0.0, ge=0.0, le=1.0,
        description="[hard_assembly] Composite grader score (0.35×hashtag + 0.35×align + 0.30×coherence).")
    hashtag_relevance_score: float = Field(default=0.0, ge=0.0, le=1.0,
        description="[hard_assembly] Mean cosine_sim(chosen_hashtag, viral_trend).")
    caption_trend_alignment: float = Field(default=0.0, ge=0.0, le=1.0,
        description="[hard_assembly] cosine_sim(final_caption, viral_trend).")
    caption_image_coherence: float = Field(default=0.0, ge=0.0, le=1.0,
        description="[hard_assembly] cosine_sim(final_caption, image_description).")
    # default_factory gives each instance its own fresh list.
    chosen_hashtags: List[str] = Field(default_factory=list,
        description="[hard_assembly] Hashtags the agent chose this step.")
    # Unbounded: no ge/le constraint is declared for the bonus.
    assembly_reward_bonus: float = Field(default=0.0,
        description="[hard_assembly] Extra reward granted for viral alignment quality.")

    # Level 4 sub-scores
    sequencing_score: float = Field(default=0.0, ge=0.0, le=1.0,
        description="[hard_sequencing] agent_conversions / oracle_conversions × diversity.")
    contexts_covered: int = Field(default=0,
        description="[hard_sequencing] Number of distinct contexts won at least once.")
    # Defaults to 1.0, i.e. a neutral multiplier.
    diversity_multiplier: float = Field(default=1.0,
        description="[hard_sequencing] Bonus multiplier for covering ≥3 contexts.")