Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- Dockerfile +18 -0
- README.md +124 -6
- __init__.py +0 -0
- baseline.py +259 -0
- client.py +94 -0
- models.py +135 -0
- openenv.yaml +3 -0
- prd.md +615 -0
- pyproject.toml +21 -0
- requirements.txt +7 -0
- server/Dockerfile +15 -0
- server/__init__.py +0 -0
- server/app.py +177 -0
- server/environment.py +252 -0
- server/simulation.py +473 -0
- server/tasks.py +399 -0
Dockerfile
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
|
| 6 |
+
|
| 7 |
+
# Install openenv-core from source (not on PyPI)
|
| 8 |
+
RUN pip install --no-cache-dir git+https://github.com/meta-pytorch/OpenEnv.git
|
| 9 |
+
|
| 10 |
+
COPY requirements.txt .
|
| 11 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
+
|
| 13 |
+
COPY . .
|
| 14 |
+
|
| 15 |
+
EXPOSE 8000
|
| 16 |
+
|
| 17 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 18 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,128 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: GTM Strategy Optimizer
|
| 3 |
+
emoji: π
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 8000
|
| 8 |
+
tags:
|
| 9 |
+
- openenv
|
| 10 |
+
base_path: /web
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# GTM Strategy Optimizer — OpenEnv Environment
|
| 14 |
+
|
| 15 |
+
An RL environment that simulates **Go-To-Market (GTM) strategy optimization** for product launches. Agents learn to allocate marketing budgets, target customer segments, craft messaging, run experiments, and adjust pricing to maximize revenue under uncertainty.
|
| 16 |
+
|
| 17 |
+
## Why GTM?
|
| 18 |
+
|
| 19 |
+
Every startup and growth team does GTM optimization manually — iterating on channels, messaging, and targeting through trial and error. This environment captures the real complexity: noisy metrics, delayed brand effects, diminishing returns on ad spend, and the tension between short-term revenue and long-term brand strength.
|
| 20 |
+
|
| 21 |
+
## Action Space
|
| 22 |
+
|
| 23 |
+
Each timestep (1 week), the agent chooses:
|
| 24 |
+
|
| 25 |
+
| Action | Type | Description |
|
| 26 |
+
|--------|------|-------------|
|
| 27 |
+
| `budget_allocation` | `dict[str, float]` | Channel → fraction of weekly budget (sum ≤ 1.0) |
|
| 28 |
+
| `segment_targeting` | `dict[str, float]` | Segment → targeting weight (sum ≈ 1.0) |
|
| 29 |
+
| `messaging` | `dict[str, float]` | Dimension → emphasis weight (sum ≈ 1.0) |
|
| 30 |
+
| `experiment` | `str \| null` | Optional experiment to launch |
|
| 31 |
+
| `pricing_action` | `str \| null` | Optional pricing change |
|
| 32 |
+
|
| 33 |
+
**Messaging dimensions:** cost_savings, performance, reliability, innovation, ease_of_use, security
|
| 34 |
+
|
| 35 |
+
## Observation Space
|
| 36 |
+
|
| 37 |
+
| Field | Type | Description |
|
| 38 |
+
|-------|------|-------------|
|
| 39 |
+
| `week` / `total_weeks` | `int` | Current week and episode length |
|
| 40 |
+
| `budget_remaining` | `float` | Remaining budget |
|
| 41 |
+
| `channel_metrics` | `dict` | Per-channel: impressions, clicks, conversions, spend, CTR, CVR, ROI |
|
| 42 |
+
| `funnel` | `dict` | Visitors, signups, activations, retained users + rates |
|
| 43 |
+
| `segment_performance` | `dict` | Per-segment: conversion rate, engagement, churn, revenue |
|
| 44 |
+
| `experiment_result` | `dict \| null` | Completed experiment results |
|
| 45 |
+
| `brand_score` | `float` | Noisy proxy for brand health (0-100) |
|
| 46 |
+
| `total_revenue` | `float` | Cumulative revenue |
|
| 47 |
+
| `message` | `str` | Human-readable summary |
|
| 48 |
+
|
| 49 |
+
## Tasks
|
| 50 |
+
|
| 51 |
+
| Task | Difficulty | Weeks | Channels | Segments | Features |
|
| 52 |
+
|------|-----------|-------|----------|----------|----------|
|
| 53 |
+
| `channel_optimizer` | Easy | 12 | 3 | 2 | Budget + targeting only |
|
| 54 |
+
| `growth_strategist` | Medium | 24 | 5 | 3 | + experiments, pricing, brand management |
|
| 55 |
+
| `market_dominator` | Hard | 36 | 7 | 4 | + active competitor, market regime shifts, compliance traps |
|
| 56 |
+
|
| 57 |
+
## Setup & Usage
|
| 58 |
+
|
| 59 |
+
### Local Development
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
pip install -r requirements.txt
|
| 63 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000 --reload
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
### Docker
|
| 67 |
+
|
| 68 |
+
```bash
|
| 69 |
+
docker build -t gtm-optimizer -f server/Dockerfile .
|
| 70 |
+
docker run -p 8000:8000 gtm-optimizer
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
### Client Usage
|
| 74 |
+
|
| 75 |
+
```python
|
| 76 |
+
from client import GTMEnv
|
| 77 |
+
from models import GTMAction
|
| 78 |
+
|
| 79 |
+
with GTMEnv(base_url="http://localhost:8000").sync() as env:
|
| 80 |
+
result = env.reset(task_id="channel_optimizer")
|
| 81 |
+
while not result.done:
|
| 82 |
+
action = GTMAction(
|
| 83 |
+
budget_allocation={"paid_search": 0.5, "paid_social": 0.3, "email_lifecycle": 0.2},
|
| 84 |
+
segment_targeting={"startup_founders": 0.6, "smb_owners": 0.4},
|
| 85 |
+
messaging={"performance": 0.3, "innovation": 0.3, "ease_of_use": 0.2, "cost_savings": 0.1, "reliability": 0.05, "security": 0.05},
|
| 86 |
+
)
|
| 87 |
+
result = env.step(action)
|
| 88 |
+
print(f"Score: {result.observation.reward}")
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
### Baseline Inference
|
| 92 |
+
|
| 93 |
+
```bash
|
| 94 |
+
export OPENAI_API_KEY=sk-...
|
| 95 |
+
python baseline.py --model gpt-4o-mini
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
### API Endpoints
|
| 99 |
+
|
| 100 |
+
| Endpoint | Method | Description |
|
| 101 |
+
|----------|--------|-------------|
|
| 102 |
+
| `/tasks` | GET | List all tasks with action schemas |
|
| 103 |
+
| `/baseline` | POST | Run heuristic baseline, return scores |
|
| 104 |
+
| `/grader` | POST | Get grader score for a task |
|
| 105 |
+
| `/reset` | POST | Reset environment for a task |
|
| 106 |
+
| `/step` | POST | Execute one action step |
|
| 107 |
+
| `/state` | GET | Get current episode state |
|
| 108 |
+
| `/health` | GET | Health check |
|
| 109 |
+
| `/ws` | WS | WebSocket endpoint for persistent sessions |
|
| 110 |
+
|
| 111 |
+
## Baseline Scores
|
| 112 |
+
|
| 113 |
+
| Task | Heuristic (equal alloc) |
|
| 114 |
+
|------|------------------------|
|
| 115 |
+
| `channel_optimizer` | ~0.51 |
|
| 116 |
+
| `growth_strategist` | ~0.33 |
|
| 117 |
+
| `market_dominator` | ~0.42 |
|
| 118 |
+
|
| 119 |
+
Scores improve with intelligent channel selection, messaging alignment, and experimentation.
|
| 120 |
+
|
| 121 |
+
## Environment Dynamics
|
| 122 |
+
|
| 123 |
+
- **Diminishing returns**: Channel effectiveness decays with cumulative spend
|
| 124 |
+
- **Brand evolution**: Consistent messaging builds brand; variance erodes it
|
| 125 |
+
- **Noisy observations**: All metrics include noise proportional to difficulty
|
| 126 |
+
- **Delayed effects**: Brand investment pays off over weeks, not immediately
|
| 127 |
+
- **Competitor response** (hard mode): Competitor increases aggression when you perform well
|
| 128 |
+
- **Market shifts** (hard mode): Demand shocks at weeks ~12 and ~24
|
__init__.py
ADDED
|
File without changes
|
baseline.py
ADDED
|
@@ -0,0 +1,259 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Baseline inference script for the GTM Strategy Optimizer.
|
| 2 |
+
|
| 3 |
+
Uses the OpenAI API to run an LLM agent against all 3 tasks.
|
| 4 |
+
Reads OPENAI_API_KEY from environment variables.
|
| 5 |
+
|
| 6 |
+
Usage:
|
| 7 |
+
export OPENAI_API_KEY=sk-...
|
| 8 |
+
python baseline.py [--task TASK_ID] [--model MODEL]
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import argparse
|
| 14 |
+
import json
|
| 15 |
+
import os
|
| 16 |
+
import sys
|
| 17 |
+
|
| 18 |
+
# Add parent to path for imports
|
| 19 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 20 |
+
|
| 21 |
+
from openai import OpenAI
|
| 22 |
+
|
| 23 |
+
from models import GTMAction
|
| 24 |
+
from server.simulation import MESSAGING_DIMS
|
| 25 |
+
from server.tasks import create_simulator, get_task, TASKS
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# System message sent as the first chat turn of every episode. It explains the
# weekly decision the model must make and pins the JSON shape that
# parse_llm_action() expects back. The text is sent to the LLM verbatim, so it
# must stay free of mis-encoded characters.
SYSTEM_PROMPT = """You are a Go-To-Market (GTM) strategy optimizer. You manage a product launch by making weekly decisions about:

1. **Budget allocation**: How to split your weekly marketing budget across available channels
2. **Segment targeting**: How to weight your targeting across customer segments
3. **Messaging**: Which value propositions to emphasize
4. **Experiments** (if available): Which experiments to run
5. **Pricing** (if available): Whether to adjust pricing

You receive weekly performance metrics and must respond with a JSON action.

Strategy tips:
- Diversify budget initially, then double down on high-performing channels
- Match messaging to segment preferences (e.g., startups care about innovation/performance)
- Maintain brand consistency — don't change messaging wildly week to week
- Use experiments to validate hypotheses before scaling
- Monitor ROI per channel and shift budget away from underperforming channels

Your response must be ONLY valid JSON matching this schema:
{
"budget_allocation": {"channel_name": fraction, ...}, // fractions sum to <= 1.0
"segment_targeting": {"segment_name": weight, ...}, // weights sum to ~1.0
"messaging": {"dimension": weight, ...}, // weights sum to ~1.0
"experiment": "experiment_type" or null,
"pricing_action": "action" or null
}
"""
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def format_observation(obs_dict: dict) -> str:
    """Render one weekly observation as a text prompt for the LLM.

    Args:
        obs_dict: Observation mapping. The headline keys (``week``,
            ``total_weeks``, ``budget_remaining``, ``weekly_budget``,
            ``brand_score``, ``total_revenue``, ``total_conversions``,
            ``average_cac``, ``available_channels``, ``available_segments``,
            ``messaging_dimensions``) are required. ``channel_metrics``,
            ``segment_performance``, ``experiment_result``,
            ``available_experiments`` and ``available_pricing_actions`` are
            optional and skipped when missing or empty.

    Returns:
        The sections joined with newlines.

    Raises:
        KeyError: If a required key (or a per-channel / per-segment metric
            key) is absent.
    """
    lines = [
        f"**Week {obs_dict['week']}/{obs_dict['total_weeks']}**",
        f"Budget remaining: ${obs_dict['budget_remaining']:,.0f} (${obs_dict['weekly_budget']:,.0f}/week)",
        f"Brand score: {obs_dict['brand_score']:.0f}/100",
        f"Total revenue: ${obs_dict['total_revenue']:,.0f} | Conversions: {obs_dict['total_conversions']} | Avg CAC: ${obs_dict['average_cac']:,.0f}",
    ]

    # One line per channel, in the mapping's own iteration order.
    lines.append("\n**Channel Performance:**")
    lines.extend(
        f" {name}: {stats['impressions']} imp, {stats['clicks']} clicks, "
        f"{stats['conversions']} conv, ${stats['spend']:,.0f} spend, ROI={stats['roi']:.2f}"
        for name, stats in obs_dict.get("channel_metrics", {}).items()
    )

    # One line per customer segment.
    lines.append("\n**Segment Performance:**")
    lines.extend(
        f" {name}: CVR={stats['conversion_rate']:.4f}, "
        f"engagement={stats['engagement_score']:.1f}, ${stats['revenue']:,.0f} rev"
        for name, stats in obs_dict.get("segment_performance", {}).items()
    )

    # Only surface an experiment when one actually completed this week.
    experiment = obs_dict.get("experiment_result")
    if experiment:
        lines.append(f"\n**Experiment Result:** {experiment['recommendation']}")

    # Remind the model which names it is allowed to use in its next action.
    lines.append(f"\nAvailable channels: {obs_dict['available_channels']}")
    lines.append(f"Available segments: {obs_dict['available_segments']}")
    for label, key in (
        ("Available experiments", "available_experiments"),
        ("Available pricing actions", "available_pricing_actions"),
    ):
        if obs_dict.get(key):
            lines.append(f"{label}: {obs_dict[key]}")
    lines.append(f"Messaging dimensions: {obs_dict['messaging_dimensions']}")

    return "\n".join(lines)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def parse_llm_action(response_text: str, task_id: str) -> dict:
    """Parse an LLM reply into an action dict, falling back to equal allocation.

    The reply may be bare JSON or JSON wrapped in a Markdown code fence
    (with or without a ``json`` language tag).

    Args:
        response_text: Raw text returned by the model (may be empty).
        task_id: Task whose channels and segments define the fallback action.

    Returns:
        A dict that always holds dict-valued ``budget_allocation``,
        ``segment_targeting`` and ``messaging`` entries. When the reply cannot
        be decoded, or decodes to something other than a JSON object, the
        full equal-weight fallback action is returned instead. Any other keys
        the model supplied (e.g. ``experiment``, ``pricing_action``) are
        passed through untouched.
    """
    task_def = get_task(task_id)
    channels = [c.name for c in task_def.channels]
    segments = [s.name for s in task_def.segments]

    # Equal split across every channel / segment / messaging dimension.
    fallback = {
        "budget_allocation": {ch: 1.0 / len(channels) for ch in channels},
        "segment_targeting": {seg: 1.0 / len(segments) for seg in segments},
        "messaging": {dim: 1.0 / len(MESSAGING_DIMS) for dim in MESSAGING_DIMS},
        "experiment": None,
        "pricing_action": None,
    }

    try:
        # Strip an optional Markdown code fence before decoding.
        text = response_text.strip()
        if "```json" in text:
            text = text.split("```json")[1].split("```")[0].strip()
        elif "```" in text:
            text = text.split("```")[1].split("```")[0].strip()

        action = json.loads(text)
    except (json.JSONDecodeError, IndexError, KeyError):
        return fallback

    # json.loads happily returns lists, strings, numbers or None; only a JSON
    # object can be an action, and item assignment below would otherwise raise
    # an uncaught TypeError.
    if not isinstance(action, dict):
        return fallback

    # A missing, null or otherwise non-dict allocation is replaced by the
    # equal-weight default so the simulator always receives mappings.
    for key in ("budget_allocation", "segment_targeting", "messaging"):
        if not isinstance(action.get(key), dict):
            action[key] = fallback[key]

    return action
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def run_episode(task_id: str, model: str = "gpt-4o-mini", seed: int = 42, verbose: bool = True) -> float:
    """Play one full episode of a task with an LLM choosing each weekly action.

    Args:
        task_id: Identifier of the task to simulate.
        model: OpenAI chat model name used for every decision.
        seed: Seed forwarded to the simulator factory.
        verbose: When true, print a one-line summary per simulated week and
            any LLM API error.

    Returns:
        The score produced by the task's grader for the final simulator state.
    """
    llm = OpenAI()
    task_def = get_task(task_id)
    sim = create_simulator(task_id, seed=seed)

    channels = list(sim.channels.keys())
    segments = list(sim.segments.keys())

    # Opening user turn: describe the task and everything the agent may choose from.
    intro = [
        f"You are managing a GTM campaign: **{task_def.name}** ({task_def.difficulty})\n",
        f"{task_def.description}\n\n",
        f"Duration: {task_def.total_weeks} weeks | Budget: ${task_def.total_budget:,.0f}\n",
        f"Channels: {channels}\n",
        f"Segments: {segments}\n",
        f"Messaging dimensions: {MESSAGING_DIMS}\n",
    ]
    if task_def.available_experiments:
        intro.append(f"Experiments: {task_def.available_experiments}\n")
    if task_def.available_pricing_actions:
        intro.append(f"Pricing actions: {task_def.available_pricing_actions}\n")
    intro.append("\nProvide your first week's action as JSON.")

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": "".join(intro)},
    ]

    while not sim.is_done:
        # Ask the model for this week's action; any API failure degrades to an
        # empty reply, which parse_llm_action turns into the fallback action.
        try:
            completion = llm.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.3,
                max_tokens=500,
            )
            llm_text = completion.choices[0].message.content or ""
        except Exception as err:
            if verbose:
                print(f" LLM API error: {err}, using fallback")
            llm_text = ""

        action = parse_llm_action(llm_text, task_id)

        # Advance the simulation by one week.
        result = sim.step(
            budget_allocation=action.get("budget_allocation", {}),
            segment_targeting=action.get("segment_targeting", {}),
            messaging=action.get("messaging", {}),
            experiment=action.get("experiment"),
            pricing_action=action.get("pricing_action"),
        )
        state = sim.state

        if verbose:
            print(
                f" Week {state.week}/{state.total_weeks} | "
                f"Rev: ${result['weekly_revenue']:,.0f} | "
                f"Total: ${state.total_revenue:,.0f} | "
                f"Brand: {result['brand_score_observed']:.0f}"
            )

        # Snapshot of what the agent is allowed to see for its next decision.
        obs_dict = dict(
            week=state.week,
            total_weeks=state.total_weeks,
            budget_remaining=state.budget_remaining,
            weekly_budget=state.weekly_budget,
            brand_score=result["brand_score_observed"],
            total_revenue=state.total_revenue,
            total_conversions=state.total_conversions,
            # max(..., 1) avoids dividing by zero before the first conversion.
            average_cac=state.total_spend / max(state.total_conversions, 1),
            channel_metrics=result["channel_metrics"],
            segment_performance=result["segment_performance"],
            experiment_result=result["experiment_result"],
            available_channels=channels,
            available_segments=segments,
            available_experiments=task_def.available_experiments,
            available_pricing_actions=task_def.available_pricing_actions,
            messaging_dimensions=MESSAGING_DIMS,
        )

        if not sim.is_done:
            messages.append({"role": "assistant", "content": llm_text})
            messages.append(
                {
                    "role": "user",
                    "content": format_observation(obs_dict) + "\n\nProvide your next action as JSON.",
                }
            )

        # Keep the context window bounded: system prompt plus the 10 newest turns.
        if len(messages) > 12:
            messages = [messages[0], *messages[-10:]]

    return task_def.grader(sim.state)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def main():
    """Command-line entry point: run the LLM baseline and print grader scores.

    Runs the task given by ``--task``, or every task in ``TASKS`` when it is
    omitted. Exits with status 1 if ``OPENAI_API_KEY`` is not set.
    """
    cli = argparse.ArgumentParser(description="GTM Baseline Inference")
    cli.add_argument("--task", type=str, default=None, help="Run specific task (default: all)")
    cli.add_argument("--model", type=str, default="gpt-4o-mini", help="OpenAI model name")
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument("--quiet", action="store_true")
    args = cli.parse_args()

    # Fail fast: every episode needs the API key.
    if not os.environ.get("OPENAI_API_KEY"):
        print("Error: OPENAI_API_KEY environment variable not set")
        sys.exit(1)

    rule = "=" * 60
    selected = list(TASKS.keys()) if not args.task else [args.task]

    scores = {}
    for task_id in selected:
        print(f"\n{rule}")
        print(f"Running task: {task_id}")
        print(f"{rule}")
        result = run_episode(task_id, model=args.model, seed=args.seed, verbose=not args.quiet)
        scores[task_id] = result
        print(f"Grader score: {result:.4f}")

    # Final summary table.
    print(f"\n{rule}")
    print("BASELINE RESULTS")
    print(f"{rule}")
    for name, value in scores.items():
        print(f" {name}: {value:.4f}")
    print(f" Average: {sum(scores.values()) / len(scores):.4f}")
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
if __name__ == "__main__":
|
| 259 |
+
main()
|
client.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Client for the GTM Strategy Optimizer environment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Any, Dict
|
| 6 |
+
|
| 7 |
+
from openenv.core.client_types import StepResult
|
| 8 |
+
from openenv.core.env_client import EnvClient
|
| 9 |
+
|
| 10 |
+
from models import (
|
| 11 |
+
ChannelMetrics,
|
| 12 |
+
ExperimentResult,
|
| 13 |
+
FunnelMetrics,
|
| 14 |
+
GTMAction,
|
| 15 |
+
GTMObservation,
|
| 16 |
+
GTMState,
|
| 17 |
+
SegmentMetrics,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class GTMEnv(EnvClient[GTMAction, GTMObservation, GTMState]):
    """WebSocket client for the GTM Strategy Optimizer environment."""

    def _step_payload(self, action: GTMAction) -> Dict[str, Any]:
        """Turn a GTMAction into the JSON-ready dict sent to the server.

        The ``metadata`` field is left out of the payload.
        """
        return action.model_dump(exclude={"metadata"})

    def _parse_result(self, payload: Dict[str, Any]) -> StepResult[GTMObservation]:
        """Build a StepResult[GTMObservation] from a raw server response.

        Nested metric dicts are promoted to their typed models; values that
        are not dicts are passed through unchanged. Missing fields fall back
        to the defaults spelled out below.
        """
        obs_data = payload.get("observation", {})
        field = obs_data.get
        reward = payload.get("reward")
        done = payload.get("done", False)

        # Per-channel metrics -> ChannelMetrics
        channel_metrics = {
            name: ChannelMetrics(**raw) if isinstance(raw, dict) else raw
            for name, raw in field("channel_metrics", {}).items()
        }

        # Funnel -> FunnelMetrics (an empty model when the value is not a dict)
        raw_funnel = field("funnel", {})
        if isinstance(raw_funnel, dict):
            funnel = FunnelMetrics(**raw_funnel)
        else:
            funnel = FunnelMetrics()

        # Per-segment metrics -> SegmentMetrics
        segment_perf = {
            name: SegmentMetrics(**raw) if isinstance(raw, dict) else raw
            for name, raw in field("segment_performance", {}).items()
        }

        # Experiment result is optional
        raw_experiment = field("experiment_result")
        exp_result = None
        if raw_experiment:
            exp_result = ExperimentResult(**raw_experiment)

        obs = GTMObservation(
            done=done,
            reward=reward,
            week=field("week", 0),
            total_weeks=field("total_weeks", 12),
            budget_remaining=field("budget_remaining", 0.0),
            weekly_budget=field("weekly_budget", 0.0),
            channel_metrics=channel_metrics,
            funnel=funnel,
            segment_performance=segment_perf,
            experiment_result=exp_result,
            brand_score=field("brand_score", 50.0),
            total_revenue=field("total_revenue", 0.0),
            total_conversions=field("total_conversions", 0),
            average_cac=field("average_cac", 0.0),
            available_channels=field("available_channels", []),
            available_segments=field("available_segments", []),
            available_experiments=field("available_experiments", []),
            available_pricing_actions=field("available_pricing_actions", []),
            messaging_dimensions=field("messaging_dimensions", []),
            message=field("message", ""),
        )

        return StepResult(observation=obs, reward=reward, done=done)

    def _parse_state(self, payload: Dict[str, Any]) -> GTMState:
        """Build a GTMState from the server's state response.

        Any key absent from ``payload`` takes the default listed here.
        """
        defaults: Dict[str, Any] = {
            "episode_id": None,
            "step_count": 0,
            "task_id": "channel_optimizer",
            "difficulty": "easy",
            "true_brand_strength": 50.0,
            "true_market_demand": 1.0,
            "total_revenue": 0.0,
            "total_spend": 0.0,
            "total_conversions": 0,
            "compliance_violations": 0,
            "experiments_run": 0,
            "useful_experiments": 0,
        }
        return GTMState(**{key: payload.get(key, default) for key, default in defaults.items()})
|
models.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic models for the GTM Strategy Optimizer environment."""
|
| 2 |
+
|
| 3 |
+
from typing import Any, Dict, List, Optional
|
| 4 |
+
|
| 5 |
+
from pydantic import BaseModel, Field
|
| 6 |
+
|
| 7 |
+
from openenv.core.env_server import Action, Observation, State
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# ── Sub-models for structured metrics ───────────────────────────────────────
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class ChannelMetrics(BaseModel):
    """Performance metrics for a single marketing channel.

    Every field defaults to zero.
    """

    # Raw volume counters.
    impressions: int = 0
    clicks: int = 0
    conversions: int = 0
    # Amount spent on this channel.
    spend: float = 0.0
    # Derived ratios. NOTE(review): presumably click-through rate, conversion
    # rate and return on spend; confirm the exact formulas in the simulator.
    ctr: float = 0.0
    cvr: float = 0.0
    roi: float = 0.0
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class FunnelMetrics(BaseModel):
    """Funnel-level metrics across all channels.

    Every field defaults to zero.
    """

    # Counts at each funnel stage.
    visitors: int = 0
    signups: int = 0
    activations: int = 0
    retained_users: int = 0
    # Stage rates. NOTE(review): presumably stage-to-stage ratios; the
    # denominators are not visible here, so confirm in the simulator.
    signup_rate: float = 0.0
    activation_rate: float = 0.0
    retention_rate: float = 0.0
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class SegmentMetrics(BaseModel):
    """Performance metrics for a customer segment.

    Every field defaults to zero.
    """

    conversion_rate: float = 0.0
    # NOTE(review): scale of the engagement score is not visible here.
    engagement_score: float = 0.0
    churn_rate: float = 0.0
    # Revenue attributed to this segment.
    revenue: float = 0.0
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class ExperimentResult(BaseModel):
    """Result of a completed experiment.

    All four fields are required; none has a default.
    """

    # Which experiment produced this result.
    experiment_type: str
    # NOTE(review): units/range of uplift and confidence are not visible
    # here; confirm in the simulator.
    uplift_estimate: float
    confidence: float
    # Human-readable recommendation text.
    recommendation: str
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# ── Action ──────────────────────────────────────────────────────────────────
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class GTMAction(Action):
    """Agent's weekly GTM decisions.

    All allocation dicts map names to fractions (0.0-1.0).
    Fractions in budget_allocation should sum to <= 1.0.
    Fractions in segment_targeting and messaging should each sum to ~1.0.

    Every field is optional: the three allocation dicts default to empty and
    the experiment / pricing choices default to None. The sum constraints
    above are documentation only; this model does not validate them.
    """

    # How this week's budget is split across channels.
    budget_allocation: Dict[str, float] = Field(
        default_factory=dict,
        description="Channel name -> fraction of weekly budget to allocate",
    )
    # Relative targeting weight per customer segment.
    segment_targeting: Dict[str, float] = Field(
        default_factory=dict,
        description="Segment name -> targeting weight (should sum to ~1.0)",
    )
    # Relative emphasis per messaging dimension.
    messaging: Dict[str, float] = Field(
        default_factory=dict,
        description="Messaging dimension -> emphasis weight. Dimensions: cost_savings, performance, reliability, innovation, ease_of_use, security",
    )
    # Optional experiment to launch this week (None = no experiment).
    experiment: Optional[str] = Field(
        default=None,
        description="Experiment to launch: 'ab_test_landing', 'ab_test_pricing', 'ab_test_creative', 'run_survey', 'competitor_analysis', or null",
    )
    # Optional pricing change to apply this week (None = no change).
    pricing_action: Optional[str] = Field(
        default=None,
        description="Pricing change: 'discount_10', 'discount_20', 'raise_5', 'add_free_trial', or null",
    )
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
# ── Observation ─────────────────────────────────────────────────────────────
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
class GTMObservation(Observation):
    """What the agent observes after each week of GTM activity.

    Every field has a default, so a bare instance is valid.
    """

    # Episode progress and budget.
    week: int = 0
    total_weeks: int = 12
    budget_remaining: float = 0.0
    weekly_budget: float = 0.0

    # Structured performance metrics, keyed by channel / segment name.
    channel_metrics: Dict[str, ChannelMetrics] = Field(default_factory=dict)
    funnel: FunnelMetrics = Field(default_factory=FunnelMetrics)
    segment_performance: Dict[str, SegmentMetrics] = Field(default_factory=dict)

    # Result of a completed experiment, or None when there is none to report.
    experiment_result: Optional[ExperimentResult] = None

    # Observed brand score. The project README describes it as a noisy proxy
    # for brand health on a 0-100 scale.
    brand_score: float = 50.0

    # Cumulative totals.
    total_revenue: float = 0.0
    total_conversions: int = 0
    average_cac: float = 0.0

    # Names the agent may use when composing its next action.
    available_channels: List[str] = Field(default_factory=list)
    available_segments: List[str] = Field(default_factory=list)
    available_experiments: List[str] = Field(default_factory=list)
    available_pricing_actions: List[str] = Field(default_factory=list)
    messaging_dimensions: List[str] = Field(default_factory=list)

    # Human-readable summary.
    message: str = ""
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
# ── State ───────────────────────────────────────────────────────────────────
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class GTMState(State):
    """Internal environment state (includes hidden ground truth)."""

    # Which task is being played and its difficulty label.
    task_id: str = "channel_optimizer"
    difficulty: str = "easy"

    # Ground-truth quantities (the "true_" values are the hidden ones).
    true_brand_strength: float = 50.0
    true_market_demand: float = 1.0

    # Episode-level accumulators.
    total_revenue: float = 0.0
    total_spend: float = 0.0
    total_conversions: int = 0
    compliance_violations: int = 0

    # Experiment bookkeeping: how many were run, and how many were useful.
    experiments_run: int = 0
    useful_experiments: int = 0
|
openenv.yaml
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: gtm-strategy-optimizer
|
| 2 |
+
version: "1.0.0"
|
| 3 |
+
description: "RL environment simulating Go-To-Market strategy optimization — budget allocation, ICP targeting, messaging, and experimentation under uncertainty"
|
prd.md
ADDED
|
@@ -0,0 +1,615 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# PRD + Design Doc
|
| 2 |
+
|
| 3 |
+
# Autonomous GTM Strategy Optimizer (RL Environment)
|
| 4 |
+
|
| 5 |
+
## 1. Objective
|
| 6 |
+
|
| 7 |
+
Build a **reinforcement learning environment** that simulates the real-world Go-To-Market (GTM) lifecycle for launching and scaling a product.
|
| 8 |
+
|
| 9 |
+
The environment must capture the complexity faced by real growth teams:
|
| 10 |
+
|
| 11 |
+
* budget allocation across channels
|
| 12 |
+
* ICP discovery
|
| 13 |
+
* messaging optimization
|
| 14 |
+
* funnel optimization
|
| 15 |
+
* experimentation planning
|
| 16 |
+
* tradeoff between short-term revenue vs long-term brand strength
|
| 17 |
+
* noisy and delayed feedback
|
| 18 |
+
* competitor reactions
|
| 19 |
+
* market regime shifts
|
| 20 |
+
|
| 21 |
+
The RL agent must learn a **policy that maximizes long-term business outcomes** under uncertainty and constraints.
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
# 2. Real-world task being simulated
|
| 26 |
+
|
| 27 |
+
Human teams perform iterative GTM optimization:
|
| 28 |
+
|
| 29 |
+
1. define positioning
|
| 30 |
+
2. select customer segment
|
| 31 |
+
3. allocate budget
|
| 32 |
+
4. launch campaigns
|
| 33 |
+
5. observe funnel metrics
|
| 34 |
+
6. run experiments
|
| 35 |
+
7. refine messaging
|
| 36 |
+
8. reallocate budget
|
| 37 |
+
9. scale successful channels
|
| 38 |
+
10. adjust pricing/packaging
|
| 39 |
+
|
| 40 |
+
The environment simulates:
|
| 41 |
+
|
| 42 |
+
* imperfect attribution
|
| 43 |
+
* delayed conversions
|
| 44 |
+
* creative fatigue
|
| 45 |
+
* nonlinear scaling effects
|
| 46 |
+
* interactions between channels
|
| 47 |
+
|
| 48 |
+
---
|
| 49 |
+
|
| 50 |
+
# 3. Scope of environment
|
| 51 |
+
|
| 52 |
+
Episode represents:
|
| 53 |
+
|
| 54 |
+
> lifecycle of a product launch (12–52 timesteps)
|
| 55 |
+
|
| 56 |
+
Each timestep simulates:
|
| 57 |
+
|
| 58 |
+
> 1 week of GTM activity
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
# 4. Core entities in environment
|
| 63 |
+
|
| 64 |
+
### Product
|
| 65 |
+
|
| 66 |
+
```json
|
| 67 |
+
{
|
| 68 |
+
category,
|
| 69 |
+
price_range,
|
| 70 |
+
complexity,
|
| 71 |
+
differentiation_strength,
|
| 72 |
+
maturity_stage
|
| 73 |
+
}
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
### Market
|
| 77 |
+
|
| 78 |
+
```json
|
| 79 |
+
{
|
| 80 |
+
total_demand,
|
| 81 |
+
growth_rate,
|
| 82 |
+
noise_level,
|
| 83 |
+
competition_intensity,
|
| 84 |
+
seasonality_pattern
|
| 85 |
+
}
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
### Customer segments
|
| 89 |
+
|
| 90 |
+
example:
|
| 91 |
+
|
| 92 |
+
```json
|
| 93 |
+
[
|
| 94 |
+
{
|
| 95 |
+
name: "startup_founders",
|
| 96 |
+
price_sensitivity: high,
|
| 97 |
+
feature_preference_vector,
|
| 98 |
+
acquisition_channel_affinity,
|
| 99 |
+
churn_probability
|
| 100 |
+
}
|
| 101 |
+
]
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
### Channels
|
| 105 |
+
|
| 106 |
+
* paid search
|
| 107 |
+
* paid social
|
| 108 |
+
* organic content
|
| 109 |
+
* outbound sales
|
| 110 |
+
* partnerships
|
| 111 |
+
* email lifecycle
|
| 112 |
+
* influencer marketing
|
| 113 |
+
|
| 114 |
+
Each channel has:
|
| 115 |
+
|
| 116 |
+
```json
|
| 117 |
+
{
|
| 118 |
+
base_ctr,
|
| 119 |
+
base_cvr,
|
| 120 |
+
saturation_point,
|
| 121 |
+
cost_curve,
|
| 122 |
+
response_variance
|
| 123 |
+
}
|
| 124 |
+
```
|
| 125 |
+
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
# 5. Environment inputs
|
| 129 |
+
|
| 130 |
+
## static inputs
|
| 131 |
+
|
| 132 |
+
### product description
|
| 133 |
+
|
| 134 |
+
text embedding or structured attributes
|
| 135 |
+
|
| 136 |
+
### initial market conditions
|
| 137 |
+
|
| 138 |
+
### initial budget
|
| 139 |
+
|
| 140 |
+
### initial ICP guess
|
| 141 |
+
|
| 142 |
+
### campaign constraints
|
| 143 |
+
|
| 144 |
+
---
|
| 145 |
+
|
| 146 |
+
## dynamic observations per timestep
|
| 147 |
+
|
| 148 |
+
### performance metrics
|
| 149 |
+
|
| 150 |
+
```json
|
| 151 |
+
{
|
| 152 |
+
impressions,
|
| 153 |
+
clicks,
|
| 154 |
+
conversions,
|
| 155 |
+
CAC,
|
| 156 |
+
revenue,
|
| 157 |
+
ROI
|
| 158 |
+
}
|
| 159 |
+
```
|
| 160 |
+
|
| 161 |
+
### funnel metrics
|
| 162 |
+
|
| 163 |
+
```json
|
| 164 |
+
{
|
| 165 |
+
visitors,
|
| 166 |
+
signup_rate,
|
| 167 |
+
activation_rate,
|
| 168 |
+
retention_rate
|
| 169 |
+
}
|
| 170 |
+
```
|
| 171 |
+
|
| 172 |
+
### segment performance
|
| 173 |
+
|
| 174 |
+
```json
|
| 175 |
+
{
|
| 176 |
+
segment_name,
|
| 177 |
+
conversion_rate,
|
| 178 |
+
engagement_score,
|
| 179 |
+
churn_rate
|
| 180 |
+
}
|
| 181 |
+
```
|
| 182 |
+
|
| 183 |
+
### experiment results
|
| 184 |
+
|
| 185 |
+
```json
|
| 186 |
+
{
|
| 187 |
+
experiment_id,
|
| 188 |
+
uplift_estimate,
|
| 189 |
+
confidence,
|
| 190 |
+
sample_size
|
| 191 |
+
}
|
| 192 |
+
```
|
| 193 |
+
|
| 194 |
+
### brand state
|
| 195 |
+
|
| 196 |
+
latent variable:
|
| 197 |
+
|
| 198 |
+
```json
|
| 199 |
+
{
|
| 200 |
+
trust_score,
|
| 201 |
+
awareness_score,
|
| 202 |
+
positioning_consistency
|
| 203 |
+
}
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
not directly observable; inferred via noisy proxy metrics.
|
| 207 |
+
|
| 208 |
+
---
|
| 209 |
+
|
| 210 |
+
# 6. State representation
|
| 211 |
+
|
| 212 |
+
state is partially observable.
|
| 213 |
+
|
| 214 |
+
true state:
|
| 215 |
+
|
| 216 |
+
```json
|
| 217 |
+
{
|
| 218 |
+
latent_market_demand,
|
| 219 |
+
true_segment_preferences,
|
| 220 |
+
competitor_strategy,
|
| 221 |
+
brand_strength,
|
| 222 |
+
channel_effectiveness_curves
|
| 223 |
+
}
|
| 224 |
+
```
|
| 225 |
+
|
| 226 |
+
observed state:
|
| 227 |
+
|
| 228 |
+
```json
|
| 229 |
+
s_t = {
|
| 230 |
+
time_step,
|
| 231 |
+
budget_remaining,
|
| 232 |
+
channel_metrics,
|
| 233 |
+
funnel_metrics,
|
| 234 |
+
experiment_results,
|
| 235 |
+
estimated_segment_response,
|
| 236 |
+
historical_actions
|
| 237 |
+
}
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
state representation can be encoded as:
|
| 241 |
+
|
| 242 |
+
* structured tensor
|
| 243 |
+
* graph of relationships
|
| 244 |
+
* time series embedding
|
| 245 |
+
|
| 246 |
+
---
|
| 247 |
+
|
| 248 |
+
# 7. Action space
|
| 249 |
+
|
| 250 |
+
multi-discrete or parameterized actions.
|
| 251 |
+
|
| 252 |
+
agent chooses set of actions each timestep.
|
| 253 |
+
|
| 254 |
+
---
|
| 255 |
+
|
| 256 |
+
## A. budget allocation actions
|
| 257 |
+
|
| 258 |
+
continuous:
|
| 259 |
+
|
| 260 |
+
```json
|
| 261 |
+
allocate_budget(channel_i, amount)
|
| 262 |
+
```
|
| 263 |
+
|
| 264 |
+
constraint:
|
| 265 |
+
|
| 266 |
+
```json
|
| 267 |
+
sum(budget_i) <= budget_remaining
|
| 268 |
+
```
|
| 269 |
+
|
| 270 |
+
---
|
| 271 |
+
|
| 272 |
+
## B. ICP targeting actions
|
| 273 |
+
|
| 274 |
+
discrete:
|
| 275 |
+
|
| 276 |
+
* select target segment
|
| 277 |
+
* adjust segment weighting
|
| 278 |
+
|
| 279 |
+
example:
|
| 280 |
+
|
| 281 |
+
```json
|
| 282 |
+
{
|
| 283 |
+
startup_founders: 0.6,
|
| 284 |
+
enterprises: 0.3,
|
| 285 |
+
smb: 0.1
|
| 286 |
+
}
|
| 287 |
+
```
|
| 288 |
+
|
| 289 |
+
---
|
| 290 |
+
|
| 291 |
+
## C. messaging actions
|
| 292 |
+
|
| 293 |
+
agent selects messaging vector:
|
| 294 |
+
|
| 295 |
+
dimensions:
|
| 296 |
+
|
| 297 |
+
* cost savings
|
| 298 |
+
* performance
|
| 299 |
+
* reliability
|
| 300 |
+
* innovation
|
| 301 |
+
* ease of use
|
| 302 |
+
* security
|
| 303 |
+
|
| 304 |
+
example:
|
| 305 |
+
|
| 306 |
+
```json
|
| 307 |
+
message_vector = [0.2, 0.5, 0.1, 0.1, 0.05, 0.05]
|
| 308 |
+
```
|
| 309 |
+
|
| 310 |
+
---
|
| 311 |
+
|
| 312 |
+
## D. experimentation actions
|
| 313 |
+
|
| 314 |
+
agent can:
|
| 315 |
+
|
| 316 |
+
* launch A/B test
|
| 317 |
+
* change landing page variant
|
| 318 |
+
* test pricing tier
|
| 319 |
+
* test creative
|
| 320 |
+
|
| 321 |
+
cost incurred:
|
| 322 |
+
|
| 323 |
+
budget + delay.
|
| 324 |
+
|
| 325 |
+
---
|
| 326 |
+
|
| 327 |
+
## E. pricing actions
|
| 328 |
+
|
| 329 |
+
* adjust price
|
| 330 |
+
* introduce discount
|
| 331 |
+
* introduce tier
|
| 332 |
+
* change free trial duration
|
| 333 |
+
|
| 334 |
+
---
|
| 335 |
+
|
| 336 |
+
## F. information gathering actions
|
| 337 |
+
|
| 338 |
+
agent can call simulated tools:
|
| 339 |
+
|
| 340 |
+
### tools
|
| 341 |
+
|
| 342 |
+
* run survey
|
| 343 |
+
* analyze cohort
|
| 344 |
+
* competitor intelligence query
|
| 345 |
+
* attribution analysis
|
| 346 |
+
|
| 347 |
+
these reduce uncertainty but cost time/budget.
|
| 348 |
+
|
| 349 |
+
---
|
| 350 |
+
|
| 351 |
+
# 8. Legal action constraints
|
| 352 |
+
|
| 353 |
+
environment enforces compliance constraints:
|
| 354 |
+
|
| 355 |
+
## disallowed actions
|
| 356 |
+
|
| 357 |
+
* discriminatory targeting
|
| 358 |
+
* false claims
|
| 359 |
+
* privacy violations
|
| 360 |
+
* prohibited data usage
|
| 361 |
+
* dark patterns
|
| 362 |
+
|
| 363 |
+
violations incur heavy penalty:
|
| 364 |
+
|
| 365 |
+
```python
|
| 366 |
+
reward -= compliance_penalty
|
| 367 |
+
```
|
| 368 |
+
|
| 369 |
+
example constraints:
|
| 370 |
+
|
| 371 |
+
### privacy
|
| 372 |
+
|
| 373 |
+
cannot use sensitive attributes:
|
| 374 |
+
|
| 375 |
+
* race
|
| 376 |
+
* religion
|
| 377 |
+
* health status
|
| 378 |
+
|
| 379 |
+
### advertising standards
|
| 380 |
+
|
| 381 |
+
cannot claim:
|
| 382 |
+
|
| 383 |
+
* false performance metrics
|
| 384 |
+
* fabricated testimonials
|
| 385 |
+
|
| 386 |
+
---
|
| 387 |
+
|
| 388 |
+
# 9. Transition dynamics
|
| 389 |
+
|
| 390 |
+
environment simulates market response.
|
| 391 |
+
|
| 392 |
+
## demand generation
|
| 393 |
+
|
| 394 |
+
```math
|
| 395 |
+
conversions =
|
| 396 |
+
demand(segment)
|
| 397 |
+
× channel_effectiveness(channel, segment)
|
| 398 |
+
× message_alignment(message, segment)
|
| 399 |
+
× brand_strength
|
| 400 |
+
× noise
|
| 401 |
+
```
|
| 402 |
+
|
| 403 |
+
---
|
| 404 |
+
|
| 405 |
+
## diminishing returns
|
| 406 |
+
|
| 407 |
+
channel effectiveness decreases as spend increases:
|
| 408 |
+
|
| 409 |
+
```math
|
| 410 |
+
effectiveness = base * exp(-alpha * spend)
|
| 411 |
+
```
|
| 412 |
+
|
| 413 |
+
---
|
| 414 |
+
|
| 415 |
+
## delayed reward dynamics
|
| 416 |
+
|
| 417 |
+
brand strength evolves:
|
| 418 |
+
|
| 419 |
+
```math
|
| 420 |
+
brand_{t+1} =
|
| 421 |
+
brand_t
|
| 422 |
+
+ beta * consistency_score
|
| 423 |
+
- gamma * messaging_variance
|
| 424 |
+
```
|
| 425 |
+
|
| 426 |
+
---
|
| 427 |
+
|
| 428 |
+
## competitor response
|
| 429 |
+
|
| 430 |
+
optional module:
|
| 431 |
+
|
| 432 |
+
competitor reacts:
|
| 433 |
+
|
| 434 |
+
* price drop
|
| 435 |
+
* increased ad spend
|
| 436 |
+
* new messaging
|
| 437 |
+
|
| 438 |
+
---
|
| 439 |
+
|
| 440 |
+
# 10. Reward function
|
| 441 |
+
|
| 442 |
+
multi-objective.
|
| 443 |
+
|
| 444 |
+
primary:
|
| 445 |
+
|
| 446 |
+
```math
|
| 447 |
+
reward =
|
| 448 |
+
w1 * revenue
|
| 449 |
+
+ w2 * conversions
|
| 450 |
+
- w3 * CAC
|
| 451 |
+
```
|
| 452 |
+
|
| 453 |
+
secondary:
|
| 454 |
+
|
| 455 |
+
```math
|
| 456 |
+
+ w4 * brand_strength
|
| 457 |
+
+ w5 * experimentation_efficiency
|
| 458 |
+
```
|
| 459 |
+
|
| 460 |
+
penalties:
|
| 461 |
+
|
| 462 |
+
```math
|
| 463 |
+
- w6 * budget_waste
|
| 464 |
+
- w7 * compliance_violation
|
| 465 |
+
```
|
| 466 |
+
|
| 467 |
+
long-term reward accumulation:
|
| 468 |
+
|
| 469 |
+
episodic return.
|
| 470 |
+
|
| 471 |
+
---
|
| 472 |
+
|
| 473 |
+
# 11. Policy design
|
| 474 |
+
|
| 475 |
+
agent learns:
|
| 476 |
+
|
| 477 |
+
```math
|
| 478 |
+
π(a|s)
|
| 479 |
+
```
|
| 480 |
+
|
| 481 |
+
policy architecture options:
|
| 482 |
+
|
| 483 |
+
### baseline
|
| 484 |
+
|
| 485 |
+
MLP with structured inputs.
|
| 486 |
+
|
| 487 |
+
### advanced
|
| 488 |
+
|
| 489 |
+
transformer over time series:
|
| 490 |
+
|
| 491 |
+
input:
|
| 492 |
+
|
| 493 |
+
```math
|
| 494 |
+
[s_1, s_2, ..., s_t]
|
| 495 |
+
```
|
| 496 |
+
|
| 497 |
+
captures temporal dependencies.
|
| 498 |
+
|
| 499 |
+
---
|
| 500 |
+
|
| 501 |
+
## hierarchical policy option
|
| 502 |
+
|
| 503 |
+
high level:
|
| 504 |
+
|
| 505 |
+
decide strategy direction every K steps.
|
| 506 |
+
|
| 507 |
+
low level:
|
| 508 |
+
|
| 509 |
+
execute weekly actions.
|
| 510 |
+
|
| 511 |
+
---
|
| 512 |
+
|
| 513 |
+
# 12. Evaluation metrics
|
| 514 |
+
|
| 515 |
+
agent performance evaluated across:
|
| 516 |
+
|
| 517 |
+
## financial metrics
|
| 518 |
+
|
| 519 |
+
* cumulative revenue
|
| 520 |
+
* CAC
|
| 521 |
+
* LTV
|
| 522 |
+
* ROI
|
| 523 |
+
|
| 524 |
+
## efficiency metrics
|
| 525 |
+
|
| 526 |
+
* time to product-market fit
|
| 527 |
+
* experimentation efficiency
|
| 528 |
+
* budget utilization efficiency
|
| 529 |
+
|
| 530 |
+
## robustness metrics
|
| 531 |
+
|
| 532 |
+
performance under:
|
| 533 |
+
|
| 534 |
+
* noisy markets
|
| 535 |
+
* demand shocks
|
| 536 |
+
* competitor shifts
|
| 537 |
+
|
| 538 |
+
---
|
| 539 |
+
|
| 540 |
+
# 13. Difficulty scaling
|
| 541 |
+
|
| 542 |
+
environment difficulty configurable:
|
| 543 |
+
|
| 544 |
+
| parameter | effect |
|
| 545 |
+
| ------------------- | ------------------------ |
|
| 546 |
+
| noise level | harder signal extraction |
|
| 547 |
+
| attribution error | harder credit assignment |
|
| 548 |
+
| demand volatility | harder planning |
|
| 549 |
+
| budget size | resource constraint |
|
| 550 |
+
| competitor strength | adversarial dynamics |
|
| 551 |
+
|
| 552 |
+
---
|
| 553 |
+
|
| 554 |
+
# 14. Extensions (optional)
|
| 555 |
+
|
| 556 |
+
## multi-agent version
|
| 557 |
+
|
| 558 |
+
agents:
|
| 559 |
+
|
| 560 |
+
* growth strategist
|
| 561 |
+
* performance marketer
|
| 562 |
+
* brand manager
|
| 563 |
+
|
| 564 |
+
must coordinate.
|
| 565 |
+
|
| 566 |
+
---
|
| 567 |
+
|
| 568 |
+
## LLM-powered environment components
|
| 569 |
+
|
| 570 |
+
LLM simulates:
|
| 571 |
+
|
| 572 |
+
* customer feedback
|
| 573 |
+
* survey responses
|
| 574 |
+
* qualitative insights
|
| 575 |
+
|
| 576 |
+
---
|
| 577 |
+
|
| 578 |
+
## causal structure
|
| 579 |
+
|
| 580 |
+
introduce structural causal graph:
|
| 581 |
+
|
| 582 |
+
message → perception → conversion.
|
| 583 |
+
|
| 584 |
+
agent must discover relationships.
|
| 585 |
+
|
| 586 |
+
---
|
| 587 |
+
|
| 588 |
+
# 15. Deliverables
|
| 589 |
+
|
| 590 |
+
## core
|
| 591 |
+
|
| 592 |
+
* gym environment
|
| 593 |
+
* baseline policy
|
| 594 |
+
* evaluation benchmark
|
| 595 |
+
* visualization dashboard
|
| 596 |
+
|
| 597 |
+
## documentation
|
| 598 |
+
|
| 599 |
+
* state schema
|
| 600 |
+
* action definitions
|
| 601 |
+
* reward function
|
| 602 |
+
* environment dynamics
|
| 603 |
+
|
| 604 |
+
---
|
| 605 |
+
|
| 606 |
+
If useful next, I can provide:
|
| 607 |
+
|
| 608 |
+
1. exact state tensor structure
|
| 609 |
+
2. reward function code
|
| 610 |
+
3. transition simulator pseudocode
|
| 611 |
+
4. baseline PPO implementation
|
| 612 |
+
5. architecture diagram
|
| 613 |
+
6. realistic parameter ranges
|
| 614 |
+
7. ablation ideas to impress judges
|
| 615 |
+
|
pyproject.toml
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools>=45", "wheel"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "gtm-strategy-optimizer"
|
| 7 |
+
version = "1.0.0"
|
| 8 |
+
description = "OpenEnv RL environment for Go-To-Market strategy optimization"
|
| 9 |
+
requires-python = ">=3.10"
|
| 10 |
+
dependencies = [
|
| 11 |
+
"openenv-core>=0.2.2",
|
| 12 |
+
"fastapi>=0.104.0",
|
| 13 |
+
"uvicorn>=0.24.0",
|
| 14 |
+
"pydantic>=2.0.0",
|
| 15 |
+
"websockets>=15.0.1",
|
| 16 |
+
"openai>=1.0.0",
|
| 17 |
+
"numpy>=1.24.0",
|
| 18 |
+
]
|
| 19 |
+
|
| 20 |
+
[tool.setuptools.packages.find]
|
| 21 |
+
include = ["gtm_env*", "server*"]
|
requirements.txt
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core>=0.2.2
|
| 2 |
+
fastapi>=0.104.0
|
| 3 |
+
uvicorn>=0.24.0
|
| 4 |
+
pydantic>=2.0.0
|
| 5 |
+
websockets>=15.0.1
|
| 6 |
+
openai>=1.0.0
|
| 7 |
+
numpy>=1.24.0
|
server/Dockerfile
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.11-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Install git for openenv-core from GitHub
|
| 6 |
+
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
|
| 7 |
+
|
| 8 |
+
COPY requirements.txt .
|
| 9 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 10 |
+
|
| 11 |
+
COPY . .
|
| 12 |
+
|
| 13 |
+
EXPOSE 8000
|
| 14 |
+
|
| 15 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
server/__init__.py
ADDED
|
File without changes
|
server/app.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI application for the GTM Strategy Optimizer environment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import sys
|
| 7 |
+
|
| 8 |
+
# Ensure parent directory is on path for imports
|
| 9 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 10 |
+
|
| 11 |
+
from typing import Optional
|
| 12 |
+
|
| 13 |
+
from fastapi import HTTPException
|
| 14 |
+
from pydantic import BaseModel
|
| 15 |
+
|
| 16 |
+
from openenv.core.env_server import create_fastapi_app
|
| 17 |
+
|
| 18 |
+
from models import GTMAction, GTMObservation
|
| 19 |
+
from server.environment import GTMEnvironment
|
| 20 |
+
from server.tasks import TASKS
|
| 21 |
+
from server.simulation import MESSAGING_DIMS
|
| 22 |
+
|
| 23 |
+
# Create the core OpenEnv app
|
| 24 |
+
app = create_fastapi_app(GTMEnvironment, GTMAction, GTMObservation)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ── Custom endpoints required by the hackathon ─────────────────────────────
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# Response item for GET /tasks: one task's metadata plus its action schema.
# Documented with comments rather than a class docstring so the generated
# schema description stays exactly as it was.
class TaskInfo(BaseModel):
    # Identity and human-readable summary.
    task_id: str
    name: str
    difficulty: str
    description: str

    # Episode length and total budget.
    total_weeks: int
    total_budget: float

    # Names the agent may reference in an action for this task.
    channels: list[str]
    segments: list[str]
    messaging_dimensions: list[str]
    available_experiments: list[str]
    available_pricing_actions: list[str]

    # Free-form description of each action field (type, description, keys/options).
    action_schema: dict
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
@app.get("/tasks")
def list_tasks() -> list[TaskInfo]:
    """Return list of tasks and the action schema."""
    catalog = []
    for task_id, task in TASKS.items():
        channel_names = [c.name for c in task.channels]
        segment_names = [s.name for s in task.segments]

        # Per-field description of what a valid action looks like for this task.
        # The key lists are copies so the schema does not share list objects
        # with the top-level `channels` / `segments` fields.
        schema = {
            "budget_allocation": {
                "type": "object",
                "description": "channel_name -> fraction of weekly budget (sum <= 1.0)",
                "keys": list(channel_names),
            },
            "segment_targeting": {
                "type": "object",
                "description": "segment_name -> weight (should sum to ~1.0)",
                "keys": list(segment_names),
            },
            "messaging": {
                "type": "object",
                "description": "dimension -> weight (should sum to ~1.0)",
                "keys": MESSAGING_DIMS,
            },
            "experiment": {
                "type": "string|null",
                "options": task.available_experiments,
            },
            "pricing_action": {
                "type": "string|null",
                "options": task.available_pricing_actions,
            },
        }

        info = TaskInfo(
            task_id=task_id,
            name=task.name,
            difficulty=task.difficulty,
            description=task.description,
            total_weeks=task.total_weeks,
            total_budget=task.total_budget,
            channels=channel_names,
            segments=segment_names,
            messaging_dimensions=MESSAGING_DIMS,
            available_experiments=task.available_experiments,
            available_pricing_actions=task.available_pricing_actions,
            action_schema=schema,
        )
        catalog.append(info)
    return catalog
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
# Request body for POST /grader.
class GraderRequest(BaseModel):
    # Task to grade; the endpoint rejects ids that are not in TASKS.
    task_id: str
    # Caller-supplied episode identifier; echoed back in the response.
    episode_id: str
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# Response body for POST /grader.
class GraderResponse(BaseModel):
    # Echo of the request identifiers.
    task_id: str
    episode_id: str
    # Grader score; the type allows None.
    score: Optional[float]
    # Human-readable summary of the result.
    message: str
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
@app.post("/grader")
def run_grader(req: GraderRequest) -> GraderResponse:
    """Return grader score after an episode is completed.

    Note: In a full production setup, this would look up completed episodes.
    For the hackathon, we run a quick deterministic episode if needed.
    """
    # Unknown tasks are a client error.
    if req.task_id not in TASKS:
        raise HTTPException(status_code=400, detail=f"Unknown task_id: {req.task_id}")

    # Imported at call time, as in the original endpoint.
    from server.tasks import create_simulator, get_task

    task_def = get_task(req.task_id)
    simulator = create_simulator(req.task_id, seed=42)  # fixed seed

    # Equal-weight heuristic: spread budget, targeting and messaging evenly.
    channel_names = list(simulator.channels.keys())
    segment_names = list(simulator.segments.keys())
    uniform_policy = {
        "budget_allocation": {
            name: 1.0 / len(channel_names) for name in channel_names
        },
        "segment_targeting": {
            name: 1.0 / len(segment_names) for name in segment_names
        },
        "messaging": {
            dim: 1.0 / len(MESSAGING_DIMS) for dim in MESSAGING_DIMS
        },
    }

    # Apply the same action every step until the simulator reports completion.
    while not simulator.is_done:
        simulator.step(**uniform_policy)

    # NOTE: req.episode_id is only echoed back; the score comes from the
    # heuristic run above, not from a stored episode.
    score = task_def.grader(simulator.state)
    return GraderResponse(
        task_id=req.task_id,
        episode_id=req.episode_id,
        score=score,
        message=f"Grader score for {task_def.name}: {score:.4f}",
    )
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# Response body for POST /baseline.
class BaselineResponse(BaseModel):
    # task_id -> grader score of the baseline run.
    scores: dict[str, float]
    # Human-readable summary of the result.
    message: str
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
@app.post("/baseline")
def run_baseline() -> BaselineResponse:
    """Run a deterministic heuristic baseline and return scores for all 3 tasks."""
    # Imported at call time, as in the original endpoint.
    from server.tasks import create_simulator, get_task

    results = {}
    for task_id in TASKS:
        task_def = get_task(task_id)
        simulator = create_simulator(task_id, seed=42)  # fixed seed

        # Equal-weight heuristic: spread budget, targeting and messaging evenly.
        channel_names = list(simulator.channels.keys())
        segment_names = list(simulator.segments.keys())
        uniform_policy = {
            "budget_allocation": {
                name: 1.0 / len(channel_names) for name in channel_names
            },
            "segment_targeting": {
                name: 1.0 / len(segment_names) for name in segment_names
            },
            "messaging": {
                dim: 1.0 / len(MESSAGING_DIMS) for dim in MESSAGING_DIMS
            },
        }

        # Apply the same action every step until the simulator reports completion.
        while not simulator.is_done:
            simulator.step(**uniform_policy)

        results[task_id] = task_def.grader(simulator.state)

    return BaselineResponse(
        scores=results,
        message="Baseline (equal-allocation heuristic) scores for all tasks",
    )
|
server/environment.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""GTM Strategy Optimizer — OpenEnv Environment implementation."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import uuid
|
| 6 |
+
from typing import Any, Optional
|
| 7 |
+
|
| 8 |
+
from openenv.core.env_server import Environment
|
| 9 |
+
|
| 10 |
+
from models import (
|
| 11 |
+
ChannelMetrics,
|
| 12 |
+
ExperimentResult,
|
| 13 |
+
FunnelMetrics,
|
| 14 |
+
GTMAction,
|
| 15 |
+
GTMObservation,
|
| 16 |
+
GTMState,
|
| 17 |
+
SegmentMetrics,
|
| 18 |
+
)
|
| 19 |
+
from server.simulation import EXPERIMENT_TYPES, MESSAGING_DIMS, PRICING_ACTIONS
|
| 20 |
+
from server.tasks import create_simulator, get_task, TASKS
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class GTMEnvironment(Environment):
    """OpenEnv environment simulating Go-To-Market strategy optimization.

    Each episode represents a product launch lifecycle. The agent makes weekly
    decisions about budget allocation, customer targeting, messaging, experiments,
    and pricing to maximize revenue under uncertainty.

    Usage is ``reset()`` followed by repeated ``step()`` calls until the
    returned observation has ``done=True``. When an episode finishes, the
    task's grader score is stored and can be read with ``get_grader_score()``.
    """

    SUPPORTS_CONCURRENT_SESSIONS = True

    def __init__(self, **kwargs: Any):
        """Create an environment with no active episode.

        Args:
            **kwargs: Forwarded unchanged to the ``Environment`` base class.
        """
        super().__init__(**kwargs)
        self._state = GTMState()
        self._sim = None  # simulator of the active episode, set by reset()
        self._task_def = None  # task definition of the active episode
        # episode_id -> grader score, written when an episode finishes
        self._grader_scores: dict[str, float] = {}

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        task_id: str = "channel_optimizer",
        **kwargs: Any,
    ) -> GTMObservation:
        """Start a new GTM episode for the given task.

        Args:
            seed: Passed to ``create_simulator`` as the simulator seed.
            episode_id: Identifier of the new episode. A random UUID4 string
                is generated when it is ``None`` or empty.
            task_id: Task identifier handed to ``get_task`` and
                ``create_simulator``.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The week-0 observation: empty metrics for every channel and
            segment plus the welcome message.
        """
        task_def = get_task(task_id)
        self._task_def = task_def
        self._sim = create_simulator(task_id, seed=seed)
        s = self._sim.state

        # Mirror the simulator's own starting values instead of repeating
        # literals here, so the two can never drift apart.
        self._state = GTMState(
            episode_id=episode_id or str(uuid.uuid4()),
            step_count=0,
            task_id=task_id,
            difficulty=task_def.difficulty,
            true_brand_strength=s.brand_strength,
            true_market_demand=s.market_demand,
            total_revenue=0.0,
            total_spend=0.0,
            total_conversions=0,
            compliance_violations=0,
            experiments_run=0,
            useful_experiments=0,
        )

        return GTMObservation(
            done=False,
            reward=None,
            week=0,
            total_weeks=s.total_weeks,
            budget_remaining=s.budget_remaining,
            weekly_budget=s.weekly_budget,
            channel_metrics={ch: ChannelMetrics() for ch in self._sim.channels},
            funnel=FunnelMetrics(),
            segment_performance={seg: SegmentMetrics() for seg in self._sim.segments},
            experiment_result=None,
            brand_score=s.brand_strength,
            total_revenue=0.0,
            total_conversions=0,
            average_cac=0.0,
            message=self._initial_message(task_def),
            **self._action_space_fields(),
        )

    def step(
        self,
        action: GTMAction,
        timeout_s: Optional[float] = None,
        **kwargs: Any,
    ) -> GTMObservation:
        """Execute one week of GTM activity.

        Args:
            action: The agent's decisions for the week. An experiment or
                pricing action that the current task does not offer is
                replaced by ``None`` before it reaches the simulator.
            timeout_s: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            The observation after the simulated week, including the per-step
            reward (rounded to 4 decimals) and the ``done`` flag.

        Raises:
            RuntimeError: If ``reset()`` has not been called yet.
        """
        if self._sim is None:
            raise RuntimeError("Must call reset() before step()")

        self._state.step_count += 1

        task = self._task_def
        experiment = (
            action.experiment
            if action.experiment in task.available_experiments
            else None
        )
        pricing_action = (
            action.pricing_action
            if action.pricing_action in task.available_pricing_actions
            else None
        )

        # Run simulation step
        result = self._sim.step(
            budget_allocation=action.budget_allocation,
            segment_targeting=action.segment_targeting,
            messaging=action.messaging,
            experiment=experiment,
            pricing_action=pricing_action,
        )

        s = self._sim.state
        done = self._sim.is_done

        # Update internal state
        self._state.true_brand_strength = s.brand_strength
        self._state.true_market_demand = s.market_demand
        self._state.total_revenue = s.total_revenue
        self._state.total_spend = s.total_spend
        self._state.total_conversions = s.total_conversions
        self._state.compliance_violations = s.compliance_violations
        self._state.experiments_run = s.experiments_run
        self._state.useful_experiments = s.useful_experiments

        # Compute step reward (partial progress signal)
        reward = self._compute_reward(result, s)

        # If episode done, compute the grader score once; it is both stored
        # and reused for the final message.
        grader_score = None
        if done:
            grader_score = task.grader(s)
            self._grader_scores[self._state.episode_id] = grader_score

        # Build observation
        channel_metrics = {
            ch: ChannelMetrics(**m) for ch, m in result["channel_metrics"].items()
        }
        funnel = FunnelMetrics(**result["funnel"])
        segment_perf = {
            seg: SegmentMetrics(**m) for seg, m in result["segment_performance"].items()
        }
        exp_result = None
        if result["experiment_result"]:
            exp_result = ExperimentResult(**result["experiment_result"])

        # max(..., 1) avoids a division by zero before the first conversion
        avg_cac = s.total_spend / max(s.total_conversions, 1)

        return GTMObservation(
            done=done,
            reward=round(reward, 4),
            week=s.week,
            total_weeks=s.total_weeks,
            budget_remaining=round(s.budget_remaining, 2),
            weekly_budget=round(s.weekly_budget, 2),
            channel_metrics=channel_metrics,
            funnel=funnel,
            segment_performance=segment_perf,
            experiment_result=exp_result,
            brand_score=result["brand_score_observed"],
            total_revenue=round(s.total_revenue, 2),
            total_conversions=s.total_conversions,
            average_cac=round(avg_cac, 2),
            message=self._step_message(result, s, done, grader_score),
            **self._action_space_fields(),
        )

    @property
    def state(self) -> GTMState:
        """The environment-side state of the current episode."""
        return self._state

    def get_grader_score(self, episode_id: str) -> Optional[float]:
        """Get the grader score for a completed episode.

        Returns ``None`` when no score has been recorded for ``episode_id``.
        """
        return self._grader_scores.get(episode_id)

    # ── Private helpers ────────────────────────────────────────────

    def _action_space_fields(self) -> dict:
        """Observation fields listing what the agent may choose from."""
        return {
            "available_channels": list(self._sim.channels.keys()),
            "available_segments": list(self._sim.segments.keys()),
            "available_experiments": self._task_def.available_experiments,
            "available_pricing_actions": self._task_def.available_pricing_actions,
            "messaging_dimensions": MESSAGING_DIMS,
        }

    def _compute_reward(self, result: dict, s) -> float:
        """Per-step reward with partial progress signal.

        Args:
            result: The dict returned by the simulator's ``step()``.
            s: The simulator state after the step.

        Returns:
            Revenue, efficiency and brand components minus waste and
            compliance penalties, clamped to ``[-1.0, 1.0]``.
        """
        weekly_rev = result["weekly_revenue"]
        target_weekly = self._task_def.revenue_target / self._task_def.total_weeks
        per_channel = result["channel_metrics"].values()

        # revenue component (0-0.5)
        rev_reward = min(0.5, 0.5 * weekly_rev / max(target_weekly, 1.0))

        # efficiency bonus (0-0.2); full bonus at revenue/spend >= 3
        weekly_spend = sum(m.get("spend", 0.0) for m in per_channel)
        if weekly_spend > 0:
            roi = weekly_rev / weekly_spend
            eff_reward = min(0.2, 0.2 * roi / 3.0)
        else:
            eff_reward = 0.0

        # brand maintenance (0-0.15)
        brand_reward = 0.15 * (s.brand_strength / 100.0)

        # penalties: 0.05 per channel that spent more than 100 without a
        # single conversion, 0.1 per compliance violation recorded so far
        waste_penalty = 0.0
        for m in per_channel:
            if m.get("spend", 0) > 100 and m.get("conversions", 0) == 0:
                waste_penalty += 0.05

        compliance_penalty = s.compliance_violations * 0.1

        reward = rev_reward + eff_reward + brand_reward - waste_penalty - compliance_penalty
        return max(-1.0, min(1.0, reward))

    def _initial_message(self, task_def) -> str:
        """Build the welcome text shown in the week-0 observation."""
        channels = ", ".join(c.name for c in task_def.channels)
        segments = ", ".join(s.name for s in task_def.segments)
        return (
            f"Welcome to the GTM Strategy Optimizer — Task: {task_def.name} ({task_def.difficulty})\n"
            f"\n"
            f"{task_def.description}\n"
            f"\n"
            f"Duration: {task_def.total_weeks} weeks | Budget: ${task_def.total_budget:,.0f} "
            f"(${task_def.total_budget / task_def.total_weeks:,.0f}/week)\n"
            f"Channels: {channels}\n"
            f"Segments: {segments}\n"
            f"Product price: ${task_def.product.base_price:.0f}\n"
            f"\n"
            f"Allocate your budget wisely across channels and segments. "
            f"Craft messaging that resonates with your target customers. "
            f"Maximize revenue while building brand strength."
        )

    def _step_message(
        self,
        result: dict,
        s,
        done: bool,
        grader_score: Optional[float] = None,
    ) -> str:
        """Build the human-readable summary for one step.

        Args:
            result: The dict returned by the simulator's ``step()``.
            s: The simulator state after the step.
            done: Whether the episode has finished.
            grader_score: Final score to report when ``done`` is true. When
                omitted, the task's grader is evaluated here.

        Returns:
            The summary parts joined by ``" | "`` while the episode runs and
            by newlines on the final step.
        """
        weekly_rev = result["weekly_revenue"]
        parts = [f"Week {s.week}/{s.total_weeks} | Revenue this week: ${weekly_rev:,.0f}"]
        parts.append(
            f"Cumulative: ${s.total_revenue:,.0f} revenue, "
            f"{s.total_conversions} conversions, "
            f"${s.budget_remaining:,.0f} budget remaining"
        )
        parts.append(f"Brand health: {result['brand_score_observed']:.0f}/100")

        if result["experiment_result"]:
            er = result["experiment_result"]
            parts.append(f"Experiment result: {er['recommendation']}")

        if done:
            if grader_score is None:
                grader_score = self._task_def.grader(s)
            parts.append(f"\nEpisode complete! Final grader score: {grader_score:.4f}")

        return " | ".join(parts) if not done else "\n".join(parts)
|
server/simulation.py
ADDED
|
@@ -0,0 +1,473 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Market dynamics simulation engine for the GTM environment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import math
|
| 6 |
+
import random
|
| 7 |
+
from dataclasses import dataclass, field
|
| 8 |
+
from typing import Dict, List, Optional, Tuple
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ── Channel configuration ──────────────────────────────────────────────────
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@dataclass
class ChannelConfig:
    """Static properties of a marketing channel.

    Attributes:
        name: Channel identifier.
        base_ctr: Base click-through rate.
        base_cvr: Base conversion rate.
        saturation_alpha: Steepness of the diminishing returns.
        cost_per_impression: Cost per 1k impressions.
        min_spend_for_signal: Minimum spend to get any data.
        segment_affinity: Affinity per segment, mapping segment name to a
            multiplier in the range 0-2.
    """

    name: str
    base_ctr: float
    base_cvr: float
    saturation_alpha: float
    cost_per_impression: float
    min_spend_for_signal: float
    segment_affinity: Dict[str, float] = field(default_factory=dict)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@dataclass
class SegmentConfig:
    """Static properties of a customer segment.

    Attributes:
        name: Segment identifier.
        size: Relative market size.
        price_sensitivity: Value in 0-1; higher means more price sensitive.
        message_preference: Preferred messaging dimensions, mapping a
            dimension name to its ideal weight.
        base_churn: Base churn rate.
    """

    name: str
    size: float
    price_sensitivity: float
    message_preference: Dict[str, float] = field(default_factory=dict)
    base_churn: float = 0.05
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
@dataclass
class ProductConfig:
    """Product being marketed.

    Attributes:
        base_price: List price of the product.
        differentiation: Value in 0-1.
        complexity: Value in 0-1.
    """

    base_price: float = 99.0
    differentiation: float = 0.7
    complexity: float = 0.4
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# ── Simulation state ───────────────────────────────────────────────────────
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
@dataclass
class SimState:
    """Mutable simulation state tracking all dynamics.

    Fields are grouped below by what they track; every field has a default so
    a bare ``SimState()`` is a valid starting state.
    """

    # --- calendar and budget ---
    week: int = 0
    total_weeks: int = 12
    budget_remaining: float = 50000.0
    weekly_budget: float = 5000.0

    # --- true latent variables ---
    # brand_strength is on a 0-100 scale, market_demand is a multiplier and
    # competitor_aggression lies in 0-1.
    brand_strength: float = 50.0
    market_demand: float = 1.0
    competitor_aggression: float = 0.0

    # --- cumulative metrics ---
    total_revenue: float = 0.0
    total_spend: float = 0.0
    total_conversions: int = 0
    total_impressions: int = 0

    # --- per-channel cumulative spend (drives diminishing returns) ---
    channel_cumulative_spend: Dict[str, float] = field(default_factory=dict)

    # --- messaging history (used for consistency tracking) ---
    messaging_history: List[Dict[str, float]] = field(default_factory=list)

    # --- experiment state ---
    # pending_experiment holds (type, completion_week) or None.
    pending_experiment: Optional[Tuple[str, int]] = None
    experiments_run: int = 0
    useful_experiments: int = 0

    # --- pricing state ---
    current_discount: float = 0.0
    has_free_trial: bool = False

    # --- compliance ---
    compliance_violations: int = 0

    # --- per-week tracking for grading ---
    weekly_revenues: List[float] = field(default_factory=list)
    weekly_brand_scores: List[float] = field(default_factory=list)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
# Dimensions a messaging weight vector may use.
MESSAGING_DIMS = [
    "cost_savings", "performance", "reliability",
    "innovation", "ease_of_use", "security",
]

# Experiment identifiers the simulator recognises.
EXPERIMENT_TYPES = [
    "ab_test_landing", "ab_test_pricing", "ab_test_creative",
    "run_survey", "competitor_analysis",
]

# Pricing action identifiers.
PRICING_ACTIONS = ["discount_10", "discount_20", "raise_5", "add_free_trial"]
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
# ── Market Simulator ───────────────────────────────────────────────────────
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class MarketSimulator:
    """Simulates market response to GTM actions for one episode.

    The simulator owns a ``SimState`` (``self.state``) that ``step()``
    advances one week at a time, and a private ``random.Random`` instance
    (``self.rng``) that is the only source of randomness.
    """

    # Weeks at which market demand is shifted when regime shifts are enabled.
    REGIME_SHIFT_WEEKS = (12, 24)

    def __init__(
        self,
        channels: List[ChannelConfig],
        segments: List[SegmentConfig],
        product: ProductConfig,
        total_weeks: int = 12,
        total_budget: float = 50000.0,
        noise_level: float = 0.1,
        enable_competitor: bool = False,
        enable_regime_shifts: bool = False,
        seed: Optional[int] = None,
    ):
        """Set up the market and a fresh state.

        Args:
            channels: Channel configurations; indexed by ``name`` afterwards.
            segments: Segment configurations; indexed by ``name`` afterwards.
            product: The product being marketed.
            total_weeks: Episode length in weeks.
            total_budget: Budget for the whole episode; the weekly budget is
                ``total_budget / total_weeks``.
            noise_level: Scales the standard deviation of every Gaussian
                noise draw.
            enable_competitor: Turn on the competitor response.
            enable_regime_shifts: Turn on the market-demand shifts.
            seed: Seed for the simulator's random number generator.
        """
        self.channels = {c.name: c for c in channels}
        self.segments = {s.name: s for s in segments}
        self.product = product
        self.noise_level = noise_level
        self.enable_competitor = enable_competitor
        self.enable_regime_shifts = enable_regime_shifts
        self.rng = random.Random(seed)

        weekly_budget = total_budget / total_weeks
        self.state = SimState(
            total_weeks=total_weeks,
            budget_remaining=total_budget,
            weekly_budget=weekly_budget,
            channel_cumulative_spend={c.name: 0.0 for c in channels},
        )

    def reset(self, seed: Optional[int] = None) -> SimState:
        """Reset to initial state.

        Args:
            seed: When given, the random number generator is re-created from
                it; otherwise the current generator keeps running.

        Returns:
            The new state, which is also stored on ``self.state``.
        """
        if seed is not None:
            self.rng = random.Random(seed)
        # The total budget is not stored, so rebuild it from the weekly figure.
        total_budget = self.state.weekly_budget * self.state.total_weeks
        self.state = SimState(
            total_weeks=self.state.total_weeks,
            budget_remaining=total_budget,
            weekly_budget=total_budget / self.state.total_weeks,
            channel_cumulative_spend={c: 0.0 for c in self.channels},
        )
        return self.state

    def step(
        self,
        budget_allocation: Dict[str, float],
        segment_targeting: Dict[str, float],
        messaging: Dict[str, float],
        experiment: Optional[str] = None,
        pricing_action: Optional[str] = None,
    ) -> Dict:
        """Advance one week and return metrics.

        Args:
            budget_allocation: Channel name -> fraction of this week's budget.
                Negative fractions are treated as 0; if the fractions sum to
                more than 1 they are scaled down to sum to 1. Entries for
                unknown channels are not spent.
            segment_targeting: Segment name -> weight, normalised to sum to 1
                over the known segments.
            messaging: Messaging dimension -> weight, normalised to sum to 1
                over ``MESSAGING_DIMS``.
            experiment: Experiment to launch this week; ignored unless it is
                one of ``EXPERIMENT_TYPES``.
            pricing_action: Pricing action to apply before the week runs.

        Returns dict with keys:
            channel_metrics, funnel, segment_performance,
            experiment_result, brand_score_observed, weekly_revenue
        """
        s = self.state
        s.week += 1

        # ── Apply pricing action ───────────────────────────────────
        self._apply_pricing(pricing_action)

        # ── Budget spend ───────────────────────────────────────────
        # A negative fraction would become negative spend and hand budget
        # back to the agent, so clamp before normalising.
        budget_allocation = {k: max(v, 0.0) for k, v in budget_allocation.items()}
        total_alloc = sum(budget_allocation.values())
        if total_alloc > 1.0:
            # normalize
            factor = 1.0 / total_alloc
            budget_allocation = {k: v * factor for k, v in budget_allocation.items()}

        # Never spend a negative amount, even if the budget is overdrawn.
        weekly_spend = max(0.0, min(s.weekly_budget, s.budget_remaining))
        channel_spends = {}
        for ch_name, frac in budget_allocation.items():
            if ch_name in self.channels:
                channel_spends[ch_name] = frac * weekly_spend

        actual_total_spend = sum(channel_spends.values())
        s.budget_remaining -= actual_total_spend
        s.total_spend += actual_total_spend

        # ── Normalize targeting & messaging ────────────────────────
        segment_targeting = self._normalize_weights(
            segment_targeting, list(self.segments.keys())
        )
        messaging = self._normalize_weights(messaging, MESSAGING_DIMS)
        s.messaging_history.append(messaging.copy())

        # ── Compute channel performance ────────────────────────────
        # NOTE(review): SimState.total_impressions is not updated anywhere
        # in this class — confirm whether that is intended.
        channel_metrics = {}
        total_visitors = 0
        total_signups = 0
        segment_conversions: Dict[str, float] = {seg: 0.0 for seg in self.segments}
        segment_revenue: Dict[str, float] = {seg: 0.0 for seg in self.segments}
        segment_engagement: Dict[str, float] = {seg: 0.0 for seg in self.segments}
        weekly_revenue = 0.0

        for ch_name, ch_cfg in self.channels.items():
            spend = channel_spends.get(ch_name, 0.0)
            s.channel_cumulative_spend[ch_name] += spend

            # Below the signal threshold the money is spent but yields no data.
            if spend < ch_cfg.min_spend_for_signal:
                channel_metrics[ch_name] = {
                    "impressions": 0, "clicks": 0, "conversions": 0,
                    "spend": spend, "ctr": 0.0, "cvr": 0.0, "roi": 0.0,
                }
                continue

            # impressions from spend (cost_per_impression is CPM)
            # Apply diminishing returns: more spend -> higher effective CPM
            cumulative = s.channel_cumulative_spend[ch_name]
            diminishing = math.exp(-ch_cfg.saturation_alpha * cumulative / 100000)
            # Weekly spend also has diminishing returns (audience saturation)
            weekly_diminishing = 1.0 / (1.0 + spend / 2000.0)
            effective_impressions = (
                spend / ch_cfg.cost_per_impression * 1000 * weekly_diminishing * diminishing
            )
            impressions = int(max(0, effective_impressions))

            # compute per-segment clicks and conversions
            ch_clicks = 0
            ch_conversions = 0
            ch_revenue = 0.0
            for seg_name, seg_cfg in self.segments.items():
                seg_weight = segment_targeting.get(seg_name, 0.0)
                if seg_weight < 0.01:
                    continue

                seg_impressions = int(impressions * seg_weight)
                affinity = ch_cfg.segment_affinity.get(seg_name, 1.0)
                msg_alignment = self._message_alignment(messaging, seg_cfg)
                brand_mult = s.brand_strength / 100.0

                eff_ctr = (
                    ch_cfg.base_ctr
                    * affinity
                    * brand_mult
                    * s.market_demand
                    * (1.0 + self._noise(0.1))
                )
                eff_cvr = (
                    ch_cfg.base_cvr
                    * msg_alignment
                    * self.product.differentiation
                    * (1.0 + self._noise(0.1))
                )

                # rates are capped at 50% CTR and 80% CVR
                clicks = int(seg_impressions * min(eff_ctr, 0.5))
                convs = int(clicks * min(eff_cvr, 0.8))

                # revenue per conversion
                price = self.product.base_price * (1.0 - s.current_discount)
                price_mult = 1.0 - seg_cfg.price_sensitivity * s.current_discount * 0.5
                rev = convs * price * max(price_mult, 0.3)

                ch_clicks += clicks
                ch_conversions += convs
                ch_revenue += rev
                segment_conversions[seg_name] += convs
                segment_revenue[seg_name] += rev
                segment_engagement[seg_name] += clicks * 0.01

            ctr = ch_clicks / max(impressions, 1)
            cvr = ch_conversions / max(ch_clicks, 1)
            roi = (ch_revenue - spend) / max(spend, 1.0)

            channel_metrics[ch_name] = {
                "impressions": impressions,
                "clicks": ch_clicks,
                "conversions": ch_conversions,
                "spend": round(spend, 2),
                "ctr": round(ctr, 4),
                "cvr": round(cvr, 4),
                "roi": round(roi, 4),
            }

            total_visitors += ch_clicks
            total_signups += ch_conversions
            weekly_revenue += ch_revenue
            s.total_conversions += ch_conversions

        # ── Funnel metrics ─────────────────────────────────────────
        total_activations = int(total_signups * 0.6 * (1 + self._noise(0.05)))
        retained = int(total_activations * 0.7 * (1 + self._noise(0.05)))
        funnel = {
            "visitors": total_visitors,
            "signups": total_signups,
            "activations": total_activations,
            "retained_users": retained,
            "signup_rate": round(total_signups / max(total_visitors, 1), 4),
            "activation_rate": round(total_activations / max(total_signups, 1), 4),
            "retention_rate": round(retained / max(total_activations, 1), 4),
        }

        # ── Segment performance ────────────────────────────────────
        segment_performance = {}
        for seg_name in self.segments:
            total_seg_imp = max(
                sum(
                    channel_metrics.get(ch, {}).get("impressions", 0)
                    * segment_targeting.get(seg_name, 0.0)
                    for ch in self.channels
                ),
                1,
            )
            conv_rate = segment_conversions[seg_name] / total_seg_imp
            segment_performance[seg_name] = {
                "conversion_rate": round(conv_rate, 6),
                "engagement_score": round(min(segment_engagement[seg_name], 100.0), 2),
                "churn_rate": round(self.segments[seg_name].base_churn * (1 + self._noise(0.1)), 4),
                "revenue": round(segment_revenue[seg_name], 2),
            }

        # ── Brand evolution ────────────────────────────────────────
        consistency = self._messaging_consistency()
        # share of the week's budget spent on channels whose name contains
        # "organic" or "content"
        organic_boost = sum(
            channel_spends.get(ch, 0.0)
            for ch in self.channels
            if "organic" in ch or "content" in ch
        ) / max(weekly_spend, 1.0)
        s.brand_strength = min(100.0, max(0.0,
            s.brand_strength
            + 0.5 * consistency
            + 0.3 * organic_boost
            - 0.2 * (1.0 - consistency)
            + self._noise(0.3)
        ))
        # the agent only sees a noisy reading of the true brand strength
        brand_observed = s.brand_strength + self._noise(5.0) * self.noise_level * 10
        brand_observed = max(0.0, min(100.0, brand_observed))

        # ── Competitor response (hard mode) ────────────────────────
        if self.enable_competitor and s.week > 4:
            # s.total_revenue does not include this week yet, so this
            # compares against the average of the previous weeks.
            if weekly_revenue > s.total_revenue / max(s.week - 1, 1) * 1.2:
                s.competitor_aggression = min(1.0, s.competitor_aggression + 0.1)
                s.market_demand *= max(0.9, 1.0 - s.competitor_aggression * 0.05)

        # ── Market regime shifts (hard mode) ───────────────────────
        if self.enable_regime_shifts and s.week in self.REGIME_SHIFT_WEEKS:
            shift = self.rng.uniform(-0.3, 0.3)
            s.market_demand = max(0.5, min(1.5, s.market_demand + shift))

        # ── Experiment processing ──────────────────────────────────
        # Resolve a due experiment BEFORE launching a new one: launching
        # replaces the pending slot, which would otherwise discard a result
        # that is ready this week.
        experiment_result = self._resolve_pending_experiment()

        if experiment and experiment in EXPERIMENT_TYPES:
            # NOTE: the cost is charged on top of the channel allocation and
            # a launch replaces any experiment that is still in flight.
            exp_cost = weekly_spend * 0.1
            s.budget_remaining -= exp_cost
            s.total_spend += exp_cost
            s.experiments_run += 1
            s.pending_experiment = (experiment, s.week + 2)

        # ── Update cumulative ──────────────────────────────────────
        s.total_revenue += weekly_revenue
        s.weekly_revenues.append(weekly_revenue)
        s.weekly_brand_scores.append(s.brand_strength)

        return {
            "channel_metrics": channel_metrics,
            "funnel": funnel,
            "segment_performance": segment_performance,
            "experiment_result": experiment_result,
            "brand_score_observed": round(brand_observed, 1),
            "weekly_revenue": round(weekly_revenue, 2),
        }

    # ── Helpers ────────────────────────────────────────────────────

    def _resolve_pending_experiment(self) -> Optional[Dict]:
        """Return the result of the pending experiment if it is due, else None."""
        s = self.state
        if not (s.pending_experiment and s.week >= s.pending_experiment[1]):
            return None

        exp_type = s.pending_experiment[0]
        uplift = self.rng.uniform(-0.05, 0.15)
        confidence = self.rng.uniform(0.6, 0.95)
        useful = uplift > 0.02 and confidence > 0.75
        if useful:
            s.useful_experiments += 1
        s.pending_experiment = None
        return {
            "experiment_type": exp_type,
            "uplift_estimate": round(uplift, 4),
            "confidence": round(confidence, 4),
            "recommendation": (
                f"Adopt variant — {uplift:.1%} uplift at {confidence:.0%} confidence"
                if useful
                else f"No significant uplift detected ({uplift:.1%} at {confidence:.0%} confidence)"
            ),
        }

    def _noise(self, scale: float) -> float:
        """Draw zero-mean Gaussian noise with std ``scale * noise_level``."""
        return self.rng.gauss(0, scale * self.noise_level)

    def _normalize_weights(
        self, weights: Dict[str, float], valid_keys: List[str]
    ) -> Dict[str, float]:
        """Restrict ``weights`` to ``valid_keys`` and scale them to sum to 1.

        Negative weights count as 0. When the remaining total is below 0.01
        every valid key gets an equal share. An empty ``valid_keys`` yields
        an empty dict.
        """
        if not valid_keys:
            return {}
        filtered = {k: max(v, 0.0) for k, v in weights.items() if k in valid_keys}
        total = sum(filtered.values())
        if total < 0.01:
            # equal distribution
            n = len(valid_keys)
            return {k: 1.0 / n for k in valid_keys}
        return {k: v / total for k, v in filtered.items()}

    def _message_alignment(
        self, messaging: Dict[str, float], segment: SegmentConfig
    ) -> float:
        """Cosine-like alignment between messaging and segment preference.

        Dimensions missing from the segment's preference default to a uniform
        weight. Returns 0.5 when either vector is (numerically) zero.
        """
        dot = 0.0
        mag_m = 0.0
        mag_p = 0.0
        uniform = 1.0 / len(MESSAGING_DIMS)
        for dim in MESSAGING_DIMS:
            m = messaging.get(dim, 0.0)
            p = segment.message_preference.get(dim, uniform)
            dot += m * p
            mag_m += m * m
            mag_p += p * p
        if mag_m < 1e-9 or mag_p < 1e-9:
            return 0.5
        return dot / (math.sqrt(mag_m) * math.sqrt(mag_p))

    def _messaging_consistency(self) -> float:
        """How consistent messaging has been over recent weeks.

        Looks at up to the last four entries of the messaging history and
        returns ``1 - 10 * (summed per-dimension variance)``, floored at 0.
        With fewer than two entries the result is 1.0.
        """
        history = self.state.messaging_history
        if len(history) < 2:
            return 1.0
        recent = history[-min(4, len(history)):]
        # compute variance across dimensions
        total_var = 0.0
        for dim in MESSAGING_DIMS:
            vals = [m.get(dim, 0.0) for m in recent]
            mean = sum(vals) / len(vals)
            var = sum((v - mean) ** 2 for v in vals) / len(vals)
            total_var += var
        # low variance = high consistency
        return max(0.0, 1.0 - total_var * 10)

    def _apply_pricing(self, pricing_action: Optional[str]) -> None:
        """Apply a pricing action to the state; unknown actions do nothing."""
        s = self.state
        if pricing_action == "discount_10":
            s.current_discount = 0.10
        elif pricing_action == "discount_20":
            s.current_discount = 0.20
        elif pricing_action == "raise_5":
            # modelled as removing 5 points of discount, never below zero
            s.current_discount = max(0.0, s.current_discount - 0.05)
        elif pricing_action == "add_free_trial":
            s.has_free_trial = True
            # free trial boosts conversions via brand
            s.brand_strength = min(100.0, s.brand_strength + 1.0)

    @property
    def is_done(self) -> bool:
        """True once the last week has run or the budget is used up."""
        return (
            self.state.week >= self.state.total_weeks
            or self.state.budget_remaining <= 0
        )
|
server/tasks.py
ADDED
|
@@ -0,0 +1,399 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Task definitions and graders for the GTM Strategy Optimizer.
|
| 2 |
+
|
| 3 |
+
Three tasks with increasing difficulty:
|
| 4 |
+
1. channel_optimizer (easy) — 12 weeks, 3 channels, 2 segments
|
| 5 |
+
2. growth_strategist (medium) — 24 weeks, 5 channels, 3 segments
|
| 6 |
+
3. market_dominator (hard) — 36 weeks, 7 channels, 4 segments + competitor + regime shifts
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from dataclasses import dataclass
|
| 12 |
+
from typing import Callable, Dict, List
|
| 13 |
+
|
| 14 |
+
from .simulation import (
|
| 15 |
+
ChannelConfig,
|
| 16 |
+
EXPERIMENT_TYPES,
|
| 17 |
+
MarketSimulator,
|
| 18 |
+
MESSAGING_DIMS,
|
| 19 |
+
PRICING_ACTIONS,
|
| 20 |
+
ProductConfig,
|
| 21 |
+
SegmentConfig,
|
| 22 |
+
SimState,
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
class TaskDefinition:
    """Bundle of settings that describes one task and how to score it.

    The simulator-facing fields are forwarded to ``MarketSimulator`` by
    ``create_simulator``; ``grader`` turns a finished ``SimState`` into a
    score.
    """

    # Identity and human-readable metadata.
    task_id: str
    name: str
    difficulty: str
    description: str

    # Episode length and total spendable budget.
    total_weeks: int
    total_budget: float

    # Market structure handed to the simulator.
    channels: List[ChannelConfig]
    segments: List[SegmentConfig]
    product: ProductConfig

    # Simulator difficulty switches.
    noise_level: float
    enable_competitor: bool
    enable_regime_shifts: bool

    # Revenue goal used for grading.
    revenue_target: float

    # Action names offered for this task.
    available_experiments: List[str]
    available_pricing_actions: List[str]

    # Scoring function: final simulation state -> score.
    grader: Callable[[SimState], float]
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
# ── Grader functions ───────────────────────────────────────────────────────
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def _grade_channel_optimizer(s: SimState) -> float:
    """Grade the easy task on revenue alone, with partial credit.

    Returns the fraction of the 120,000 revenue target that was reached,
    clamped to ``[0.0, 1.0]`` and rounded to four decimals.
    """
    target = 120000.0
    attained = s.total_revenue / target
    capped = min(1.0, attained)
    floored = max(0.0, capped)
    return round(floored, 4)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _grade_growth_strategist(s: SimState) -> float:
    """Grade the medium task as a weighted blend of four components.

    Components (each capped at 1.0 where a cap applies):
        * revenue (40%): ``total_revenue`` over a 375,000 target.
        * efficiency (30%): revenue per unit of spend, where 3x is perfect.
        * brand (20%): ``brand_strength`` on a 0-100 scale.
        * experiments (10%): useful experiments relative to half of those
          run; zero if none were run.

    Returns:
        The blended score clamped to ``[0.0, 1.0]``, rounded to four decimals.
    """
    target = 375000.0
    revenue_part = min(1.0, s.total_revenue / target)

    # Spend is floored at 1.0 so a zero-spend run cannot divide by zero.
    return_on_spend = s.total_revenue / max(s.total_spend, 1.0)
    efficiency_part = min(1.0, return_on_spend / 3.0)  # 3x ROI = perfect

    brand_part = s.brand_strength / 100.0

    if s.experiments_run > 0:
        experiment_part = min(
            1.0, s.useful_experiments / max(s.experiments_run * 0.5, 1.0)
        )
    else:
        experiment_part = 0.0

    weighted_parts = (
        (0.40, revenue_part),
        (0.30, efficiency_part),
        (0.20, brand_part),
        (0.10, experiment_part),
    )
    blended = sum(weight * part for weight, part in weighted_parts)
    return round(max(0.0, min(1.0, blended)), 4)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def _grade_market_dominator(s: SimState) -> float:
    """Grade the hard task on revenue, ROI, brand trajectory, adaptability, compliance.

    Components:
        * revenue (35%): ``total_revenue`` over a 400,000 target, capped at 1.
        * ROI (25%): revenue per unit of spend, where 4x is perfect.
        * brand trajectory (20%): mean brand score of the last quarter of
          the run minus that of the first quarter; a change of -10 maps to
          0.0 and +10 to 1.0. Neutral 0.5 with fewer than four weekly
          scores.
        * adaptability (10%): mean weekly revenue over indices 13-16
          relative to indices 8-11 (presumably bracketing a regime shift
          around week 12 - confirm against the simulator). Neutral 0.5
          with fewer than 18 recorded weeks.
        * compliance (10%): loses 0.03 per violation, floored at 0.

    Returns:
        The weighted score clamped to ``[0.0, 1.0]``, rounded to four decimals.
    """
    revenue_target = 400000.0
    rev_score = min(1.0, s.total_revenue / revenue_target)

    # Spend is floored at 1.0 so a zero-spend run cannot divide by zero.
    roi = s.total_revenue / max(s.total_spend, 1.0)
    roi_score = min(1.0, roi / 4.0)

    # Brand trajectory (improving over time).
    brand_scores = s.weekly_brand_scores
    if len(brand_scores) >= 4:
        # Compute the quarter size once and use it for both slices and both
        # divisors. Writing ``-len(x) // 4`` floors toward negative infinity,
        # so for lengths not divisible by 4 it selects one element more than
        # ``len(x) // 4`` and inflates the last-quarter average.
        quarter = len(brand_scores) // 4
        first_quarter = sum(brand_scores[:quarter]) / quarter
        last_quarter = sum(brand_scores[-quarter:]) / quarter
        trajectory = min(1.0, max(0.0, (last_quarter - first_quarter + 10) / 20.0))
    else:
        trajectory = 0.5

    # Adaptability: revenue after the mid-run window relative to before it.
    revenues = s.weekly_revenues
    if len(revenues) >= 18:
        pre_shift = sum(revenues[8:12]) / 4
        post_shift = sum(revenues[13:17]) / 4
        adapt_score = min(1.0, post_shift / max(pre_shift, 1.0))
    else:
        adapt_score = 0.5

    compliance_score = max(0.0, 1.0 - s.compliance_violations * 0.03)

    score = (
        0.35 * rev_score
        + 0.25 * roi_score
        + 0.20 * trajectory
        + 0.10 * adapt_score
        + 0.10 * compliance_score
    )
    return round(max(0.0, min(1.0, score)), 4)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
# ── Task configurations ────────────────────────────────────────────────────
|
| 117 |
+
|
| 118 |
+
# Easy task: 12 weeks, 3 channels, 2 segments, no competitor or regime
# shifts, and no experiments or pricing actions on offer.
TASK_CHANNEL_OPTIMIZER = TaskDefinition(
    task_id="channel_optimizer",
    name="Channel Optimizer",
    difficulty="easy",
    description=(
        "Maximize revenue by allocating budget across 3 marketing channels "
        "targeting 2 customer segments over 12 weeks. Focus on finding the "
        "right channel-segment fit."
    ),
    total_weeks=12,
    total_budget=50000.0,
    channels=[
        ChannelConfig(
            name="paid_search", base_ctr=0.012, base_cvr=0.025,
            saturation_alpha=1.5, cost_per_impression=18.0, min_spend_for_signal=200.0,
            segment_affinity={"startup_founders": 1.4, "smb_owners": 1.0},
        ),
        ChannelConfig(
            name="paid_social", base_ctr=0.008, base_cvr=0.015,
            saturation_alpha=2.0, cost_per_impression=12.0, min_spend_for_signal=150.0,
            segment_affinity={"startup_founders": 1.2, "smb_owners": 0.8},
        ),
        ChannelConfig(
            name="email_lifecycle", base_ctr=0.025, base_cvr=0.035,
            saturation_alpha=1.0, cost_per_impression=5.0, min_spend_for_signal=100.0,
            segment_affinity={"startup_founders": 0.9, "smb_owners": 1.5},
        ),
    ],
    segments=[
        SegmentConfig(
            name="startup_founders", size=0.6, price_sensitivity=0.7,
            message_preference={
                "cost_savings": 0.1, "performance": 0.3, "reliability": 0.1,
                "innovation": 0.3, "ease_of_use": 0.15, "security": 0.05,
            },
            base_churn=0.08,
        ),
        SegmentConfig(
            name="smb_owners", size=0.4, price_sensitivity=0.5,
            message_preference={
                "cost_savings": 0.25, "performance": 0.15, "reliability": 0.25,
                "innovation": 0.05, "ease_of_use": 0.2, "security": 0.1,
            },
            base_churn=0.05,
        ),
    ],
    product=ProductConfig(base_price=99.0, differentiation=0.7, complexity=0.3),
    noise_level=0.1,
    enable_competitor=False,
    enable_regime_shifts=False,
    revenue_target=120000.0,
    available_experiments=[],
    available_pricing_actions=[],
    grader=_grade_channel_optimizer,
)
|
| 189 |
+
|
| 190 |
+
# Medium task: 24 weeks, 5 channels, 3 segments, with experiments and
# pricing actions available; competitor and regime shifts stay off.
TASK_GROWTH_STRATEGIST = TaskDefinition(
    task_id="growth_strategist",
    name="Growth Strategist",
    difficulty="medium",
    description=(
        "Maximize revenue while maintaining brand health and budget efficiency. "
        "Manage 5 channels, 3 segments, run experiments, and adjust pricing "
        "over 24 weeks. Balance short-term revenue with long-term brand building."
    ),
    total_weeks=24,
    total_budget=150000.0,
    channels=[
        ChannelConfig(
            name="paid_search",
            base_ctr=0.012,
            base_cvr=0.022,
            saturation_alpha=1.5,
            cost_per_impression=20.0,
            min_spend_for_signal=200.0,
            segment_affinity={
                "startup_founders": 1.4,
                "smb_owners": 1.0,
                "enterprise": 0.7,
            },
        ),
        ChannelConfig(
            name="paid_social",
            base_ctr=0.008,
            base_cvr=0.012,
            saturation_alpha=2.0,
            cost_per_impression=14.0,
            min_spend_for_signal=150.0,
            segment_affinity={
                "startup_founders": 1.3,
                "smb_owners": 0.8,
                "enterprise": 0.5,
            },
        ),
        ChannelConfig(
            name="organic_content",
            base_ctr=0.006,
            base_cvr=0.030,
            saturation_alpha=0.8,
            cost_per_impression=8.0,
            min_spend_for_signal=300.0,
            segment_affinity={
                "startup_founders": 1.1,
                "smb_owners": 1.2,
                "enterprise": 1.3,
            },
        ),
        ChannelConfig(
            name="email_lifecycle",
            base_ctr=0.025,
            base_cvr=0.030,
            saturation_alpha=1.0,
            cost_per_impression=5.0,
            min_spend_for_signal=100.0,
            segment_affinity={
                "startup_founders": 0.9,
                "smb_owners": 1.5,
                "enterprise": 1.1,
            },
        ),
        ChannelConfig(
            name="outbound_sales",
            base_ctr=0.003,
            base_cvr=0.045,
            saturation_alpha=1.2,
            cost_per_impression=50.0,
            min_spend_for_signal=500.0,
            segment_affinity={
                "startup_founders": 0.5,
                "smb_owners": 0.9,
                "enterprise": 1.8,
            },
        ),
    ],
    segments=[
        SegmentConfig(
            name="startup_founders",
            size=0.4,
            price_sensitivity=0.7,
            message_preference={
                "cost_savings": 0.1,
                "performance": 0.3,
                "reliability": 0.1,
                "innovation": 0.3,
                "ease_of_use": 0.15,
                "security": 0.05,
            },
            base_churn=0.08,
        ),
        SegmentConfig(
            name="smb_owners",
            size=0.35,
            price_sensitivity=0.5,
            message_preference={
                "cost_savings": 0.25,
                "performance": 0.15,
                "reliability": 0.25,
                "innovation": 0.05,
                "ease_of_use": 0.2,
                "security": 0.1,
            },
            base_churn=0.05,
        ),
        SegmentConfig(
            name="enterprise",
            size=0.25,
            price_sensitivity=0.2,
            message_preference={
                "cost_savings": 0.05,
                "performance": 0.15,
                "reliability": 0.3,
                "innovation": 0.1,
                "ease_of_use": 0.1,
                "security": 0.3,
            },
            base_churn=0.03,
        ),
    ],
    product=ProductConfig(base_price=149.0, differentiation=0.65, complexity=0.5),
    noise_level=0.15,
    enable_competitor=False,
    enable_regime_shifts=False,
    revenue_target=375000.0,
    available_experiments=EXPERIMENT_TYPES,
    available_pricing_actions=PRICING_ACTIONS,
    grader=_grade_growth_strategist,
)
|
| 263 |
+
|
| 264 |
+
# Hard task: 36 weeks, 7 channels, 4 segments, with the competitor and
# regime shifts switched on and the highest noise level of the three tasks.
TASK_MARKET_DOMINATOR = TaskDefinition(
    task_id="market_dominator",
    name="Market Dominator",
    difficulty="hard",
    description=(
        "Maximize long-term revenue under adversarial conditions. "
        "Manage 7 channels, 4 segments with an active competitor and "
        "market regime shifts. Avoid compliance traps. 36 weeks, high noise."
    ),
    total_weeks=36,
    total_budget=300000.0,
    channels=[
        ChannelConfig(
            name="paid_search",
            base_ctr=0.010,
            base_cvr=0.018,
            saturation_alpha=1.8,
            cost_per_impression=22.0,
            min_spend_for_signal=250.0,
            segment_affinity={
                "startup_founders": 1.3,
                "smb_owners": 1.0,
                "enterprise": 0.7,
                "developer": 1.1,
            },
        ),
        ChannelConfig(
            name="paid_social",
            base_ctr=0.007,
            base_cvr=0.010,
            saturation_alpha=2.2,
            cost_per_impression=16.0,
            min_spend_for_signal=200.0,
            segment_affinity={
                "startup_founders": 1.3,
                "smb_owners": 0.7,
                "enterprise": 0.4,
                "developer": 1.0,
            },
        ),
        ChannelConfig(
            name="organic_content",
            base_ctr=0.005,
            base_cvr=0.025,
            saturation_alpha=0.8,
            cost_per_impression=10.0,
            min_spend_for_signal=350.0,
            segment_affinity={
                "startup_founders": 1.1,
                "smb_owners": 1.1,
                "enterprise": 1.2,
                "developer": 1.5,
            },
        ),
        ChannelConfig(
            name="email_lifecycle",
            base_ctr=0.020,
            base_cvr=0.025,
            saturation_alpha=1.0,
            cost_per_impression=6.0,
            min_spend_for_signal=100.0,
            segment_affinity={
                "startup_founders": 0.9,
                "smb_owners": 1.4,
                "enterprise": 1.0,
                "developer": 0.8,
            },
        ),
        ChannelConfig(
            name="outbound_sales",
            base_ctr=0.003,
            base_cvr=0.040,
            saturation_alpha=1.5,
            cost_per_impression=55.0,
            min_spend_for_signal=600.0,
            segment_affinity={
                "startup_founders": 0.4,
                "smb_owners": 0.8,
                "enterprise": 1.9,
                "developer": 0.3,
            },
        ),
        ChannelConfig(
            name="partnerships",
            base_ctr=0.004,
            base_cvr=0.035,
            saturation_alpha=1.0,
            cost_per_impression=35.0,
            min_spend_for_signal=400.0,
            segment_affinity={
                "startup_founders": 1.0,
                "smb_owners": 1.2,
                "enterprise": 1.5,
                "developer": 1.1,
            },
        ),
        ChannelConfig(
            name="influencer_marketing",
            base_ctr=0.009,
            base_cvr=0.015,
            saturation_alpha=2.5,
            cost_per_impression=25.0,
            min_spend_for_signal=300.0,
            segment_affinity={
                "startup_founders": 1.5,
                "smb_owners": 0.6,
                "enterprise": 0.3,
                "developer": 1.4,
            },
        ),
    ],
    segments=[
        SegmentConfig(
            name="startup_founders",
            size=0.3,
            price_sensitivity=0.7,
            message_preference={
                "cost_savings": 0.1,
                "performance": 0.3,
                "reliability": 0.1,
                "innovation": 0.3,
                "ease_of_use": 0.15,
                "security": 0.05,
            },
            base_churn=0.08,
        ),
        SegmentConfig(
            name="smb_owners",
            size=0.25,
            price_sensitivity=0.5,
            message_preference={
                "cost_savings": 0.25,
                "performance": 0.15,
                "reliability": 0.25,
                "innovation": 0.05,
                "ease_of_use": 0.2,
                "security": 0.1,
            },
            base_churn=0.05,
        ),
        SegmentConfig(
            name="enterprise",
            size=0.2,
            price_sensitivity=0.15,
            message_preference={
                "cost_savings": 0.05,
                "performance": 0.15,
                "reliability": 0.3,
                "innovation": 0.1,
                "ease_of_use": 0.1,
                "security": 0.3,
            },
            base_churn=0.02,
        ),
        SegmentConfig(
            name="developer",
            size=0.25,
            price_sensitivity=0.6,
            message_preference={
                "cost_savings": 0.05,
                "performance": 0.35,
                "reliability": 0.1,
                "innovation": 0.25,
                "ease_of_use": 0.2,
                "security": 0.05,
            },
            base_churn=0.1,
        ),
    ],
    product=ProductConfig(base_price=199.0, differentiation=0.6, complexity=0.6),
    noise_level=0.25,
    enable_competitor=True,
    enable_regime_shifts=True,
    revenue_target=400000.0,
    available_experiments=EXPERIMENT_TYPES,
    available_pricing_actions=PRICING_ACTIONS,
    grader=_grade_market_dominator,
)
|
| 369 |
+
|
| 370 |
+
|
| 371 |
+
# ── Registry ───────────────────────────────────────────────────────────────
|
| 372 |
+
|
| 373 |
+
# Lookup table from task_id to its definition, in easy -> hard order.
TASKS: Dict[str, TaskDefinition] = {
    task.task_id: task
    for task in (
        TASK_CHANNEL_OPTIMIZER,
        TASK_GROWTH_STRATEGIST,
        TASK_MARKET_DOMINATOR,
    )
}
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
def get_task(task_id: str) -> TaskDefinition:
    """Return the registered task definition for *task_id*.

    Raises:
        ValueError: if *task_id* is not a key of ``TASKS``; the message
            lists the available ids.
    """
    task = TASKS.get(task_id)
    if task is None:
        raise ValueError(f"Unknown task_id '{task_id}'. Available: {list(TASKS.keys())}")
    return task
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def create_simulator(task_id: str, seed: int | None = None) -> MarketSimulator:
    """Build a ``MarketSimulator`` from the settings of the named task.

    Args:
        task_id: Key of the task in the registry; unknown ids raise
            ``ValueError`` via ``get_task``.
        seed: Passed through to the simulator unchanged.

    Returns:
        A simulator configured with the task's channels, segments, product,
        horizon, budget, noise level and competitor / regime-shift switches.
    """
    task = get_task(task_id)
    settings = {
        "channels": task.channels,
        "segments": task.segments,
        "product": task.product,
        "total_weeks": task.total_weeks,
        "total_budget": task.total_budget,
        "noise_level": task.noise_level,
        "enable_competitor": task.enable_competitor,
        "enable_regime_shifts": task.enable_regime_shifts,
        "seed": seed,
    }
    return MarketSimulator(**settings)
|