Spaces:
Sleeping
Sleeping
"""
AdRL Studio — Contextual Bandit Ad Recommendation Engine

This application implements and benchmarks four bandit algorithms for ad
recommendation: (1) ε-Greedy Neural Bandit using a shared PyTorch MLP,
(2) UCB1 (Upper Confidence Bound), a non-contextual baseline, (3) Thompson
Sampling with Beta distribution priors, which is likewise context-free, and
(4) LinUCB Disjoint Model, the industry-standard contextual bandit used in
production ad systems. The simulated environment features 20 ads across
5 categories and 5 user context features (age group, device, time of day,
content category, region) encoded as a 19-dimensional vector formed by
concatenating one one-hot encoding per feature. True click-through rates are
determined by hidden weight vectors initialized at startup (seed=42).
Algorithms observe only bandit feedback — the reward for the chosen arm
only — and must balance exploration vs. exploitation to minimize cumulative
regret.
"""
| import json | |
| import math | |
| import threading | |
| import numpy as np | |
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| from flask import Flask, Response, jsonify, render_template_string, request | |
| from scipy import stats | |
# Flask application instance for this module.
app = Flask(import_name=__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Environment constants
# ─────────────────────────────────────────────────────────────────────────────
# Seed NumPy's global RNG so that everything drawn from it afterwards at
# import time is reproducible across restarts.
np.random.seed(42)

# Vocabulary of each categorical user-context feature. encode_context() lays
# the one-hot segments out in exactly this order, so reordering any list (or
# the lists themselves) changes the meaning of every context vector.
AGE_GROUPS = ["young_adult", "adult", "senior"]
DEVICES = ["mobile", "desktop", "tablet"]
TIMES_OF_DAY = ["morning", "afternoon", "evening", "night"]
CONTENT_CATS = ["tech", "sports", "lifestyle", "news", "entertainment"]
REGIONS = ["north_america", "europe", "asia", "other"]

# Length of the concatenated one-hot context vector.
CONTEXT_DIM = (
    len(AGE_GROUPS)
    + len(DEVICES)
    + len(TIMES_OF_DAY)
    + len(CONTENT_CATS)
    + len(REGIONS)
)  # 19

# Number of arms (ads). AD_IDS is derived from it so the two cannot drift.
N_ADS = 20
AD_IDS = [f"ad_{i:02d}" for i in range(1, N_ADS + 1)]

# Category mapping: consecutive, equally sized runs of ads share a category
# (ad_01..ad_04 -> "Tech", ad_05..ad_08 -> "Fashion", ...).
# N_ADS is expected to be a multiple of len(cats); otherwise the trailing ads
# would index past the end of the list.
cats = ["Tech", "Fashion", "Finance", "Food", "Travel"]
_ADS_PER_CATEGORY = N_ADS // len(cats)
AD_CAT_MAP = {}
for i, ad in enumerate(AD_IDS):
    AD_CAT_MAP[ad] = cats[i // _ADS_PER_CATEGORY]
# Creative format of every ad, keyed by ad id.
AD_FORMATS = {
    # ad_01 .. ad_04
    "ad_01": "banner",
    "ad_02": "video",
    "ad_03": "native",
    "ad_04": "banner",
    # ad_05 .. ad_08
    "ad_05": "banner",
    "ad_06": "video",
    "ad_07": "banner",
    "ad_08": "native",
    # ad_09 .. ad_12
    "ad_09": "native",
    "ad_10": "banner",
    "ad_11": "video",
    "ad_12": "native",
    # ad_13 .. ad_16
    "ad_13": "banner",
    "ad_14": "native",
    "ad_15": "banner",
    "ad_16": "video",
    # ad_17 .. ad_20
    "ad_17": "video",
    "ad_18": "banner",
    "ad_19": "native",
    "ad_20": "video",
}

# Bid attached to every ad, keyed by ad id (shown to the user with a "$" prefix).
AD_BIDS = {
    # ad_01 .. ad_04
    "ad_01": 2.50,
    "ad_02": 3.00,
    "ad_03": 3.50,
    "ad_04": 4.00,
    # ad_05 .. ad_08
    "ad_05": 1.50,
    "ad_06": 2.00,
    "ad_07": 2.50,
    "ad_08": 3.00,
    # ad_09 .. ad_12
    "ad_09": 3.00,
    "ad_10": 3.50,
    "ad_11": 4.00,
    "ad_12": 5.00,
    # ad_13 .. ad_16
    "ad_13": 1.00,
    "ad_14": 1.50,
    "ad_15": 2.00,
    "ad_16": 2.50,
    # ad_17 .. ad_20
    "ad_17": 2.00,
    "ad_18": 2.50,
    "ad_19": 3.00,
    "ad_20": 3.50,
}
# Hidden true CTR weights: one CONTEXT_DIM-long row per ad, drawn once from the
# global NumPy RNG when the module is imported. Fixed at startup, never exposed
# to algorithms.
_TRUE_WEIGHTS = 0.3 * np.random.randn(N_ADS, CONTEXT_DIM)
| def _sigmoid(x): | |
| return 1.0 / (1.0 + np.exp(-np.clip(x, -20, 20))) | |
def true_ctr(ad_idx, ctx):
    """Return the environment's true click probability for one ad and context.

    Args:
        ad_idx: Row index into the hidden weight matrix ``_TRUE_WEIGHTS``.
        ctx: Context vector; it is dotted with the selected weight row.

    Returns:
        The sigmoid of the dot product, clipped to [0.02, 0.25], as a Python
        float.
    """
    # NOTE(review): the hidden weights are zero-mean, so the sigmoid output
    # sits around 0.5 for typical inputs and the 0.25 ceiling likely applies
    # to most ad/context pairs - confirm this is the intended landscape.
    logit = ctx @ _TRUE_WEIGHTS[ad_idx]
    probability = _sigmoid(logit)
    return float(np.clip(probability, 0.02, 0.25))
def encode_context(age, device, tod, content, region):
    """Encode five categorical user features as one float32 context vector.

    Each feature is one-hot encoded against its vocabulary list and the five
    segments are laid out back to back in the order age group, device, time of
    day, content category, region.

    Args:
        age: A member of ``AGE_GROUPS``.
        device: A member of ``DEVICES``.
        tod: A member of ``TIMES_OF_DAY``.
        content: A member of ``CONTENT_CATS``.
        region: A member of ``REGIONS``.

    Returns:
        A ``np.float32`` array of length ``CONTEXT_DIM`` with exactly one 1.0
        per feature segment.

    Raises:
        ValueError: If a value is not in its vocabulary (raised by
            ``list.index``).
    """
    segments = (
        (AGE_GROUPS, age),
        (DEVICES, device),
        (TIMES_OF_DAY, tod),
        (CONTENT_CATS, content),
        (REGIONS, region),
    )
    encoded = np.zeros(CONTEXT_DIM, dtype=np.float32)
    start = 0  # index where the current feature's segment begins
    for vocabulary, value in segments:
        encoded[start + vocabulary.index(value)] = 1.0
        start += len(vocabulary)
    return encoded
def sample_random_context():
    """Return the encoding of a uniformly random user context.

    One value per feature is drawn with ``np.random.choice`` from the global
    NumPy RNG, in the order age group, device, time of day, content category,
    region, and the result is passed through ``encode_context``.
    """
    vocabularies = (AGE_GROUPS, DEVICES, TIMES_OF_DAY, CONTENT_CATS, REGIONS)
    # The draws happen in vocabulary order, which keeps the RNG stream stable.
    picks = [np.random.choice(vocabulary) for vocabulary in vocabularies]
    return encode_context(*picks)
# ─────────────────────────────────────────────────────────────────────────────
# Algorithm classes
# ─────────────────────────────────────────────────────────────────────────────
class EpsilonGreedyNeuralBandit:
    """ε-greedy contextual bandit backed by a single shared MLP.

    One network scores every (context, ad) pair: its input is the context
    vector concatenated with a one-hot encoding of the ad, and its sigmoid
    output is read as the estimated click probability. With probability ε a
    uniformly random ad is served; otherwise the highest-scoring ad is served.
    ε decays geometrically with the number of updates:
    ``max(epsilon_min, epsilon * decay ** t)``.
    """

    NAME = "ε-Greedy"
    COLOR = "#f59e0b"

    def __init__(self, epsilon=0.15, epsilon_min=0.01, decay=0.995, lr=0.01):
        """Store the hyper-parameters and build a fresh model.

        Args:
            epsilon: Initial exploration probability.
            epsilon_min: Floor below which the exploration probability never
                drops.
            decay: Multiplicative decay applied to epsilon per update.
            lr: Learning rate of the SGD optimizer.
        """
        self.epsilon_0 = epsilon
        self.epsilon_min = epsilon_min
        self.decay = decay
        self.lr = lr
        self.reset()

    def reset(self):
        """Discard everything learned: new network, optimizer and counters."""
        self.t = 0
        self.n_updates = 0
        self.model = nn.Sequential(
            nn.Linear(CONTEXT_DIM + N_ADS, 32), nn.ReLU(),
            nn.Linear(32, 16), nn.ReLU(),
            nn.Linear(16, 1), nn.Sigmoid(),
        )
        self.optimizer = optim.SGD(self.model.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

    def _inp(self, ctx, ad_idx):
        """Return the float32 network input for one (context, ad) pair."""
        one_hot = np.zeros(N_ADS, dtype=np.float32)
        one_hot[ad_idx] = 1.0
        # An explicit dtype keeps the input float32 whatever dtype ctx has.
        return torch.as_tensor(np.concatenate([ctx, one_hot]), dtype=torch.float32)

    def _pred(self, ctx, ad_idx):
        """Return the model's score for one (context, ad) pair as a float."""
        self.model.eval()
        with torch.no_grad():
            return self.model(self._inp(ctx, ad_idx)).item()

    def select(self, ctx):
        """Choose an ad index for *ctx* using the current ε."""
        eps = max(self.epsilon_min, self.epsilon_0 * (self.decay ** self.t))
        if np.random.rand() < eps:
            return int(np.random.randint(N_ADS))
        # Score all ads in one batch: row i is ctx followed by the one-hot of ad i.
        ctx_rep = np.tile(ctx, (N_ADS, 1))
        ad_eye = np.eye(N_ADS, dtype=np.float32)
        batch = torch.as_tensor(np.hstack([ctx_rep, ad_eye]), dtype=torch.float32)
        self.model.eval()
        with torch.no_grad():
            # Drop only the trailing output dimension: (N_ADS, 1) -> (N_ADS,).
            scores = self.model(batch).squeeze(-1).numpy()
        return int(np.argmax(scores))

    def predict_ctr(self, ctx, ad_idx):
        """Return the estimated click probability of ad *ad_idx* in *ctx*."""
        return self._pred(ctx, ad_idx)

    def update(self, ctx, action, reward):
        """Take one SGD step on the squared error between score and reward.

        Only the served ad (*action*) is trained on. Each call also advances
        the ε-decay clock ``t`` and the ``n_updates`` counter.
        """
        self.model.train()
        x = self._inp(ctx, action).unsqueeze(0)
        y = torch.tensor([[float(reward)]], dtype=torch.float32)
        self.optimizer.zero_grad()
        self.criterion(self.model(x), y).backward()
        self.optimizer.step()
        self.t += 1
        self.n_updates += 1
class UCB1Bandit:
    """Context-free UCB1 bandit over the ads.

    Every ``select`` during the first ``N_ADS`` updates returns the arm at the
    internal warm-up cursor; afterwards the arm with the largest
    ``mean reward + sqrt(2 * ln(t) / pulls)`` is chosen. The context argument
    is accepted but ignored.
    """

    NAME = "UCB1"
    COLOR = "#10b981"

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero all pull counts, reward sums and counters."""
        self.t = 0
        self.n_updates = 0
        self._init_idx = 0  # warm-up cursor: next arm to try once
        self.n_a = np.zeros(N_ADS)  # pulls per arm
        self.R_a = np.zeros(N_ADS)  # summed reward per arm

    def select(self, ctx):
        """Return the arm to serve; *ctx* is unused."""
        if self._init_idx < N_ADS:
            return self._init_idx
        # Counts are floored at 1 so never-updated arms do not divide by zero.
        pulls = np.maximum(self.n_a, 1)
        mean_reward = self.R_a / pulls
        log_t = np.log(max(self.t, 1))
        exploration = np.sqrt(2.0 * log_t / pulls)
        return int(np.argmax(mean_reward + exploration))

    def predict_ctr(self, ctx, ad_idx):
        """Return the empirical mean reward of *ad_idx*, or 0.0 if never pulled."""
        pulls = self.n_a[ad_idx]
        if pulls == 0:
            return 0.0
        return float(self.R_a[ad_idx] / pulls)

    def update(self, ctx, action, reward):
        """Record *reward* for *action* and advance the counters.

        The warm-up cursor advances on every update, whichever arm was updated.
        """
        # NOTE(review): assumes callers update the arm returned by select();
        # an update for a different arm still moves the warm-up cursor.
        if self._init_idx < N_ADS:
            self._init_idx += 1
        self.R_a[action] += reward
        self.n_a[action] += 1
        self.n_updates += 1
        self.t += 1
class ThompsonSamplingBandit:
    """Context-free Thompson sampling with one Beta posterior per ad.

    Every arm starts at Beta(1, 1). A reward equal to 1 increments the arm's
    alpha; any other reward increments its beta. The context argument is
    accepted but ignored.
    """

    NAME = "Thompson"
    COLOR = "#3b82f6"

    def __init__(self):
        self.reset()

    def reset(self):
        """Restore every arm to the Beta(1, 1) prior and zero the counter."""
        self.n_updates = 0
        self.alpha = np.ones(N_ADS)
        self.beta_p = np.ones(N_ADS)

    def select(self, ctx):
        """Draw one sample per arm and return the arm with the largest draw."""
        draws = np.random.beta(self.alpha, self.beta_p)
        return int(np.argmax(draws))

    def predict_ctr(self, ctx, ad_idx):
        """Return the posterior mean alpha / (alpha + beta) of *ad_idx*."""
        a = self.alpha[ad_idx]
        b = self.beta_p[ad_idx]
        return float(a / (a + b))

    def update(self, ctx, action, reward):
        """Update the posterior of *action* with a binary *reward*."""
        # Anything other than exactly 1 counts as a non-click.
        posterior = self.alpha if reward == 1 else self.beta_p
        posterior[action] += 1
        self.n_updates += 1
class LinUCBBandit:
    """LinUCB with a disjoint linear model per ad.

    For each arm the class keeps the design matrix ``A`` (initialised to the
    identity), its inverse ``A_inv`` and the reward-weighted feature sum
    ``b``. An arm's score is ``theta . x + alpha * sqrt(x^T A_inv x)`` with
    ``theta = A_inv b``.
    """

    NAME = "LinUCB"
    COLOR = "#ef4444"

    def __init__(self, alpha=1.0):
        """Store the exploration weight *alpha* and initialise all arms."""
        self.alpha = alpha
        self.reset()

    def reset(self):
        """Reset every arm to A = A_inv = I, b = 0 and zero the counter."""
        dim = CONTEXT_DIM
        self.n_updates = 0
        self.A = [np.identity(dim) for _ in range(N_ADS)]
        self.A_inv = [np.identity(dim) for _ in range(N_ADS)]
        self.b = [np.zeros(dim) for _ in range(N_ADS)]

    def _ucb_score(self, ctx, ad_idx):
        """Return the upper-confidence score of *ad_idx* for context *ctx*."""
        inv = self.A_inv[ad_idx]
        theta = inv @ self.b[ad_idx]
        # The quadratic form is floored at 0 before the square root so that
        # round-off cannot hand sqrt a negative number.
        spread = max(float(ctx @ inv @ ctx), 0.0)
        return float(theta @ ctx + self.alpha * math.sqrt(spread))

    def select(self, ctx):
        """Return the arm with the highest upper-confidence score."""
        scores = [self._ucb_score(ctx, arm) for arm in range(N_ADS)]
        return int(np.argmax(scores))

    def predict_ctr(self, ctx, ad_idx):
        """Return the linear estimate ``theta . ctx`` without the bonus term.

        The value is a raw linear prediction and is not clipped to [0, 1].
        """
        theta = self.A_inv[ad_idx] @ self.b[ad_idx]
        return float(theta @ ctx)

    def update(self, ctx, action, reward):
        """Fold one (context, reward) observation into arm *action*.

        ``A_inv`` is refreshed with the Sherman-Morrison rank-one formula
        instead of re-inverting ``A``; ``A`` and ``b`` are updated in place.
        """
        inv = self.A_inv[action]
        inv_x = inv @ ctx
        denominator = 1.0 + ctx @ inv_x
        self.A_inv[action] = inv - np.outer(inv_x, inv_x) / denominator
        self.A[action] += np.outer(ctx, ctx)
        self.b[action] += reward * ctx
        self.n_updates += 1
# ─────────────────────────────────────────────────────────────────────────────
# Global state
# ─────────────────────────────────────────────────────────────────────────────
# Registry of the available policies. Insertion order is the canonical
# algorithm order used by the derived tables below.
ALGO_CLASSES = {
    "epsilon_greedy": EpsilonGreedyNeuralBandit,
    "ucb1": UCB1Bandit,
    "thompson": ThompsonSamplingBandit,
    "linucb": LinUCBBandit,
}

# Keys, display names and chart colours are derived from the registry and from
# each class's own NAME / COLOR attributes, so they cannot drift apart.
ALGO_KEYS = list(ALGO_CLASSES)
ALGO_DISPLAY = {key: cls.NAME for key, cls in ALGO_CLASSES.items()}
ALGO_COLORS = {key: cls.COLOR for key, cls in ALGO_CLASSES.items()}

# One live instance per policy, built with default hyper-parameters.
algorithms = {key: cls() for key, cls in ALGO_CLASSES.items()}

# Lock and progress record for simulation runs.
# NOTE(review): presumably sim_lock guards sim_state and the shared
# `algorithms` instances; their users are outside this part of the file.
sim_lock = threading.Lock()
sim_state = {"running": False, "step": 0, "total": 0, "last_results": None}
# ─────────────────────────────────────────────────────────────────────────────
# HTML Template
# ─────────────────────────────────────────────────────────────────────────────
| TEMPLATE = """<!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"/> | |
| <meta name="viewport" content="width=device-width,initial-scale=1.0"/> | |
| <title>AdRL Studio</title> | |
| <script src="https://cdn.plot.ly/plotly-2.27.0.min.js"></script> | |
| <style> | |
| *{margin:0;padding:0;box-sizing:border-box;} | |
| body{font-family:'Segoe UI',sans-serif;background:#0f0f1a;color:#e2e8f0;display:flex;height:100vh;overflow:hidden;} | |
| /* Sidebar */ | |
| #sidebar{width:240px;min-width:240px;background:#1a1a2e;display:flex;flex-direction:column;padding:0;border-right:1px solid #2d2d4e;} | |
| #sidebar-header{padding:24px 20px 16px;border-bottom:1px solid #2d2d4e;} | |
| #sidebar-header h1{font-size:1.2rem;font-weight:700;color:#fff;letter-spacing:.5px;} | |
| #sidebar-header p{font-size:.72rem;color:#7c3aed;margin-top:4px;} | |
| #nav{padding:12px 0;flex:1;} | |
| .nav-item{display:flex;align-items:center;gap:10px;padding:11px 20px;cursor:pointer;color:#94a3b8;font-size:.85rem;transition:all .2s;border-left:3px solid transparent;} | |
| .nav-item:hover{background:#252545;color:#e2e8f0;} | |
| .nav-item.active{background:#1e1b4b;color:#a78bfa;border-left:3px solid #7c3aed;} | |
| .nav-icon{font-size:1.1rem;width:20px;text-align:center;} | |
| /* Main */ | |
| #main{flex:1;display:flex;flex-direction:column;overflow:hidden;} | |
| #topbar{height:52px;background:#1a1a2e;border-bottom:1px solid #2d2d4e;display:flex;align-items:center;padding:0 24px;gap:12px;} | |
| #topbar-title{font-size:1rem;font-weight:600;color:#fff;} | |
| #status-dot{width:10px;height:10px;border-radius:50%;background:#22c55e;margin-left:auto;} | |
| #status-dot.running{background:#eab308;animation:pulse 1s infinite;} | |
| #status-label{font-size:.78rem;color:#94a3b8;} | |
| @keyframes pulse{0%,100%{opacity:1;}50%{opacity:.4;}} | |
| #content{flex:1;overflow-y:auto;padding:24px;} | |
| /* Cards */ | |
| .card{background:#16213e;border-radius:10px;padding:20px;margin-bottom:18px;border:1px solid #2d2d4e;} | |
| .card-title{font-size:.9rem;font-weight:600;color:#a78bfa;margin-bottom:14px;text-transform:uppercase;letter-spacing:.8px;} | |
| /* Grid */ | |
| .grid-2{display:grid;grid-template-columns:1fr 1fr;gap:16px;} | |
| .grid-4{display:grid;grid-template-columns:repeat(4,1fr);gap:14px;} | |
| /* Form controls */ | |
| .form-row{display:flex;gap:14px;flex-wrap:wrap;align-items:flex-end;margin-bottom:16px;} | |
| .form-group{display:flex;flex-direction:column;gap:5px;min-width:150px;} | |
| label{font-size:.78rem;color:#94a3b8;font-weight:500;} | |
| select,input[type=range]{background:#0f0f1a;border:1px solid #2d2d4e;color:#e2e8f0;border-radius:6px;padding:7px 10px;font-size:.82rem;outline:none;} | |
| select:focus{border-color:#7c3aed;} | |
| input[type=range]{padding:0;height:4px;accent-color:#7c3aed;width:100%;} | |
| .range-row{display:flex;justify-content:space-between;font-size:.75rem;color:#64748b;margin-top:2px;} | |
| /* Buttons */ | |
| .btn{background:#7c3aed;color:#fff;border:none;border-radius:7px;padding:9px 20px;font-size:.85rem;font-weight:600;cursor:pointer;transition:background .2s;} | |
| .btn:hover{background:#6d28d9;} | |
| .btn:disabled{background:#374151;cursor:not-allowed;} | |
| /* Algo cards */ | |
| .algo-card{background:#0f0f1a;border-radius:8px;padding:14px;border:1px solid #2d2d4e;} | |
| .algo-name{font-size:.8rem;font-weight:700;margin-bottom:6px;} | |
| .algo-ad{font-size:1.05rem;font-weight:600;color:#fff;margin-bottom:2px;} | |
| .algo-meta{font-size:.75rem;color:#94a3b8;} | |
| .algo-score{font-size:.8rem;margin-top:6px;} | |
| /* Table */ | |
| table{width:100%;border-collapse:collapse;font-size:.82rem;} | |
| th{background:#0f0f1a;color:#94a3b8;padding:8px 12px;text-align:left;font-weight:600;border-bottom:1px solid #2d2d4e;} | |
| td{padding:8px 12px;border-bottom:1px solid #1e293b;color:#e2e8f0;} | |
| tr:last-child td{border-bottom:none;} | |
| /* Progress bar */ | |
| .progress-bar{background:#1e293b;border-radius:4px;height:8px;overflow:hidden;margin:10px 0;} | |
| .progress-fill{height:100%;background:#7c3aed;transition:width .3s;border-radius:4px;} | |
| /* Tabs hidden by default */ | |
| .tab-pane{display:none;} | |
| .tab-pane.active{display:block;} | |
| /* Stat box */ | |
| .stat-box{background:#0f0f1a;border-radius:8px;padding:12px;text-align:center;} | |
| .stat-val{font-size:1.4rem;font-weight:700;color:#a78bfa;} | |
| .stat-lbl{font-size:.72rem;color:#64748b;margin-top:2px;} | |
| /* Verdict */ | |
| .verdict-sig{color:#22c55e;font-weight:700;} | |
| .verdict-ns{color:#ef4444;font-weight:700;} | |
| /* Lift row */ | |
| .lift-row{display:flex;gap:12px;flex-wrap:wrap;margin-bottom:16px;} | |
| .lift-box{flex:1;min-width:120px;background:#0f0f1a;border-radius:8px;padding:12px;text-align:center;} | |
| </style> | |
| </head> | |
| <body> | |
| <div id="sidebar"> | |
| <div id="sidebar-header"> | |
| <h1>🎬 AdRL Studio</h1> | |
| <p>Contextual Bandit Ad Engine</p> | |
| </div> | |
| <nav id="nav"> | |
| <div class="nav-item active" onclick="showTab('live')" id="nav-live"> | |
| <span class="nav-icon">🎯</span><span>Live Ad Serving</span> | |
| </div> | |
| <div class="nav-item" onclick="showTab('simulation')" id="nav-simulation"> | |
| <span class="nav-icon">▶</span><span>Online Learning</span> | |
| </div> | |
| <div class="nav-item" onclick="showTab('regret')" id="nav-regret"> | |
| <span class="nav-icon">📈</span><span>Regret Analysis</span> | |
| </div> | |
| <div class="nav-item" onclick="showTab('abtest')" id="nav-abtest"> | |
| <span class="nav-icon">⚖</span><span>A/B Test Simulator</span> | |
| </div> | |
| <div class="nav-item" onclick="showTab('heatmap')" id="nav-heatmap"> | |
| <span class="nav-icon">🌡</span><span>Reward Landscape</span> | |
| </div> | |
| </nav> | |
| </div> | |
| <div id="main"> | |
| <div id="topbar"> | |
| <span id="topbar-title">Live Ad Serving</span> | |
| <div id="status-dot"></div> | |
| <span id="status-label">Model Ready</span> | |
| </div> | |
| <div id="content"> | |
| <!-- TAB 1: Live Ad Serving --> | |
| <div class="tab-pane active" id="tab-live"> | |
| <div class="card"> | |
| <div class="card-title">🎓 User Context</div> | |
| <div class="form-row"> | |
| <div class="form-group"> | |
| <label>Age Group</label> | |
| <select id="ctx-age"> | |
| <option value="young_adult">Young Adult (18β34)</option> | |
| <option value="adult" selected>Adult (35β54)</option> | |
| <option value="senior">Senior (55+)</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label>Device</label> | |
| <select id="ctx-device"> | |
| <option value="mobile">Mobile</option> | |
| <option value="desktop" selected>Desktop</option> | |
| <option value="tablet">Tablet</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label>Time of Day</label> | |
| <select id="ctx-tod"> | |
| <option value="morning">Morning (6β12)</option> | |
| <option value="afternoon" selected>Afternoon (12β18)</option> | |
| <option value="evening">Evening (18β24)</option> | |
| <option value="night">Night (0β6)</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label>Content Category</label> | |
| <select id="ctx-content"> | |
| <option value="tech" selected>Tech</option> | |
| <option value="sports">Sports</option> | |
| <option value="lifestyle">Lifestyle</option> | |
| <option value="news">News</option> | |
| <option value="entertainment">Entertainment</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label>Region</label> | |
| <select id="ctx-region"> | |
| <option value="north_america" selected>North America</option> | |
| <option value="europe">Europe</option> | |
| <option value="asia">Asia</option> | |
| <option value="other">Other</option> | |
| </select> | |
| </div> | |
| <div class="form-group" style="justify-content:flex-end;"> | |
| <button class="btn" onclick="getRecommendations()">🔍 Get Recommendations</button> | |
| </div> | |
| </div> | |
| </div> | |
| <div class="card"> | |
| <div class="card-title">🎭 Algorithm Recommendations</div> | |
| <div class="grid-4" id="rec-grid"> | |
| <div class="algo-card"><div class="algo-name" style="color:#f59e0b">Ξ΅-Greedy</div><div class="algo-ad" id="r-eg-ad">β</div><div class="algo-meta" id="r-eg-meta">β</div><div class="algo-score" id="r-eg-score">β</div></div> | |
| <div class="algo-card"><div class="algo-name" style="color:#10b981">UCB1</div><div class="algo-ad" id="r-ucb-ad">β</div><div class="algo-meta" id="r-ucb-meta">β</div><div class="algo-score" id="r-ucb-score">β</div></div> | |
| <div class="algo-card"><div class="algo-name" style="color:#3b82f6">Thompson</div><div class="algo-ad" id="r-ts-ad">β</div><div class="algo-meta" id="r-ts-meta">β</div><div class="algo-score" id="r-ts-score">β</div></div> | |
| <div class="algo-card"><div class="algo-name" style="color:#ef4444">LinUCB</div><div class="algo-ad" id="r-lu-ad">β</div><div class="algo-meta" id="r-lu-meta">β</div><div class="algo-score" id="r-lu-score">β</div></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- TAB 2: Online Learning Simulation --> | |
| <div class="tab-pane" id="tab-simulation"> | |
| <div class="card"> | |
| <div class="card-title">⚙ Simulation Settings</div> | |
| <div class="form-row"> | |
| <div class="form-group" style="flex:1;max-width:300px;"> | |
| <label>Impressions: <span id="n-val">3000</span></label> | |
| <input type="range" id="n-impressions" min="1000" max="10000" step="500" value="3000" | |
| oninput="document.getElementById('n-val').textContent=this.value"/> | |
| <div class="range-row"><span>1,000</span><span>10,000</span></div> | |
| </div> | |
| <div class="form-group" style="flex:1;max-width:300px;"> | |
| <label>Random Seed: <span id="seed-val">42</span></label> | |
| <input type="range" id="sim-seed" min="1" max="100" step="1" value="42" | |
| oninput="document.getElementById('seed-val').textContent=this.value"/> | |
| <div class="range-row"><span>1</span><span>100</span></div> | |
| </div> | |
| <div class="form-group" style="justify-content:flex-end;"> | |
| <button class="btn" id="run-sim-btn" onclick="runSimulation()">▶ Run Simulation</button> | |
| </div> | |
| </div> | |
| <div class="progress-bar" id="sim-progress-bar" style="display:none;"> | |
| <div class="progress-fill" id="sim-progress-fill" style="width:0%;"></div> | |
| </div> | |
| <div id="sim-progress-text" style="font-size:.78rem;color:#94a3b8;"></div> | |
| </div> | |
| <div class="card"> | |
| <div class="card-title">📈 Rolling CTR (100-impression window)</div> | |
| <div id="sim-chart" style="height:320px;"></div> | |
| </div> | |
| <div class="card"> | |
| <div class="card-title">📊 Simulation Summary</div> | |
| <div id="sim-table-container"><p style="color:#64748b;font-size:.82rem;">Run a simulation to see results.</p></div> | |
| </div> | |
| </div> | |
| <!-- TAB 3: Regret Analysis --> | |
| <div class="tab-pane" id="tab-regret"> | |
| <div class="card"> | |
| <div class="card-title">📉 Cumulative Regret Comparison</div> | |
| <p style="font-size:.78rem;color:#64748b;margin-bottom:12px;"> | |
| Cumulative regret measures the total reward missed vs. always picking the oracle best arm. | |
| Lower is better. LinUCB and Thompson typically achieve sub-linear regret. | |
| </p> | |
| <div id="regret-chart" style="height:340px;"></div> | |
| </div> | |
| <div class="card"> | |
| <div class="card-title">📋 Regret Summary</div> | |
| <div id="regret-table-container"><p style="color:#64748b;font-size:.82rem;">Run a simulation first (Online Learning tab).</p></div> | |
| </div> | |
| <div style="text-align:right;margin-top:-8px;"> | |
| <button class="btn" onclick="loadRegret()" style="font-size:.78rem;padding:7px 14px;">↻ Refresh Regret Data</button> | |
| </div> | |
| </div> | |
| <!-- TAB 4: A/B Test Simulator --> | |
| <div class="tab-pane" id="tab-abtest"> | |
| <div class="card"> | |
| <div class="card-title">⚖ A/B Test Settings</div> | |
| <div class="form-row"> | |
| <div class="form-group"> | |
| <label>Policy A</label> | |
| <select id="ab-policy-a"> | |
| <option value="linucb" selected>LinUCB</option> | |
| <option value="epsilon_greedy">Ξ΅-Greedy</option> | |
| <option value="ucb1">UCB1</option> | |
| <option value="thompson">Thompson</option> | |
| </select> | |
| </div> | |
| <div class="form-group"> | |
| <label>Policy B</label> | |
| <select id="ab-policy-b"> | |
| <option value="ucb1" selected>UCB1</option> | |
| <option value="epsilon_greedy">Ξ΅-Greedy</option> | |
| <option value="thompson">Thompson</option> | |
| <option value="linucb">LinUCB</option> | |
| </select> | |
| </div> | |
| <div class="form-group" style="flex:1;max-width:280px;"> | |
| <label>Impressions: <span id="ab-n-val">5000</span></label> | |
| <input type="range" id="ab-impressions" min="1000" max="20000" step="1000" value="5000" | |
| oninput="document.getElementById('ab-n-val').textContent=this.value"/> | |
| <div class="range-row"><span>1,000</span><span>20,000</span></div> | |
| </div> | |
| <div class="form-group" style="justify-content:flex-end;"> | |
| <button class="btn" id="run-ab-btn" onclick="runABTest()">⚖ Run A/B Test</button> | |
| </div> | |
| </div> | |
| </div> | |
| <div id="ab-results" style="display:none;"> | |
| <div class="card"> | |
| <div class="card-title">📊 A/B Test Results</div> | |
| <div class="lift-row"> | |
| <div class="lift-box"><div class="stat-val" id="ab-ctr-a">β</div><div class="stat-lbl" id="ab-lbl-a">Policy A CTR</div></div> | |
| <div class="lift-box"><div class="stat-val" id="ab-ctr-b">β</div><div class="stat-lbl" id="ab-lbl-b">Policy B CTR</div></div> | |
| <div class="lift-box"><div class="stat-val" id="ab-lift">β</div><div class="stat-lbl">Absolute Lift</div></div> | |
| <div class="lift-box"><div class="stat-val" id="ab-lift-rel">β</div><div class="stat-lbl">Relative Lift</div></div> | |
| </div> | |
| <div class="lift-row"> | |
| <div class="lift-box"><div class="stat-val" id="ab-z">β</div><div class="stat-lbl">Z-Statistic</div></div> | |
| <div class="lift-box"><div class="stat-val" id="ab-p">β</div><div class="stat-lbl">P-Value</div></div> | |
| <div class="lift-box"><div class="stat-val" id="ab-ci">β</div><div class="stat-lbl">95% CI (Lift)</div></div> | |
| <div class="lift-box" style="flex:2;"><div class="stat-val" id="ab-verdict">β</div><div class="stat-lbl">Verdict</div></div> | |
| </div> | |
| <div id="ab-chart" style="height:280px;margin-top:8px;"></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- TAB 5: Reward Landscape --> | |
| <div class="tab-pane" id="tab-heatmap"> | |
| <div class="card"> | |
| <div class="card-title">🌡 Reward Landscape Settings</div> | |
| <div class="form-row"> | |
| <div class="form-group"> | |
| <label>Algorithm</label> | |
| <select id="hm-algo"> | |
| <option value="linucb" selected>LinUCB</option> | |
| <option value="epsilon_greedy">Ξ΅-Greedy</option> | |
| <option value="ucb1">UCB1</option> | |
| <option value="thompson">Thompson</option> | |
| </select> | |
| </div> | |
| <div class="form-group" style="justify-content:flex-end;"> | |
| <button class="btn" onclick="loadHeatmap()">↻ Refresh Heatmap</button> | |
| </div> | |
| </div> | |
| <p style="font-size:.76rem;color:#64748b;">Estimated CTR for each user content category Γ ad category pair. Context held at: adult, desktop, afternoon, north_america.</p> | |
| </div> | |
| <div class="card"> | |
| <div class="card-title">📈 Estimated CTR Heatmap</div> | |
| <div id="heatmap-chart" style="height:380px;"></div> | |
| </div> | |
| </div> | |
| </div><!-- /content --> | |
| </div><!-- /main --> | |
| <script> | |
| // ββ Tab switching ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| const TAB_TITLES = { | |
| live:'Live Ad Serving', simulation:'Online Learning Simulation', | |
| regret:'Regret Analysis', abtest:'A/B Test Simulator', heatmap:'Reward Landscape' | |
| }; | |
| function showTab(name) { | |
| document.querySelectorAll('.tab-pane').forEach(p => p.classList.remove('active')); | |
| document.querySelectorAll('.nav-item').forEach(n => n.classList.remove('active')); | |
| document.getElementById('tab-' + name).classList.add('active'); | |
| document.getElementById('nav-' + name).classList.add('active'); | |
| document.getElementById('topbar-title').textContent = TAB_TITLES[name]; | |
| } | |
| // ββ Status polling βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| function pollStatus() { | |
| fetch('/api/status').then(r => r.json()).then(d => { | |
| const dot = document.getElementById('status-dot'); | |
| const lbl = document.getElementById('status-label'); | |
| if (d.running) { | |
| dot.className = 'running'; dot.style.background = '#eab308'; | |
| lbl.textContent = 'Simulation Running (' + d.step + '/' + d.total + ')'; | |
| } else { | |
| dot.className = ''; dot.style.background = '#22c55e'; | |
| lbl.textContent = 'Model Ready'; | |
| } | |
| }).catch(() => {}); | |
| } | |
| setInterval(pollStatus, 2000); | |
| // ββ Tab 1: Recommendations βββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| async function getRecommendations() { | |
| const body = { | |
| age: document.getElementById('ctx-age').value, | |
| device: document.getElementById('ctx-device').value, | |
| tod: document.getElementById('ctx-tod').value, | |
| content: document.getElementById('ctx-content').value, | |
| region: document.getElementById('ctx-region').value, | |
| }; | |
| const r = await fetch('/api/recommend', { | |
| method:'POST', headers:{'Content-Type':'application/json'}, | |
| body: JSON.stringify(body) | |
| }); | |
| const d = await r.json(); | |
| const keys = ['epsilon_greedy','ucb1','thompson','linucb']; | |
| const ids = ['eg','ucb','ts','lu']; | |
| keys.forEach((k, i) => { | |
| const rec = d[k]; | |
| document.getElementById('r-' + ids[i] + '-ad').textContent = rec.ad_id + ' (' + rec.category + ')'; | |
| document.getElementById('r-' + ids[i] + '-meta').textContent = rec.format + ' | $' + rec.bid.toFixed(2); | |
| document.getElementById('r-' + ids[i] + '-score').textContent = 'Est. CTR: ' + (rec.score * 100).toFixed(2) + '%'; | |
| }); | |
| } | |
| // ββ Tab 2: Simulation ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| let simRollingData = {}; | |
| async function runSimulation() { | |
| const n = parseInt(document.getElementById('n-impressions').value); | |
| const seed = parseInt(document.getElementById('sim-seed').value); | |
| const btn = document.getElementById('run-sim-btn'); | |
| const bar = document.getElementById('sim-progress-bar'); | |
| const fill = document.getElementById('sim-progress-fill'); | |
| const txt = document.getElementById('sim-progress-text'); | |
| btn.disabled = true; | |
| bar.style.display = 'block'; | |
| fill.style.width = '0%'; | |
| txt.textContent = 'Starting simulationβ¦'; | |
| simRollingData = {epsilon_greedy:[], ucb1:[], thompson:[], linucb:[], steps:[]}; | |
| try { | |
| const resp = await fetch('/api/simulate', { | |
| method:'POST', headers:{'Content-Type':'application/json'}, | |
| body: JSON.stringify({n_impressions: n, seed: seed}) | |
| }); | |
| const reader = resp.body.getReader(); | |
| const dec = new TextDecoder(); | |
| let buf = ''; | |
| while (true) { | |
| const {done, value} = await reader.read(); | |
| if (done) break; | |
| buf += dec.decode(value, {stream: true}); | |
| const parts = buf.split('\n\n'); | |
| buf = parts.pop(); | |
| for (const part of parts) { | |
| const line = part.trim(); | |
| if (!line.startsWith('data:')) continue; | |
| const payload = JSON.parse(line.slice(5).trim()); | |
| const pct = Math.round(payload.step / payload.total * 100); | |
| fill.style.width = pct + '%'; | |
| txt.textContent = 'Step ' + payload.step + ' / ' + payload.total; | |
| if (payload.done) { | |
| renderSimCharts(payload); | |
| renderSimTable(payload); | |
| btn.disabled = false; | |
| txt.textContent = 'Simulation complete β ' + payload.n_impressions + ' impressions.'; | |
| } | |
| } | |
| } | |
| } catch(e) { | |
| txt.textContent = 'Error: ' + e.message; | |
| btn.disabled = false; | |
| } | |
| } | |
| function renderSimCharts(d) { | |
| const traces = [ | |
| {x: d.steps, y: d.rolling_ctr.epsilon_greedy, name:'Ξ΅-Greedy', line:{color:'#f59e0b'}}, | |
| {x: d.steps, y: d.rolling_ctr.ucb1, name:'UCB1', line:{color:'#10b981'}}, | |
| {x: d.steps, y: d.rolling_ctr.thompson, name:'Thompson', line:{color:'#3b82f6'}}, | |
| {x: d.steps, y: d.rolling_ctr.linucb, name:'LinUCB', line:{color:'#ef4444'}}, | |
| ]; | |
| Plotly.react('sim-chart', traces, { | |
| template:'plotly_dark', paper_bgcolor:'#16213e', plot_bgcolor:'#0f0f1a', | |
| margin:{t:10,b:40,l:50,r:10}, autosize:true, | |
| xaxis:{title:'Impression', color:'#94a3b8', gridcolor:'#1e293b'}, | |
| yaxis:{title:'Rolling CTR', color:'#94a3b8', gridcolor:'#1e293b'}, | |
| legend:{bgcolor:'#16213e', font:{color:'#e2e8f0'}}, | |
| }, {responsive:true}); | |
| } | |
| function renderSimTable(d) { | |
| const keys = ['epsilon_greedy','ucb1','thompson','linucb']; | |
| const names = {'epsilon_greedy':'Ξ΅-Greedy','ucb1':'UCB1','thompson':'Thompson','linucb':'LinUCB'}; | |
| const colors = {'epsilon_greedy':'#f59e0b','ucb1':'#10b981','thompson':'#3b82f6','linucb':'#ef4444'}; | |
| let html = '<table><thead><tr><th>Algorithm</th><th>Final CTR</th><th>Total Reward</th><th>Policy Updates</th></tr></thead><tbody>'; | |
| keys.forEach(k => { | |
| html += '<tr><td style="color:' + colors[k] + ';font-weight:600;">' + names[k] + '</td>' | |
| + '<td>' + (d.final_ctr[k] * 100).toFixed(2) + '%</td>' | |
| + '<td>' + d.total_reward[k] + '</td>' | |
| + '<td>' + d.n_updates[k] + '</td></tr>'; | |
| }); | |
| html += '</tbody></table>'; | |
| document.getElementById('sim-table-container').innerHTML = html; | |
| } | |
| // ββ Tab 3: Regret ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
// Fetch the regret series of the last simulation from /api/regret and render
// both the cumulative-regret line chart and the summary table.
// Alerts and returns early when the endpoint reports an error or has no data.
async function loadRegret() {
  const resp = await fetch('/api/regret');
  if (!resp.ok) { alert('Run a simulation first.'); return; }
  const d = await resp.json();
  if (!d.steps || d.steps.length === 0) { alert('No simulation data yet.'); return; }

  // One record per algorithm drives both the chart traces and the table rows.
  const algos = [
    {key: 'epsilon_greedy', label: 'ε-Greedy', color: '#f59e0b'},
    {key: 'ucb1', label: 'UCB1', color: '#10b981'},
    {key: 'thompson', label: 'Thompson', color: '#3b82f6'},
    {key: 'linucb', label: 'LinUCB', color: '#ef4444'},
  ];

  const traces = algos.map(a => ({
    x: d.steps,
    y: d.cumulative_regret[a.key],
    name: a.label,
    line: {color: a.color},
  }));
  Plotly.react('regret-chart', traces, {
    template: 'plotly_dark', paper_bgcolor: '#16213e', plot_bgcolor: '#0f0f1a',
    margin: {t: 10, b: 40, l: 50, r: 10}, autosize: true,
    xaxis: {title: 'Impression', color: '#94a3b8', gridcolor: '#1e293b'},
    yaxis: {title: 'Cumulative Regret', color: '#94a3b8', gridcolor: '#1e293b'},
    legend: {bgcolor: '#16213e', font: {color: '#e2e8f0'}},
  }, {responsive: true});

  const header = '<table><thead><tr><th>Algorithm</th><th>Final Cumulative Regret</th><th>Avg Per-Step Regret</th></tr></thead><tbody>';
  const rows = algos.map(a =>
    '<tr><td style="color:' + a.color + ';font-weight:600;">' + a.label + '</td>'
    + '<td>' + d.final_regret[a.key].toFixed(2) + '</td>'
    + '<td>' + d.avg_regret[a.key].toFixed(4) + '</td></tr>'
  ).join('');
  document.getElementById('regret-table-container').innerHTML = header + rows + '</tbody></table>';
}
// ── Tab 4: A/B Test ──────────────────────────────────────────────────────────
// Run an A/B test between the two selected policies and render the outcome.
// POSTs {policy_a, policy_b, n_impressions} to /api/abtest, fills the stat
// tiles, and draws a CTR bar chart with 1.96 x SE error bars.
// Any failure (network, non-2xx response, malformed JSON) is shown via alert().
async function runABTest() {
  const pA = document.getElementById('ab-policy-a').value;
  const pB = document.getElementById('ab-policy-b').value;
  const n = parseInt(document.getElementById('ab-impressions').value, 10);
  if (pA === pB) { alert('Please select two different policies.'); return; }
  // NaN would be serialised as null and rejected by the server; stop here instead.
  if (Number.isNaN(n)) { alert('Please enter a valid number of impressions.'); return; }

  const btn = document.getElementById('run-ab-btn');
  // Remember the button's own label so it can be restored verbatim afterwards.
  const idleLabel = btn.textContent;
  btn.disabled = true; btn.textContent = 'Running…';

  const setText = (id, text) => { document.getElementById(id).textContent = text; };
  const pct = (value, digits) => (value * 100).toFixed(digits) + '%';

  try {
    const r = await fetch('/api/abtest', {
      method: 'POST', headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({policy_a: pA, policy_b: pB, n_impressions: n})
    });
    const d = await r.json();
    // Error responses carry {error: ...}; surface that instead of failing on missing fields.
    if (!r.ok) { throw new Error(d.error || ('Request failed (HTTP ' + r.status + ')')); }

    const names = {epsilon_greedy: 'ε-Greedy', ucb1: 'UCB1', thompson: 'Thompson', linucb: 'LinUCB'};
    document.getElementById('ab-results').style.display = 'block';
    setText('ab-lbl-a', names[pA] + ' CTR');
    setText('ab-lbl-b', names[pB] + ' CTR');
    setText('ab-ctr-a', pct(d.ctr_a, 2));
    setText('ab-ctr-b', pct(d.ctr_b, 2));
    setText('ab-lift', pct(d.lift_abs, 3));
    setText('ab-lift-rel', pct(d.lift_rel, 1));
    setText('ab-z', d.z_stat.toFixed(3));
    setText('ab-p', d.p_value.toFixed(4));
    setText('ab-ci', '[' + pct(d.ci_low, 3) + ', ' + pct(d.ci_high, 3) + ']');

    const vEl = document.getElementById('ab-verdict');
    if (d.significant) {
      vEl.textContent = '✓ Significant (p<0.05)'; vEl.className = 'stat-val verdict-sig';
    } else {
      vEl.textContent = '✗ Not Significant'; vEl.className = 'stat-val verdict-ns';
    }

    // Bar chart with error bars: per-arm binomial standard error, scaled by 1.96.
    const ctrA = d.ctr_a, ctrB = d.ctr_b;
    const seA = Math.sqrt(ctrA * (1 - ctrA) / d.n_a);
    const seB = Math.sqrt(ctrB * (1 - ctrB) / d.n_b);
    const traceAB = {
      x: [names[pA], names[pB]], y: [ctrA, ctrB],
      type: 'bar', marker: {color: ['#7c3aed', '#0ea5e9']},
      error_y: {type: 'data', array: [1.96 * seA, 1.96 * seB], visible: true, color: '#e2e8f0'},
      text: [pct(ctrA, 2), pct(ctrB, 2)],
      textposition: 'outside',
    };
    Plotly.react('ab-chart', [traceAB], {
      template: 'plotly_dark', paper_bgcolor: '#16213e', plot_bgcolor: '#0f0f1a',
      margin: {t: 20, b: 40, l: 50, r: 10}, autosize: true, showlegend: false,
      yaxis: {title: 'CTR', color: '#94a3b8', gridcolor: '#1e293b'},
    }, {responsive: true});
  } catch (e) {
    alert('Error: ' + e.message);
  } finally {
    btn.disabled = false; btn.textContent = idleLabel;
  }
}
// ── Tab 5: Heatmap ───────────────────────────────────────────────────────────
// Fetch the estimated-CTR matrix for the selected algorithm from /api/heatmap
// and draw it as an annotated heatmap (rows: user content category,
// columns: ad category).
async function loadHeatmap() {
  const algo = document.getElementById('hm-algo').value;
  const r = await fetch('/api/heatmap', {
    method: 'POST', headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({algorithm: algo})
  });
  const d = await r.json();
  // An error response has no matrix; report it rather than throwing on d.matrix.map.
  if (!r.ok) { alert(d.error || 'Failed to load heatmap.'); return; }

  // Cell annotations: each estimate formatted as a percentage with two decimals.
  const cellLabels = d.matrix.map(row => row.map(v => (v * 100).toFixed(2) + '%'));
  const trace = {
    z: d.matrix, x: d.ad_cats, y: d.content_cats,
    type: 'heatmap', colorscale: 'Viridis',
    hoverongaps: false,
    colorbar: {title: 'Est. CTR', tickfont: {color: '#e2e8f0'}, titlefont: {color: '#e2e8f0'}},
    text: cellLabels,
    texttemplate: '%{text}', textfont: {color: '#fff', size: 11},
  };
  const names = {epsilon_greedy: 'ε-Greedy', ucb1: 'UCB1', thompson: 'Thompson', linucb: 'LinUCB'};
  Plotly.react('heatmap-chart', [trace], {
    template: 'plotly_dark', paper_bgcolor: '#16213e', plot_bgcolor: '#0f0f1a',
    margin: {t: 30, b: 60, l: 120, r: 10}, autosize: true,
    title: {text: 'Estimated CTR — ' + names[algo], font: {color: '#e2e8f0', size: 13}},
    xaxis: {title: 'Ad Category', color: '#94a3b8'},
    yaxis: {title: 'User Content Category', color: '#94a3b8'},
  }, {responsive: true});
}
// Draw the heatmap automatically as soon as the DOM has been parsed.
document.addEventListener('DOMContentLoaded', () => {
  loadHeatmap();
});
| </script> | |
| </body> | |
| </html>""" | |
# ─────────────────────────────────────────────────────────────────────────────
# Flask routes
# ─────────────────────────────────────────────────────────────────────────────
def index():
    """Render the inline ``TEMPLATE`` string and return it as the page body."""
    # NOTE(review): no @app.route decorator is visible on this or the other
    # handlers in this section — confirm they are registered with ``app``.
    page = render_template_string(TEMPLATE)
    return page
def api_status():
    """Return the simulation progress (``running``, ``step``, ``total``) as JSON.

    The three fields are read from ``sim_state`` while holding ``sim_lock``.
    """
    with sim_lock:
        snapshot = {
            field: sim_state[field] for field in ("running", "step", "total")
        }
        return jsonify(snapshot)
def api_recommend():
    """Return each algorithm's recommended ad for one user context.

    Request JSON must be an object with the keys ``age``, ``device``, ``tod``,
    ``content`` and ``region``; they are passed to ``encode_context``.

    Returns:
        JSON mapping each algorithm key to ``ad_id``, ``category``, ``format``,
        ``bid`` and ``score`` (the algorithm's predicted CTR for its chosen
        ad, rounded to 4 decimals), or ``{"error": ...}`` with status 400 when
        the body is missing a field or cannot be encoded.
    """
    data = request.get_json(force=True)
    try:
        ctx = encode_context(
            data['age'], data['device'], data['tod'],
            data['content'], data['region'],
        )
    except (KeyError, ValueError, TypeError) as e:
        # TypeError covers a body that is valid JSON but not an object
        # (e.g. ``null`` or a list), which cannot be indexed by field name.
        return jsonify({"error": str(e)}), 400

    result = {}
    for key, algo in algorithms.items():
        ad_idx = algo.select(ctx)
        score = algo.predict_ctr(ctx, ad_idx)
        ad_id = AD_IDS[ad_idx]
        result[key] = {
            "ad_id": ad_id,
            "category": AD_CAT_MAP[ad_id],
            "format": AD_FORMATS[ad_id],
            "bid": AD_BIDS[ad_id],
            # Cast to a plain float before rounding, as api_heatmap does, so
            # the value is JSON-serialisable whatever numeric type is returned.
            "score": round(float(score), 4),
        }
    return jsonify(result)
def api_simulate():
    """Reset every algorithm, run a fresh simulation and stream progress as SSE.

    Request JSON (both fields optional):
        n_impressions: impressions to simulate, clamped to [1000, 10000]
            (default 3000).
        seed: seed passed to ``np.random.seed`` (default 42).

    Returns:
        A ``text/event-stream`` response. A progress event
        ``{"step", "total", "done": false}`` is emitted every 50 impressions
        and after the last one, followed by one final event with
        ``"done": true`` carrying the rolling-CTR series, final CTRs, total
        rewards and update counts. The regret series is stored in
        ``sim_state['last_results']``. Invalid parameters yield a 400 JSON
        error before any streaming starts.
    """
    data = request.get_json(force=True)
    try:
        n_impressions = int(data.get('n_impressions', 3000))
        seed = int(data.get('seed', 42))
    except (AttributeError, TypeError, ValueError, OverflowError):
        return jsonify({"error": "n_impressions and seed must be integers"}), 400
    # np.random.seed only accepts 32-bit unsigned values; reject anything else
    # here rather than failing inside the generator once streaming has begun.
    if not 0 <= seed <= 2**32 - 1:
        return jsonify({"error": "seed must be between 0 and 4294967295"}), 400
    n_impressions = max(1000, min(10000, n_impressions))

    def generate():
        # Reset all algorithm states
        for algo in algorithms.values():
            algo.reset()
        np.random.seed(seed)
        with sim_lock:
            sim_state['running'] = True
            sim_state['step'] = 0
            sim_state['total'] = n_impressions

        try:
            rewards = {k: [] for k in ALGO_KEYS}
            checkpoint_interval = 50
            # Rolling CTR at each checkpoint covers the last 100 impressions
            rolling_window = 100
            rolling_ctr_series = {k: [] for k in ALGO_KEYS}
            steps_series = []
            # Cumulative regret is accumulated incrementally per step
            cum_regret = {k: 0.0 for k in ALGO_KEYS}
            cum_regret_series = {k: [] for k in ALGO_KEYS}

            for t in range(n_impressions):
                ctx = sample_random_context()
                # True CTR of every ad for this context; the oracle plays the best one
                all_ctrs = np.clip(_sigmoid(_TRUE_WEIGHTS @ ctx), 0.02, 0.25)
                oracle_idx = int(np.argmax(all_ctrs))
                oracle_r = int(np.random.rand() < all_ctrs[oracle_idx])

                # Each algorithm selects, receives a sampled reward, and updates
                for k, algo in algorithms.items():
                    act = algo.select(ctx)
                    r = int(np.random.rand() < all_ctrs[act])
                    algo.update(ctx, act, r)
                    rewards[k].append(r)
                    # Regret is measured against the oracle's sampled reward
                    cum_regret[k] += oracle_r - r

                # Checkpoint every `checkpoint_interval` steps and on the last step
                if (t + 1) % checkpoint_interval == 0 or t == n_impressions - 1:
                    steps_series.append(t + 1)
                    for k in ALGO_KEYS:
                        window = rewards[k][-rolling_window:]
                        rolling_ctr_series[k].append(round(sum(window) / len(window), 4))
                        cum_regret_series[k].append(round(cum_regret[k], 4))
                    with sim_lock:
                        sim_state['step'] = t + 1
                    payload = {
                        "step": t + 1,
                        "total": n_impressions,
                        "done": False,
                    }
                    yield f"data: {json.dumps(payload)}\n\n"

            # Final payload with full series
            final_ctr = {k: round(sum(rewards[k]) / len(rewards[k]), 4) for k in ALGO_KEYS}
            total_rew = {k: int(sum(rewards[k])) for k in ALGO_KEYS}
            n_upd = {k: algorithms[k].n_updates for k in ALGO_KEYS}

            # Store for /api/regret
            with sim_lock:
                sim_state['running'] = False
                sim_state['last_results'] = {
                    'steps': steps_series,
                    'cumulative_regret': cum_regret_series,
                    'final_regret': {k: cum_regret_series[k][-1] for k in ALGO_KEYS},
                    'avg_regret': {
                        k: round(cum_regret_series[k][-1] / n_impressions, 5)
                        for k in ALGO_KEYS
                    },
                }

            final_payload = {
                "done": True,
                "step": n_impressions,
                "total": n_impressions,
                "n_impressions": n_impressions,
                "steps": steps_series,
                "rolling_ctr": rolling_ctr_series,
                "final_ctr": final_ctr,
                "total_reward": total_rew,
                "n_updates": n_upd,
            }
            yield f"data: {json.dumps(final_payload)}\n\n"
        finally:
            # Also reached when the stream is closed early (e.g. the client
            # disconnects) or an error occurs, so the status flag cannot stay
            # stuck on "running".
            with sim_lock:
                sim_state['running'] = False

    return Response(
        generate(),
        mimetype='text/event-stream',
        headers={'Cache-Control': 'no-cache', 'X-Accel-Buffering': 'no'},
    )
def api_regret():
    """Return the regret data stored by the most recent simulation.

    Responds with the ``sim_state['last_results']`` dict as JSON, or with a
    404 JSON error when no results have been stored yet.
    """
    with sim_lock:
        cached = sim_state.get('last_results')
    if cached is not None:
        return jsonify(cached)
    return jsonify({"error": "No simulation results available. Run a simulation first."}), 404
def _run_ab_arm(algo, n_rounds):
    """Train ``algo`` online for ``n_rounds`` impressions and return its 0/1 rewards."""
    rewards = []
    for _ in range(n_rounds):
        ctx = sample_random_context()
        act = algo.select(ctx)
        rew = int(np.random.rand() < true_ctr(act, ctx))
        algo.update(ctx, act, rew)
        rewards.append(rew)
    return rewards


def api_abtest():
    """Run a two-arm A/B test between freshly constructed policies.

    Request JSON (all optional):
        policy_a: key in ``ALGO_CLASSES`` (default ``'linucb'``).
        policy_b: key in ``ALGO_CLASSES`` (default ``'ucb1'``).
        n_impressions: total impressions, clamped to [1000, 20000] and split
            evenly between the arms (default 5000).

    Returns:
        JSON with each arm's CTR and sample size, absolute and relative lift
        of A over B, the pooled two-proportion z statistic, its two-sided
        p-value, an unpooled 95% confidence interval for the CTR difference,
        and ``significant`` (p < 0.05). Responds with status 400 and an
        ``error`` message for invalid input.
    """
    data = request.get_json(force=True)
    try:
        key_a = data.get('policy_a', 'linucb')
        key_b = data.get('policy_b', 'ucb1')
        n_tot = int(data.get('n_impressions', 5000))
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Covers a non-object body and a null / non-numeric n_impressions.
        return jsonify(
            {"error": "Request body must be a JSON object with an integer n_impressions"}
        ), 400
    n_tot = max(1000, min(20000, n_tot))

    if key_a not in ALGO_CLASSES or key_b not in ALGO_CLASSES:
        return jsonify({"error": "Invalid policy key"}), 400
    if key_a == key_b:
        return jsonify({"error": "Policy A and B must differ"}), 400

    algo_a = ALGO_CLASSES[key_a]()
    algo_b = ALGO_CLASSES[key_b]()
    n_each = n_tot // 2

    # Fixed seed; arm A consumes the random stream first, then arm B.
    np.random.seed(1)
    r_a = _run_ab_arm(algo_a, n_each)
    r_b = _run_ab_arm(algo_b, n_each)

    n1, n2 = len(r_a), len(r_b)
    clicks_a, clicks_b = sum(r_a), sum(r_b)
    p1, p2 = clicks_a / n1, clicks_b / n2

    # Pooled standard error for the z-test; the tiny fallback avoids a zero
    # divisor when every impression (or none) resulted in a click.
    p_pool = (clicks_a + clicks_b) / (n1 + n2)
    se = math.sqrt(p_pool * (1 - p_pool) * (1/n1 + 1/n2)) if p_pool not in (0, 1) else 1e-9
    z = (p1 - p2) / se
    p_value = float(2 * (1 - stats.norm.cdf(abs(z))))

    # Unpooled standard error for the confidence interval of the difference.
    se_diff = math.sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2)
    ci_low = (p1 - p2) - 1.96 * se_diff
    ci_high = (p1 - p2) + 1.96 * se_diff

    return jsonify({
        "ctr_a": round(p1, 5),
        "ctr_b": round(p2, 5),
        "n_a": n1,
        "n_b": n2,
        "lift_abs": round(p1 - p2, 5),
        # max() guards the relative lift against a zero CTR in arm B.
        "lift_rel": round((p1 - p2) / max(p2, 1e-9), 5),
        "z_stat": round(z, 4),
        "p_value": round(p_value, 5),
        "ci_low": round(ci_low, 5),
        "ci_high": round(ci_high, 5),
        "significant": p_value < 0.05,
    })
def api_heatmap():
    """Return one algorithm's estimated CTR for each content/ad category pair.

    Request JSON:
        algorithm: key in ``algorithms`` (default ``'linucb'``).

    Returns:
        JSON with ``matrix`` (one row per entry of ``CONTENT_CATS``, one
        column per ad category, values rounded to 5 decimals),
        ``content_cats``, ``ad_cats`` and ``algorithm``; or a 400 JSON error
        for an unknown algorithm key.
    """
    data = request.get_json(force=True)
    algo_key = data.get('algorithm', 'linucb')
    if algo_key not in algorithms:
        return jsonify({"error": "Invalid algorithm"}), 400
    algo = algorithms[algo_key]

    ad_cats = ["Tech", "Fashion", "Finance", "Food", "Travel"]
    # Representative ad per category: every 4th index (ad_01, ad_05, ad_09, ad_13, ad_17)
    ads_per_cat = 4

    matrix = []
    for content in CONTENT_CATS:
        # Only the content category varies; the rest of the user profile is
        # fixed, so the context is encoded once per row.
        ctx = encode_context("adult", "desktop", "afternoon", content, "north_america")
        matrix.append([
            round(float(algo.predict_ctr(ctx, cat_pos * ads_per_cat)), 5)
            for cat_pos in range(len(ad_cats))
        ])

    return jsonify({
        "matrix": matrix,
        "content_cats": CONTENT_CATS,
        "ad_cats": ad_cats,
        "algorithm": algo_key,
    })
# ─────────────────────────────────────────────────────────────────────────────
if __name__ == '__main__':
    # Script entry point: start Flask's built-in server on all interfaces,
    # port 7860, with debug mode off and threaded request handling enabled.
    app.run(host='0.0.0.0', port=7860, debug=False, threaded=True)