Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files
- Dockerfile +23 -0
- app.py +71 -0
- bandit.py +115 -0
- causal.py +120 -0
- dashboard.py +65 -0
- data.py +74 -0
- requirements.txt +13 -0
- utils.py +33 -0
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Minimal setup to run FastAPI on Hugging Face Spaces (Docker runtime).
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Basic build dependencies for the scientific Python stack (pymc/scipy).
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libatlas-base-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY requirements.txt /app/
RUN pip install -r requirements.txt

COPY . /app

# HF Spaces routes traffic to port 7860.
EXPOSE 7860

# BUG FIX: was `CDM[...]`, which is not a Dockerfile instruction (build fails /
# container never starts). Use CMD in exec form.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from fastapi import FastAPI, UploadFile, File, Body
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from typing import Dict, Any, List
|
| 7 |
+
|
| 8 |
+
from data import append_events, read_events, aggregate
|
| 9 |
+
from bandit import EmpiricalBayesHierarchicalThompson
|
| 10 |
+
from causal import fit_uplift_binary
|
| 11 |
+
from utils import dumps
|
| 12 |
+
|
| 13 |
+
# Gradio を FastAPI にマウント
|
| 14 |
+
from dashboard import build_ui
|
| 15 |
+
import gradio as gr
|
| 16 |
+
|
| 17 |
+
app = FastAPI(title="AdCopy MAB Optimizer Pro", version="0.1.0")
|
| 18 |
+
app.add_middleware(
|
| 19 |
+
CORSMiddleware,
|
| 20 |
+
allow_origins=["*"],
|
| 21 |
+
allow_credentials=True,
|
| 22 |
+
allow_methods=["*"],
|
| 23 |
+
allow_headers=["*"],
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
ui = build_ui()
|
| 27 |
+
app = gr.mount_gradio_app(app, ui, path="/")
|
| 28 |
+
|
| 29 |
+
BANDIT = EmpiricalBayesHierarchicalThompson(min_explore=0.05, margin=0.0, n_draws=20000)
|
| 30 |
+
|
| 31 |
+
@app.get("/api/health")
|
| 32 |
+
def health():
|
| 33 |
+
return {"status": "ok"}
|
| 34 |
+
|
| 35 |
+
@app.get("/api/events")
|
| 36 |
+
def get_events():
|
| 37 |
+
df = read_events()
|
| 38 |
+
return JSONResponse(content=df.to_dict(orient="records"))
|
| 39 |
+
|
| 40 |
+
@app.post("/api/ingest")
|
| 41 |
+
def ingest(rows: List[Dict[str, Any]] = Body(..., embed=True)):
|
| 42 |
+
"""
|
| 43 |
+
rows: [
|
| 44 |
+
{"date":"2025-09-01","medium":"FB","creative":"A1","is_control":1,"impressions":1000,"clicks":30,"conversions":5,"cost":1000.0,"features_json":"{\\"len\\":20}"},
|
| 45 |
+
...
|
| 46 |
+
]
|
| 47 |
+
"""
|
| 48 |
+
df = pd.DataFrame(rows)
|
| 49 |
+
append_events(df)
|
| 50 |
+
return {"ok": True, "n": len(df)}
|
| 51 |
+
|
| 52 |
+
@app.get("/api/aggregate")
|
| 53 |
+
def get_agg():
|
| 54 |
+
agg = aggregate()
|
| 55 |
+
return JSONResponse(content=agg.to_dict(orient="records"))
|
| 56 |
+
|
| 57 |
+
@app.post("/api/optimize")
|
| 58 |
+
def optimize():
|
| 59 |
+
agg = aggregate()
|
| 60 |
+
if agg.empty:
|
| 61 |
+
return {"message": "no data"}
|
| 62 |
+
rec = BANDIT.recommend(agg)
|
| 63 |
+
return JSONResponse(content=rec)
|
| 64 |
+
|
| 65 |
+
@app.post("/api/uplift")
|
| 66 |
+
def uplift():
|
| 67 |
+
agg = aggregate()
|
| 68 |
+
if agg.empty:
|
| 69 |
+
return {"message": "no data"}
|
| 70 |
+
res = fit_uplift_binary(agg)
|
| 71 |
+
return JSONResponse(content=res)
|
bandit.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from typing import Dict, Any, Tuple
|
| 5 |
+
|
| 6 |
+
# ------------------------------
|
| 7 |
+
# Empirical Bayes hierarchical Beta-Binomial + Thompson Sampling
|
| 8 |
+
# ------------------------------
|
| 9 |
+
|
| 10 |
+
class EmpiricalBayesHierarchicalThompson:
|
| 11 |
+
"""
|
| 12 |
+
各アーム (medium, creative) のクリック率 p を Beta 事前分布で表現。
|
| 13 |
+
事前 Beta(a0, b0) は全アームの経験ベイズ推定で安定化。
|
| 14 |
+
事後: Beta(a0 + clicks, b0 + impressions - clicks)
|
| 15 |
+
|
| 16 |
+
* 少データ時に極端な推定を避ける
|
| 17 |
+
* Thompson Sampling により配分を提案
|
| 18 |
+
* 自動停止/増配分判断を提供
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
def __init__(self, min_explore: float = 0.05, margin: float = 0.0, n_draws: int = 20000, seed: int = 42):
|
| 22 |
+
self.min_explore = min_explore # 各アームの最低配分
|
| 23 |
+
self.margin = margin # 改善余地のマージン(ex: 0.002 = 0.2pp)
|
| 24 |
+
self.n_draws = n_draws
|
| 25 |
+
self.rng = np.random.default_rng(seed)
|
| 26 |
+
|
| 27 |
+
@staticmethod
|
| 28 |
+
def _eb_prior_by_moments(arm_df: pd.DataFrame) -> Tuple[float, float]:
|
| 29 |
+
"""
|
| 30 |
+
アーム別 CTR の分散を利用して Beta(a0, b0) を MoM 推定。
|
| 31 |
+
既知の式: mean=m, var=v => a0 = m*(m*(1-m)/v - 1), b0 = (1-m)*(m*(1-m)/v - 1)
|
| 32 |
+
v が小さすぎる/0 の場合は弱情報事前を返す。
|
| 33 |
+
"""
|
| 34 |
+
# 各アームの粗推定 CTR(Laplace 平滑で安定化)
|
| 35 |
+
ctr = (arm_df["clicks"] + 1) / (arm_df["impressions"] + 2)
|
| 36 |
+
m = float(np.clip(ctr.mean(), 1e-6, 1 - 1e-6))
|
| 37 |
+
v = float(np.var(ctr, ddof=1))
|
| 38 |
+
if not np.isfinite(v) or v <= 1e-8:
|
| 39 |
+
# ほぼ同一のCTR → 弱情報事前
|
| 40 |
+
return 1.0, 1.0
|
| 41 |
+
k = m * (1 - m) / v - 1.0
|
| 42 |
+
if k <= 0 or not np.isfinite(k):
|
| 43 |
+
return 1.0, 1.0
|
| 44 |
+
a0 = m * k
|
| 45 |
+
b0 = (1 - m) * k
|
| 46 |
+
# 上限/下限を設定
|
| 47 |
+
a0 = float(np.clip(a0, 0.5, 1000))
|
| 48 |
+
b0 = float(np.clip(b0, 0.5, 1000))
|
| 49 |
+
return a0, b0
|
| 50 |
+
|
| 51 |
+
def _posterior_params(self, df: pd.DataFrame) -> pd.DataFrame:
|
| 52 |
+
a0, b0 = self._eb_prior_by_moments(df)
|
| 53 |
+
post = df.copy()
|
| 54 |
+
post["alpha"] = a0 + post["clicks"].astype(float)
|
| 55 |
+
post["beta"] = b0 + (post["impressions"] - post["clicks"]).astype(float)
|
| 56 |
+
post["post_mean"] = post["alpha"] / (post["alpha"] + post["beta"])
|
| 57 |
+
post["post_var"] = (post["alpha"] * post["beta"]) / (((post["alpha"] + post["beta"])**2) * (post["alpha"] + post["beta"] + 1))
|
| 58 |
+
post["a0"] = a0
|
| 59 |
+
post["b0"] = b0
|
| 60 |
+
return post
|
| 61 |
+
|
| 62 |
+
def recommend(self, df: pd.DataFrame) -> Dict[str, Any]:
|
| 63 |
+
"""媒体ごとにTSで配分率を提案し、停止/増配分を判断。"""
|
| 64 |
+
post = self._posterior_params(df)
|
| 65 |
+
out = {}
|
| 66 |
+
for medium, g in post.groupby("medium"):
|
| 67 |
+
arms = g.reset_index(drop=True)
|
| 68 |
+
K = len(arms)
|
| 69 |
+
samples = self.rng.beta(arms["alpha"].values, arms["beta"].values, size=(self.n_draws, K))
|
| 70 |
+
# ベースライン(control があればそれを優先)
|
| 71 |
+
if (arms["is_control"] == 1).any():
|
| 72 |
+
base_idx = int(arms.index[arms["is_control"] == 1][0])
|
| 73 |
+
else:
|
| 74 |
+
base_idx = int(arms["post_mean"].idxmax())
|
| 75 |
+
base_col = list(arms.index).index(base_idx)
|
| 76 |
+
|
| 77 |
+
winners = np.argmax(samples, axis=1)
|
| 78 |
+
win_prob = np.bincount(winners, minlength=K) / self.n_draws
|
| 79 |
+
|
| 80 |
+
# 各アームがベースより (margin) だけ下回る確率
|
| 81 |
+
worse_than_base = (samples.T < (samples[:, base_col] - self.margin)).mean(axis=1)
|
| 82 |
+
|
| 83 |
+
# 停止・増配分判定
|
| 84 |
+
decisions = []
|
| 85 |
+
for k in range(K):
|
| 86 |
+
d = {
|
| 87 |
+
"creative": arms.loc[k, "creative"],
|
| 88 |
+
"is_control": int(arms.loc[k, "is_control"]),
|
| 89 |
+
"post_mean": float(arms.loc[k, "post_mean"]),
|
| 90 |
+
"win_prob": float(win_prob[k]),
|
| 91 |
+
"worse_than_base_prob": float(worse_than_base[k]),
|
| 92 |
+
"status": "hold"
|
| 93 |
+
}
|
| 94 |
+
if d["worse_than_base_prob"] >= 0.9 and arms.loc[k, "impressions"] >= 200:
|
| 95 |
+
d["status"] = "stop"
|
| 96 |
+
elif d["win_prob"] >= 0.95 and arms.loc[k, "impressions"] >= 200:
|
| 97 |
+
d["status"] = "boost"
|
| 98 |
+
decisions.append(d)
|
| 99 |
+
|
| 100 |
+
# 配分:勝者確率に基づき、min_explore を確保
|
| 101 |
+
alloc = win_prob.copy()
|
| 102 |
+
alloc = alloc / alloc.sum()
|
| 103 |
+
alloc = np.clip(alloc, self.min_explore, 1.0)
|
| 104 |
+
alloc = alloc / alloc.sum()
|
| 105 |
+
|
| 106 |
+
out[str(medium)] = {
|
| 107 |
+
"arms": arms[["creative", "impressions", "clicks", "post_mean", "is_control"]].assign(
|
| 108 |
+
win_prob=win_prob,
|
| 109 |
+
worse_than_base_prob=worse_than_base,
|
| 110 |
+
).to_dict(orient="records"),
|
| 111 |
+
"allocation": {str(arms.loc[k, "creative"]): float(alloc[k]) for k in range(K)},
|
| 112 |
+
"decisions": decisions,
|
| 113 |
+
"posterior_prior": {"a0": float(arms.loc[0, "a0"]), "b0": float(arms.loc[0, "b0"])},
|
| 114 |
+
}
|
| 115 |
+
return out
|
causal.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import pymc as pm
|
| 5 |
+
import pytensor.tensor as at
|
| 6 |
+
from typing import Dict, Any, Optional
|
| 7 |
+
|
| 8 |
+
"""
|
| 9 |
+
階層ロジスティック回帰で uplift(コントロールとの差)を推定。
|
| 10 |
+
- 目的変数: click (0/1) 推奨。集計単位が日次のときは二項モデルに切替可能。
|
| 11 |
+
- 階層: 媒体ランダム効果。
|
| 12 |
+
- 処置: creative(control=1 の行を基準)
|
| 13 |
+
- 共変量: 任意の数値列(正規化して利用)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
def _zscore(df: pd.DataFrame, cols):
|
| 17 |
+
x = df[cols].astype(float)
|
| 18 |
+
return (x - x.mean()) / (x.std(ddof=0) + 1e-9)
|
| 19 |
+
|
def fit_uplift_binary(
    df: pd.DataFrame,
    outcome_col: str = "click_bin",
    medium_col: str = "medium",
    creative_col: str = "creative",
    control_flag_col: str = "is_control",
    feature_cols: Optional[list] = None,
    draws: int = 1000,
    target_accept: float = 0.9,
    random_seed: int = 42,
) -> Dict[str, Any]:
    """
    Hierarchical Bayesian logistic model of uplift vs. the control creative.

    - Outcome: row-level 0/1 in `outcome_col` when present; otherwise the
      aggregated (impressions, clicks) columns with a Binomial likelihood.
    - Hierarchy: a random intercept per medium.
    - Treatment: creative fixed effects, with the control creative as baseline
      (its coefficient is pinned to 0).
    - Covariates: optional numeric columns, z-scored.

    Returns a dict with the control reference, the creative list, and per
    creative the posterior mean uplift and P(uplift > 0).
    """
    # --- preprocessing -----------------------------------------------------
    d = df.copy().reset_index(drop=True)
    if outcome_col not in d.columns:
        # Aggregated input -> Binomial likelihood over (n, y).
        d["n"] = d["impressions"].astype(int)
        d["y"] = d["clicks"].astype(int)
        binomial = True
    else:
        d["y"] = d[outcome_col].astype(int)
        d["n"] = 1
        binomial = False

    # Creative index, with the control creative as the reference level.
    creatives = d[creative_col].astype(str).unique().tolist()
    control_creatives = d.loc[d[control_flag_col] == 1, creative_col].astype(str).unique().tolist()
    control_ref = control_creatives[0] if len(control_creatives) else creatives[0]

    d["creative_idx"] = d[creative_col].astype(str).apply(lambda x: creatives.index(x)).astype(int)
    d["medium_idx"] = d[medium_col].astype(str).astype("category").cat.codes.values

    # Optional covariates.
    X = None
    if feature_cols:
        X = _zscore(d, feature_cols).values
        p = X.shape[1]
    else:
        p = 0

    with pm.Model() as model:
        # Medium random intercept (non-centered parameterization).
        n_medium = int(d["medium_idx"].max()) + 1
        mu_re = pm.Normal("mu_re", 0.0, 1.0)
        sd_re = pm.HalfNormal("sd_re", 1.0)
        z_re = pm.Normal("z_re", 0.0, 1.0, shape=n_medium)
        b_medium = pm.Deterministic("b_medium", mu_re + z_re * sd_re)

        # Creative fixed effects (baseline = control_ref, pinned to 0).
        n_creative = len(creatives)
        b0 = pm.Normal("intercept", 0.0, 1.5)
        b_cre = pm.Normal("b_cre_raw", 0.0, 1.0, shape=n_creative)
        ref_idx = creatives.index(control_ref)
        b_cre_adj = at.set_subtensor(b_cre[ref_idx], 0.0)

        # BUG FIX: index the tensors with plain integer arrays.  The original
        # wrote `b_medium[d["medium_idx"]].values`, i.e. `.values` on a
        # PyTensor subtensor — a tensor variable has no `.values` attribute,
        # so building the model raised AttributeError.
        cre_ix = d["creative_idx"].values
        med_ix = d["medium_idx"].values
        if p > 0:
            b_x = pm.Normal("b_x", 0.0, 1.0, shape=p)
            lin = b0 + b_cre_adj[cre_ix] + b_medium[med_ix] + at.dot(X, b_x)
        else:
            lin = b0 + b_cre_adj[cre_ix] + b_medium[med_ix]

        p_click = pm.Deterministic("p_click", pm.math.sigmoid(lin))

        if binomial:
            pm.Binomial("y_obs", n=d["n"].values, p=p_click, observed=d["y"].values)
        else:
            pm.Bernoulli("y_obs", p=p_click, observed=d["y"].values)

        idata = pm.sample(draws=draws, tune=draws, chains=2, target_accept=target_accept, random_seed=random_seed, progressbar=False)

    # --- uplift: each creative's p_click vs. the control reference ---------
    post = idata.posterior
    # Flatten (chain, draw) into one sample dimension.
    b0_s = post["intercept"].stack(sample=("chain", "draw"))
    b_cre_s = post["b_cre_raw"].stack(sample=("chain", "draw"))
    # Compare at the "average medium" effect (mu_re), i.e. a typical medium.
    mu_re_s = post["mu_re"].stack(sample=("chain", "draw"))

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # BUG FIX: during fitting the reference coefficient was pinned to 0 via
    # set_subtensor, so `b_cre_raw[ref_idx]` never entered the likelihood and
    # its posterior equals its prior.  The original added that prior-only
    # sample to BOTH arms' linear predictors, injecting pure prior noise into
    # every uplift draw.  Use exactly 0 for the reference instead.
    lin_control = b0_s + mu_re_s
    results = []
    for cr in creatives:
        idx = creatives.index(cr)
        if idx == ref_idx:
            lin_treat = lin_control  # uplift of the control vs. itself is 0
        else:
            lin_treat = b0_s + b_cre_s.isel(b_cre_raw_dim_0=idx) + mu_re_s
        uplift = sigmoid(lin_treat) - sigmoid(lin_control)
        results.append({
            "creative": cr,
            "uplift_mean": float(uplift.mean().item()),
            "uplift_p_gt0": float((uplift > 0).mean().item()),
            "control_ref": control_ref,
        })

    return {
        "control_ref": control_ref,
        "creatives": creatives,
        "results": results,
    }
|
dashboard.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
from data import read_events, aggregate
|
| 6 |
+
from bandit import EmpiricalBayesHierarchicalThompson
|
| 7 |
+
from causal import fit_uplift_binary
|
| 8 |
+
|
| 9 |
+
BANDIT = EmpiricalBayesHierarchicalThompson(min_explore=0.05, margin=0.0, n_draws=10000)
|
| 10 |
+
|
| 11 |
+
def ui_refresh_tables():
|
| 12 |
+
df = read_events()
|
| 13 |
+
agg = aggregate()
|
| 14 |
+
return df, agg
|
| 15 |
+
|
| 16 |
+
def ui_recommend():
|
| 17 |
+
agg = aggregate()
|
| 18 |
+
if agg.empty:
|
| 19 |
+
return {"message": "No data yet. Upload or POST /api/ingest first."}
|
| 20 |
+
rec = BANDIT.recommend(agg)
|
| 21 |
+
return rec
|
| 22 |
+
|
| 23 |
+
def ui_plot_posteriors(medium: str):
|
| 24 |
+
agg = aggregate()
|
| 25 |
+
if agg.empty:
|
| 26 |
+
return gr.update(visible=False), "No data"
|
| 27 |
+
g = agg[agg["medium"].astype(str) == str(medium)].copy()
|
| 28 |
+
if g.empty:
|
| 29 |
+
return gr.update(visible=False), f"No data for medium={medium}"
|
| 30 |
+
|
| 31 |
+
# 事後平均の棒グラフ(Laplace 平滑 CTR)
|
| 32 |
+
g["ctr"] = (g["clicks"] + 1) / (g["impressions"] + 2)
|
| 33 |
+
fig = px.bar(g, x="creative", y="ctr", color="is_control", barmode="group", title=f"CTR (Laplace) by creative @ {medium}")
|
| 34 |
+
return gr.Plot(fig), ""
|
| 35 |
+
|
| 36 |
+
def ui_fit_uplift():
|
| 37 |
+
agg = aggregate()
|
| 38 |
+
if agg.empty:
|
| 39 |
+
return {"message": "No data"}
|
| 40 |
+
res = fit_uplift_binary(agg)
|
| 41 |
+
return res
|
| 42 |
+
|
| 43 |
+
def build_ui():
|
| 44 |
+
with gr.Blocks(title="AdCopy MAB Optimizer Pro") as demo:
|
| 45 |
+
gr.Markdown("# AdCopy MAB Optimizer Pro — Hierarchical TS + Uplift")
|
| 46 |
+
with gr.Tab("Data"):
|
| 47 |
+
btn = gr.Button("Refresh")
|
| 48 |
+
grid = gr.Dataframe(headers=["ts","date","medium","creative","is_control","impressions","clicks","conversions","cost","features_json"], wrap=True)
|
| 49 |
+
grid_agg = gr.Dataframe()
|
| 50 |
+
btn.click(ui_refresh_tables, outputs=[grid, grid_agg])
|
| 51 |
+
with gr.Tab("Bandit"):
|
| 52 |
+
bbtn = gr.Button("Suggest Allocation (TS)")
|
| 53 |
+
jout = gr.JSON()
|
| 54 |
+
bbtn.click(ui_recommend, outputs=jout)
|
| 55 |
+
with gr.Row():
|
| 56 |
+
medium = gr.Textbox(label="Medium for Plot", value="FB")
|
| 57 |
+
plot = gr.Plot(visible=False)
|
| 58 |
+
msg = gr.Markdown()
|
| 59 |
+
plot_btn = gr.Button("Plot CTR by Creative")
|
| 60 |
+
plot_btn.click(ui_plot_posteriors, inputs=[medium], outputs=[plot, msg])
|
| 61 |
+
with gr.Tab("Uplift (Causal)"):
|
| 62 |
+
cbtn = gr.Button("Fit Uplift Model")
|
| 63 |
+
cout = gr.JSON()
|
| 64 |
+
cbtn.click(ui_fit_uplift, outputs=cout)
|
| 65 |
+
return demo
|
data.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from typing import Optional, Dict, Any
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
DATA_DIR = os.environ.get("DATA_DIR", "./data")
|
| 9 |
+
LOG_PATH = os.path.join(DATA_DIR, "events.csv")
|
| 10 |
+
META_PATH = os.path.join(DATA_DIR, "meta.json")
|
| 11 |
+
|
| 12 |
+
SCHEMA = [
|
| 13 |
+
"ts", # ISO timestamp
|
| 14 |
+
"data", # YYYY-MM-DD(便宜)
|
| 15 |
+
"medium", # 媒体名(例:FB,GDN)
|
| 16 |
+
"creative", # クリエイティブID/名前(例:A1)
|
| 17 |
+
"is_control" # 0/1(コントロール群)
|
| 18 |
+
"impressions", # 表示数
|
| 19 |
+
"clicks", # クリック数(または目的コンバージョン)
|
| 20 |
+
"conversions", # 追加のCV(任意:0でもOK)
|
| 21 |
+
"cost", # コスト(任意)
|
| 22 |
+
"features_json" # クリエイティブ特徴量(dictをJSON文字列で)
|
| 23 |
+
]
|
| 24 |
+
|
| 25 |
+
os.makedirs(DATA_DIR, exist_ok=True)
|
| 26 |
+
if not os.path.exists(LOG_PATH):
|
| 27 |
+
pd.DataFrame(columns=SCHEMA).to_csv(LOG_PATH, index=False)
|
| 28 |
+
if not os.path.exists(META_PATH):
|
| 29 |
+
with open(META_PATH, "w" , encoding="utf-8") as f:
|
| 30 |
+
json.dump({"created_at": datetime.utcnow().isoformat()}, f)
|
| 31 |
+
|
| 32 |
+
def read_events() -> pd.DataFrame:
|
| 33 |
+
df = pd.read_csv(LOG_PATH)
|
| 34 |
+
if df.empty:
|
| 35 |
+
return df
|
| 36 |
+
# 型整備
|
| 37 |
+
df["data"] = pd.to_datetime(df["date"]).dt.date.astype(str)
|
| 38 |
+
df["is_control"] = df["is_control"].fillna(0).astype(int)
|
| 39 |
+
for col in ["impressions", "clicks", "conversions"]:
|
| 40 |
+
df[col] = df[col].fillna(0).astype(int)
|
| 41 |
+
df["cost"] = df["cost"].fillna(0.0).astype(float)
|
| 42 |
+
df["features_json"] = df["features_json"].fillna("{}")
|
| 43 |
+
return df
|
| 44 |
+
|
| 45 |
+
def append_events(rows: pd.DataFrame) -> None:
|
| 46 |
+
# 必須列チェック& 補完
|
| 47 |
+
for c in SCHEMA:
|
| 48 |
+
if c not in rows.columns:
|
| 49 |
+
if c == "features_json":
|
| 50 |
+
rows[c] = "{}"
|
| 51 |
+
elif c == "ts":
|
| 52 |
+
rows[c] = datetime.utcnow().isoformat()
|
| 53 |
+
elif c == "date":
|
| 54 |
+
rows[c] = datetime.utcnow().date().isoformat()
|
| 55 |
+
elif c in ("impressions", "clicks", "conversions", "is_control"):
|
| 56 |
+
rows[c] = 0
|
| 57 |
+
elif c == "cost":
|
| 58 |
+
rows[c] = 0.0
|
| 59 |
+
else:
|
| 60 |
+
rows[c] = None
|
| 61 |
+
rows = rows[SCHEMA]
|
| 62 |
+
rows.to_csv(LOG_PATH, mode="a", header=False, index=False)
|
| 63 |
+
|
| 64 |
+
def aggregate(levels=("medium", "creative")) -> pd.DataFrame:
|
| 65 |
+
df = read_events()
|
| 66 |
+
if df.empty:
|
| 67 |
+
return pd.DataFrame(columns=[*levels, "is_control" , "impressions", "clicks", "conversions", "cost"])
|
| 68 |
+
g = df.groupby([*levels, "is_control"], dropna=False).agg(
|
| 69 |
+
impressions=("impressions", "sum"),
|
| 70 |
+
clicks=("clicks", "sum"),
|
| 71 |
+
conversions=("conversions", "sum"),
|
| 72 |
+
cost=("cost", "sum"),
|
| 73 |
+
).reset_index()
|
| 74 |
+
return g
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.112.2
|
| 2 |
+
uvicorn[standard]==0.30.6
|
| 3 |
+
pandas==2.2.2
|
| 4 |
+
numpy==1.26.4
|
| 5 |
+
scipy==1.12.0
|
| 6 |
+
pymc==5.15.0
|
| 7 |
+
arviz==0.18.0
|
| 8 |
+
scikit-learn==1.5.1
|
| 9 |
+
plotly==5.23.0
|
| 10 |
+
gradio==4.44.0
|
| 11 |
+
pydantic==2.8.2
|
| 12 |
+
orjson==3.10.7
|
| 13 |
+
python-multipart==0.0.9
|
utils.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from typing import Any, Dict
|
| 4 |
+
|
| 5 |
+
def to_jsonable(obj: Any) -> Any:
|
| 6 |
+
"""Safely convert pandas/numpy objects to JSON-able python types. """
|
| 7 |
+
try:
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
+
except Exception:
|
| 11 |
+
np = None
|
| 12 |
+
pd = None
|
| 13 |
+
|
| 14 |
+
if pd is not None and isinstance(obj, pd.DataFrame):
|
| 15 |
+
return obj.to_dict(orient="records")
|
| 16 |
+
if pd is not None and isinstance(obj, pd.Series):
|
| 17 |
+
return obj.to_dict()
|
| 18 |
+
if np is not None and isinstance(obj, (np.integer,)):
|
| 19 |
+
return int(obj)
|
| 20 |
+
if np is not None and isinstance(obj, (np.floating,)):
|
| 21 |
+
f = float(obj)
|
| 22 |
+
# JSON NaNを避ける
|
| 23 |
+
if f !=f:
|
| 24 |
+
return None
|
| 25 |
+
return f
|
| 26 |
+
if isinstance(obj, (set,)):
|
| 27 |
+
return list(obj)
|
| 28 |
+
if isinstance(obj,(bytes, bytearray)):
|
| 29 |
+
return obj.decode("utf-8", errors="ignore")
|
| 30 |
+
return obj
|
| 31 |
+
|
| 32 |
+
def dumps(d: Dict[str, Any]) -> str:
|
| 33 |
+
return json.dumps(d, ensure_ascii=False, default=to_jsonable)
|