Spaces:
Sleeping
Sleeping
Upload 8 files
Browse files
- Dockerfile +23 -0
- app.py +71 -0
- bandit.py +115 -0
- causal.py +120 -0
- dashboard.py +65 -0
- data.py +74 -0
- requirements.txt +13 -0
- utils.py +33 -0
Dockerfile
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Minimal setup to run FastAPI on Hugging Face Spaces (Docker runtime).
FROM python:3.11-slim

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# Basic build dependencies for the scientific Python stack (pymc/scipy).
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libatlas-base-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app
COPY requirements.txt /app/
RUN pip install -r requirements.txt

COPY . /app

# HF Spaces routes traffic to port 7860.
EXPOSE 7860

# BUG FIX: was `CDM[...]`, which is not a Dockerfile instruction (build fails /
# container never starts). Use CMD in exec form.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from fastapi import FastAPI, UploadFile, File, Body
|
| 3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 4 |
+
from fastapi.responses import JSONResponse
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from typing import Dict, Any, List
|
| 7 |
+
|
| 8 |
+
from data import append_events, read_events, aggregate
|
| 9 |
+
from bandit import EmpiricalBayesHierarchicalThompson
|
| 10 |
+
from causal import fit_uplift_binary
|
| 11 |
+
from utils import dumps
|
| 12 |
+
|
| 13 |
+
# Gradio を FastAPI にマウント
|
| 14 |
+
from dashboard import build_ui
|
| 15 |
+
import gradio as gr
|
| 16 |
+
|
| 17 |
+
app = FastAPI(title="AdCopy MAB Optimizer Pro", version="0.1.0")
|
| 18 |
+
app.add_middleware(
|
| 19 |
+
CORSMiddleware,
|
| 20 |
+
allow_origins=["*"],
|
| 21 |
+
allow_credentials=True,
|
| 22 |
+
allow_methods=["*"],
|
| 23 |
+
allow_headers=["*"],
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
ui = build_ui()
|
| 27 |
+
app = gr.mount_gradio_app(app, ui, path="/")
|
| 28 |
+
|
| 29 |
+
BANDIT = EmpiricalBayesHierarchicalThompson(min_explore=0.05, margin=0.0, n_draws=20000)
|
| 30 |
+
|
| 31 |
+
@app.get("/api/health")
|
| 32 |
+
def health():
|
| 33 |
+
return {"status": "ok"}
|
| 34 |
+
|
| 35 |
+
@app.get("/api/events")
|
| 36 |
+
def get_events():
|
| 37 |
+
df = read_events()
|
| 38 |
+
return JSONResponse(content=df.to_dict(orient="records"))
|
| 39 |
+
|
| 40 |
+
@app.post("/api/ingest")
|
| 41 |
+
def ingest(rows: List[Dict[str, Any]] = Body(..., embed=True)):
|
| 42 |
+
"""
|
| 43 |
+
rows: [
|
| 44 |
+
{"date":"2025-09-01","medium":"FB","creative":"A1","is_control":1,"impressions":1000,"clicks":30,"conversions":5,"cost":1000.0,"features_json":"{\\"len\\":20}"},
|
| 45 |
+
...
|
| 46 |
+
]
|
| 47 |
+
"""
|
| 48 |
+
df = pd.DataFrame(rows)
|
| 49 |
+
append_events(df)
|
| 50 |
+
return {"ok": True, "n": len(df)}
|
| 51 |
+
|
| 52 |
+
@app.get("/api/aggregate")
|
| 53 |
+
def get_agg():
|
| 54 |
+
agg = aggregate()
|
| 55 |
+
return JSONResponse(content=agg.to_dict(orient="records"))
|
| 56 |
+
|
| 57 |
+
@app.post("/api/optimize")
|
| 58 |
+
def optimize():
|
| 59 |
+
agg = aggregate()
|
| 60 |
+
if agg.empty:
|
| 61 |
+
return {"message": "no data"}
|
| 62 |
+
rec = BANDIT.recommend(agg)
|
| 63 |
+
return JSONResponse(content=rec)
|
| 64 |
+
|
| 65 |
+
@app.post("/api/uplift")
|
| 66 |
+
def uplift():
|
| 67 |
+
agg = aggregate()
|
| 68 |
+
if agg.empty:
|
| 69 |
+
return {"message": "no data"}
|
| 70 |
+
res = fit_uplift_binary(agg)
|
| 71 |
+
return JSONResponse(content=res)
|
bandit.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from typing import Dict, Any, Tuple
|
| 5 |
+
|
| 6 |
+
# ------------------------------
|
| 7 |
+
# Empirical Bayes hierarchical Beta-Binomial + Thompson Sampling
|
| 8 |
+
# ------------------------------
|
| 9 |
+
|
| 10 |
+
class EmpiricalBayesHierarchicalThompson:
|
| 11 |
+
"""
|
| 12 |
+
各アーム (medium, creative) のクリック率 p を Beta 事前分布で表現。
|
| 13 |
+
事前 Beta(a0, b0) は全アームの経験ベイズ推定で安定化。
|
| 14 |
+
事後: Beta(a0 + clicks, b0 + impressions - clicks)
|
| 15 |
+
|
| 16 |
+
* 少データ時に極端な推定を避ける
|
| 17 |
+
* Thompson Sampling により配分を提案
|
| 18 |
+
* 自動停止/増配分判断を提供
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
def __init__(self, min_explore: float = 0.05, margin: float = 0.0, n_draws: int = 20000, seed: int = 42):
|
| 22 |
+
self.min_explore = min_explore # 各アームの最低配分
|
| 23 |
+
self.margin = margin # 改善余地のマージン(ex: 0.002 = 0.2pp)
|
| 24 |
+
self.n_draws = n_draws
|
| 25 |
+
self.rng = np.random.default_rng(seed)
|
| 26 |
+
|
| 27 |
+
@staticmethod
|
| 28 |
+
def _eb_prior_by_moments(arm_df: pd.DataFrame) -> Tuple[float, float]:
|
| 29 |
+
"""
|
| 30 |
+
アーム別 CTR の分散を利用して Beta(a0, b0) を MoM 推定。
|
| 31 |
+
既知の式: mean=m, var=v => a0 = m*(m*(1-m)/v - 1), b0 = (1-m)*(m*(1-m)/v - 1)
|
| 32 |
+
v が小さすぎる/0 の場合は弱情報事前を返す。
|
| 33 |
+
"""
|
| 34 |
+
# 各アームの粗推定 CTR(Laplace 平滑で安定化)
|
| 35 |
+
ctr = (arm_df["clicks"] + 1) / (arm_df["impressions"] + 2)
|
| 36 |
+
m = float(np.clip(ctr.mean(), 1e-6, 1 - 1e-6))
|
| 37 |
+
v = float(np.var(ctr, ddof=1))
|
| 38 |
+
if not np.isfinite(v) or v <= 1e-8:
|
| 39 |
+
# ほぼ同一のCTR → 弱情報事前
|
| 40 |
+
return 1.0, 1.0
|
| 41 |
+
k = m * (1 - m) / v - 1.0
|
| 42 |
+
if k <= 0 or not np.isfinite(k):
|
| 43 |
+
return 1.0, 1.0
|
| 44 |
+
a0 = m * k
|
| 45 |
+
b0 = (1 - m) * k
|
| 46 |
+
# 上限/下限を設定
|
| 47 |
+
a0 = float(np.clip(a0, 0.5, 1000))
|
| 48 |
+
b0 = float(np.clip(b0, 0.5, 1000))
|
| 49 |
+
return a0, b0
|
| 50 |
+
|
| 51 |
+
def _posterior_params(self, df: pd.DataFrame) -> pd.DataFrame:
|
| 52 |
+
a0, b0 = self._eb_prior_by_moments(df)
|
| 53 |
+
post = df.copy()
|
| 54 |
+
post["alpha"] = a0 + post["clicks"].astype(float)
|
| 55 |
+
post["beta"] = b0 + (post["impressions"] - post["clicks"]).astype(float)
|
| 56 |
+
post["post_mean"] = post["alpha"] / (post["alpha"] + post["beta"])
|
| 57 |
+
post["post_var"] = (post["alpha"] * post["beta"]) / (((post["alpha"] + post["beta"])**2) * (post["alpha"] + post["beta"] + 1))
|
| 58 |
+
post["a0"] = a0
|
| 59 |
+
post["b0"] = b0
|
| 60 |
+
return post
|
| 61 |
+
|
| 62 |
+
def recommend(self, df: pd.DataFrame) -> Dict[str, Any]:
|
| 63 |
+
"""媒体ごとにTSで配分率を提案し、停止/増配分を判断。"""
|
| 64 |
+
post = self._posterior_params(df)
|
| 65 |
+
out = {}
|
| 66 |
+
for medium, g in post.groupby("medium"):
|
| 67 |
+
arms = g.reset_index(drop=True)
|
| 68 |
+
K = len(arms)
|
| 69 |
+
samples = self.rng.beta(arms["alpha"].values, arms["beta"].values, size=(self.n_draws, K))
|
| 70 |
+
# ベースライン(control があればそれを優先)
|
| 71 |
+
if (arms["is_control"] == 1).any():
|
| 72 |
+
base_idx = int(arms.index[arms["is_control"] == 1][0])
|
| 73 |
+
else:
|
| 74 |
+
base_idx = int(arms["post_mean"].idxmax())
|
| 75 |
+
base_col = list(arms.index).index(base_idx)
|
| 76 |
+
|
| 77 |
+
winners = np.argmax(samples, axis=1)
|
| 78 |
+
win_prob = np.bincount(winners, minlength=K) / self.n_draws
|
| 79 |
+
|
| 80 |
+
# 各アームがベースより (margin) だけ下回る確率
|
| 81 |
+
worse_than_base = (samples.T < (samples[:, base_col] - self.margin)).mean(axis=1)
|
| 82 |
+
|
| 83 |
+
# 停止・増配分判定
|
| 84 |
+
decisions = []
|
| 85 |
+
for k in range(K):
|
| 86 |
+
d = {
|
| 87 |
+
"creative": arms.loc[k, "creative"],
|
| 88 |
+
"is_control": int(arms.loc[k, "is_control"]),
|
| 89 |
+
"post_mean": float(arms.loc[k, "post_mean"]),
|
| 90 |
+
"win_prob": float(win_prob[k]),
|
| 91 |
+
"worse_than_base_prob": float(worse_than_base[k]),
|
| 92 |
+
"status": "hold"
|
| 93 |
+
}
|
| 94 |
+
if d["worse_than_base_prob"] >= 0.9 and arms.loc[k, "impressions"] >= 200:
|
| 95 |
+
d["status"] = "stop"
|
| 96 |
+
elif d["win_prob"] >= 0.95 and arms.loc[k, "impressions"] >= 200:
|
| 97 |
+
d["status"] = "boost"
|
| 98 |
+
decisions.append(d)
|
| 99 |
+
|
| 100 |
+
# 配分:勝者確率に基づき、min_explore を確保
|
| 101 |
+
alloc = win_prob.copy()
|
| 102 |
+
alloc = alloc / alloc.sum()
|
| 103 |
+
alloc = np.clip(alloc, self.min_explore, 1.0)
|
| 104 |
+
alloc = alloc / alloc.sum()
|
| 105 |
+
|
| 106 |
+
out[str(medium)] = {
|
| 107 |
+
"arms": arms[["creative", "impressions", "clicks", "post_mean", "is_control"]].assign(
|
| 108 |
+
win_prob=win_prob,
|
| 109 |
+
worse_than_base_prob=worse_than_base,
|
| 110 |
+
).to_dict(orient="records"),
|
| 111 |
+
"allocation": {str(arms.loc[k, "creative"]): float(alloc[k]) for k in range(K)},
|
| 112 |
+
"decisions": decisions,
|
| 113 |
+
"posterior_prior": {"a0": float(arms.loc[0, "a0"]), "b0": float(arms.loc[0, "b0"])},
|
| 114 |
+
}
|
| 115 |
+
return out
|
causal.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import pymc as pm
|
| 5 |
+
import pytensor.tensor as at
|
| 6 |
+
from typing import Dict, Any, Optional
|
| 7 |
+
|
| 8 |
+
"""
|
| 9 |
+
階層ロジスティック回帰で uplift(コントロールとの差)を推定。
|
| 10 |
+
- 目的変数: click (0/1) 推奨。集計単位が日次のときは二項モデルに切替可能。
|
| 11 |
+
- 階層: 媒体ランダム効果。
|
| 12 |
+
- 処置: creative(control=1 の行を基準)
|
| 13 |
+
- 共変量: 任意の数値列(正規化して利用)
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
def _zscore(df: pd.DataFrame, cols):
|
| 17 |
+
x = df[cols].astype(float)
|
| 18 |
+
return (x - x.mean()) / (x.std(ddof=0) + 1e-9)
|
| 19 |
+
|
def fit_uplift_binary(
    df: pd.DataFrame,
    outcome_col: str = "click_bin",
    medium_col: str = "medium",
    creative_col: str = "creative",
    control_flag_col: str = "is_control",
    feature_cols: Optional[list] = None,
    draws: int = 1000,
    target_accept: float = 0.9,
    random_seed: int = 42,
) -> Dict[str, Any]:
    """
    Hierarchical Bayesian logistic model of uplift vs. the control creative.

    - Outcome: row-level 0/1 in `outcome_col` when present; otherwise the
      aggregated (impressions, clicks) columns with a Binomial likelihood.
    - Hierarchy: a random intercept per medium.
    - Treatment: creative fixed effects, with the control creative as baseline
      (its coefficient is pinned to 0).
    - Covariates: optional numeric columns, z-scored.

    Returns a dict with the control reference, the creative list, and per
    creative the posterior mean uplift and P(uplift > 0).
    """
    # --- preprocessing -----------------------------------------------------
    d = df.copy().reset_index(drop=True)
    if outcome_col not in d.columns:
        # Aggregated input -> Binomial likelihood over (n, y).
        d["n"] = d["impressions"].astype(int)
        d["y"] = d["clicks"].astype(int)
        binomial = True
    else:
        d["y"] = d[outcome_col].astype(int)
        d["n"] = 1
        binomial = False

    # Creative index, with the control creative as the reference level.
    creatives = d[creative_col].astype(str).unique().tolist()
    control_creatives = d.loc[d[control_flag_col] == 1, creative_col].astype(str).unique().tolist()
    control_ref = control_creatives[0] if len(control_creatives) else creatives[0]

    d["creative_idx"] = d[creative_col].astype(str).apply(lambda x: creatives.index(x)).astype(int)
    d["medium_idx"] = d[medium_col].astype(str).astype("category").cat.codes.values

    # Optional covariates.
    X = None
    if feature_cols:
        X = _zscore(d, feature_cols).values
        p = X.shape[1]
    else:
        p = 0

    with pm.Model() as model:
        # Medium random intercept (non-centered parameterization).
        n_medium = int(d["medium_idx"].max()) + 1
        mu_re = pm.Normal("mu_re", 0.0, 1.0)
        sd_re = pm.HalfNormal("sd_re", 1.0)
        z_re = pm.Normal("z_re", 0.0, 1.0, shape=n_medium)
        b_medium = pm.Deterministic("b_medium", mu_re + z_re * sd_re)

        # Creative fixed effects (baseline = control_ref, pinned to 0).
        n_creative = len(creatives)
        b0 = pm.Normal("intercept", 0.0, 1.5)
        b_cre = pm.Normal("b_cre_raw", 0.0, 1.0, shape=n_creative)
        ref_idx = creatives.index(control_ref)
        b_cre_adj = at.set_subtensor(b_cre[ref_idx], 0.0)

        # BUG FIX: index the tensors with plain integer arrays.  The original
        # wrote `b_medium[d["medium_idx"]].values`, i.e. `.values` on a
        # PyTensor subtensor — a tensor variable has no `.values` attribute,
        # so building the model raised AttributeError.
        cre_ix = d["creative_idx"].values
        med_ix = d["medium_idx"].values
        if p > 0:
            b_x = pm.Normal("b_x", 0.0, 1.0, shape=p)
            lin = b0 + b_cre_adj[cre_ix] + b_medium[med_ix] + at.dot(X, b_x)
        else:
            lin = b0 + b_cre_adj[cre_ix] + b_medium[med_ix]

        p_click = pm.Deterministic("p_click", pm.math.sigmoid(lin))

        if binomial:
            pm.Binomial("y_obs", n=d["n"].values, p=p_click, observed=d["y"].values)
        else:
            pm.Bernoulli("y_obs", p=p_click, observed=d["y"].values)

        idata = pm.sample(draws=draws, tune=draws, chains=2, target_accept=target_accept, random_seed=random_seed, progressbar=False)

    # --- uplift: each creative's p_click vs. the control reference ---------
    post = idata.posterior
    # Flatten (chain, draw) into one sample dimension.
    b0_s = post["intercept"].stack(sample=("chain", "draw"))
    b_cre_s = post["b_cre_raw"].stack(sample=("chain", "draw"))
    # Compare at the "average medium" effect (mu_re), i.e. a typical medium.
    mu_re_s = post["mu_re"].stack(sample=("chain", "draw"))

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    # BUG FIX: during fitting the reference coefficient was pinned to 0 via
    # set_subtensor, so `b_cre_raw[ref_idx]` never entered the likelihood and
    # its posterior equals its prior.  The original added that prior-only
    # sample to BOTH arms' linear predictors, injecting pure prior noise into
    # every uplift draw.  Use exactly 0 for the reference instead.
    lin_control = b0_s + mu_re_s
    results = []
    for cr in creatives:
        idx = creatives.index(cr)
        if idx == ref_idx:
            lin_treat = lin_control  # uplift of the control vs. itself is 0
        else:
            lin_treat = b0_s + b_cre_s.isel(b_cre_raw_dim_0=idx) + mu_re_s
        uplift = sigmoid(lin_treat) - sigmoid(lin_control)
        results.append({
            "creative": cr,
            "uplift_mean": float(uplift.mean().item()),
            "uplift_p_gt0": float((uplift > 0).mean().item()),
            "control_ref": control_ref,
        })

    return {
        "control_ref": control_ref,
        "creatives": creatives,
        "results": results,
    }
|
dashboard.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import gradio as gr
|
| 4 |
+
import plotly.express as px
|
| 5 |
+
from data import read_events, aggregate
|
| 6 |
+
from bandit import EmpiricalBayesHierarchicalThompson
|
| 7 |
+
from causal import fit_uplift_binary
|
| 8 |
+
|
| 9 |
+
BANDIT = EmpiricalBayesHierarchicalThompson(min_explore=0.05, margin=0.0, n_draws=10000)
|
| 10 |
+
|
| 11 |
+
def ui_refresh_tables():
|
| 12 |
+
df = read_events()
|
| 13 |
+
agg = aggregate()
|
| 14 |
+
return df, agg
|
| 15 |
+
|
| 16 |
+
def ui_recommend():
|
| 17 |
+
agg = aggregate()
|
| 18 |
+
if agg.empty:
|
| 19 |
+
return {"message": "No data yet. Upload or POST /api/ingest first."}
|
| 20 |
+
rec = BANDIT.recommend(agg)
|
| 21 |
+
return rec
|
| 22 |
+
|
| 23 |
+
def ui_plot_posteriors(medium: str):
|
| 24 |
+
agg = aggregate()
|
| 25 |
+
if agg.empty:
|
| 26 |
+
return gr.update(visible=False), "No data"
|
| 27 |
+
g = agg[agg["medium"].astype(str) == str(medium)].copy()
|
| 28 |
+
if g.empty:
|
| 29 |
+
return gr.update(visible=False), f"No data for medium={medium}"
|
| 30 |
+
|
| 31 |
+
# 事後平均の棒グラフ(Laplace 平滑 CTR)
|
| 32 |
+
g["ctr"] = (g["clicks"] + 1) / (g["impressions"] + 2)
|
| 33 |
+
fig = px.bar(g, x="creative", y="ctr", color="is_control", barmode="group", title=f"CTR (Laplace) by creative @ {medium}")
|
| 34 |
+
return gr.Plot(fig), ""
|
| 35 |
+
|
| 36 |
+
def ui_fit_uplift():
|
| 37 |
+
agg = aggregate()
|
| 38 |
+
if agg.empty:
|
| 39 |
+
return {"message": "No data"}
|
| 40 |
+
res = fit_uplift_binary(agg)
|
| 41 |
+
return res
|
| 42 |
+
|
| 43 |
+
def build_ui():
|
| 44 |
+
with gr.Blocks(title="AdCopy MAB Optimizer Pro") as demo:
|
| 45 |
+
gr.Markdown("# AdCopy MAB Optimizer Pro — Hierarchical TS + Uplift")
|
| 46 |
+
with gr.Tab("Data"):
|
| 47 |
+
btn = gr.Button("Refresh")
|
| 48 |
+
grid = gr.Dataframe(headers=["ts","date","medium","creative","is_control","impressions","clicks","conversions","cost","features_json"], wrap=True)
|
| 49 |
+
grid_agg = gr.Dataframe()
|
| 50 |
+
btn.click(ui_refresh_tables, outputs=[grid, grid_agg])
|
| 51 |
+
with gr.Tab("Bandit"):
|
| 52 |
+
bbtn = gr.Button("Suggest Allocation (TS)")
|
| 53 |
+
jout = gr.JSON()
|
| 54 |
+
bbtn.click(ui_recommend, outputs=jout)
|
| 55 |
+
with gr.Row():
|
| 56 |
+
medium = gr.Textbox(label="Medium for Plot", value="FB")
|
| 57 |
+
plot = gr.Plot(visible=False)
|
| 58 |
+
msg = gr.Markdown()
|
| 59 |
+
plot_btn = gr.Button("Plot CTR by Creative")
|
| 60 |
+
plot_btn.click(ui_plot_posteriors, inputs=[medium], outputs=[plot, msg])
|
| 61 |
+
with gr.Tab("Uplift (Causal)"):
|
| 62 |
+
cbtn = gr.Button("Fit Uplift Model")
|
| 63 |
+
cout = gr.JSON()
|
| 64 |
+
cbtn.click(ui_fit_uplift, outputs=cout)
|
| 65 |
+
return demo
|
data.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import os
|
| 3 |
+
import json
|
| 4 |
+
from typing import Optional, Dict, Any
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
DATA_DIR = os.environ.get("DATA_DIR", "./data")
|
| 9 |
+
LOG_PATH = os.path.join(DATA_DIR, "events.csv")
|
| 10 |
+
META_PATH = os.path.join(DATA_DIR, "meta.json")
|
| 11 |
+
|
| 12 |
+
SCHEMA = [
|
| 13 |
+
"ts", # ISO timestamp
|
| 14 |
+
"data", # YYYY-MM-DD(便宜)
|
| 15 |
+
"medium", # 媒体名(例:FB,GDN)
|
| 16 |
+
"creative", # クリエイティブID/名前(例:A1)
|
| 17 |
+
"is_control" # 0/1(コントロール群)
|
| 18 |
+
"impressions", # 表示数
|
| 19 |
+
"clicks", # クリック数(または目的コンバージョン)
|
| 20 |
+
"conversions", # 追加のCV(任意:0でもOK)
|
| 21 |
+
"cost", # コスト(任意)
|
| 22 |
+
"features_json" # クリエイティブ特徴量(dictをJSON文字列で)
|
| 23 |
+
]
|
| 24 |
+
|
| 25 |
+
os.makedirs(DATA_DIR, exist_ok=True)
|
| 26 |
+
if not os.path.exists(LOG_PATH):
|
| 27 |
+
pd.DataFrame(columns=SCHEMA).to_csv(LOG_PATH, index=False)
|
| 28 |
+
if not os.path.exists(META_PATH):
|
| 29 |
+
with open(META_PATH, "w" , encoding="utf-8") as f:
|
| 30 |
+
json.dump({"created_at": datetime.utcnow().isoformat()}, f)
|
| 31 |
+
|
| 32 |
+
def read_events() -> pd.DataFrame:
|
| 33 |
+
df = pd.read_csv(LOG_PATH)
|
| 34 |
+
if df.empty:
|
| 35 |
+
return df
|
| 36 |
+
# 型整備
|
| 37 |
+
df["data"] = pd.to_datetime(df["date"]).dt.date.astype(str)
|
| 38 |
+
df["is_control"] = df["is_control"].fillna(0).astype(int)
|
| 39 |
+
for col in ["impressions", "clicks", "conversions"]:
|
| 40 |
+
df[col] = df[col].fillna(0).astype(int)
|
| 41 |
+
df["cost"] = df["cost"].fillna(0.0).astype(float)
|
| 42 |
+
df["features_json"] = df["features_json"].fillna("{}")
|
| 43 |
+
return df
|
| 44 |
+
|
| 45 |
+
def append_events(rows: pd.DataFrame) -> None:
|
| 46 |
+
# 必須列チェック& 補完
|
| 47 |
+
for c in SCHEMA:
|
| 48 |
+
if c not in rows.columns:
|
| 49 |
+
if c == "features_json":
|
| 50 |
+
rows[c] = "{}"
|
| 51 |
+
elif c == "ts":
|
| 52 |
+
rows[c] = datetime.utcnow().isoformat()
|
| 53 |
+
elif c == "date":
|
| 54 |
+
rows[c] = datetime.utcnow().date().isoformat()
|
| 55 |
+
elif c in ("impressions", "clicks", "conversions", "is_control"):
|
| 56 |
+
rows[c] = 0
|
| 57 |
+
elif c == "cost":
|
| 58 |
+
rows[c] = 0.0
|
| 59 |
+
else:
|
| 60 |
+
rows[c] = None
|
| 61 |
+
rows = rows[SCHEMA]
|
| 62 |
+
rows.to_csv(LOG_PATH, mode="a", header=False, index=False)
|
| 63 |
+
|
| 64 |
+
def aggregate(levels=("medium", "creative")) -> pd.DataFrame:
|
| 65 |
+
df = read_events()
|
| 66 |
+
if df.empty:
|
| 67 |
+
return pd.DataFrame(columns=[*levels, "is_control" , "impressions", "clicks", "conversions", "cost"])
|
| 68 |
+
g = df.groupby([*levels, "is_control"], dropna=False).agg(
|
| 69 |
+
impressions=("impressions", "sum"),
|
| 70 |
+
clicks=("clicks", "sum"),
|
| 71 |
+
conversions=("conversions", "sum"),
|
| 72 |
+
cost=("cost", "sum"),
|
| 73 |
+
).reset_index()
|
| 74 |
+
return g
|
requirements.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.112.2
|
| 2 |
+
uvicorn[standard]==0.30.6
|
| 3 |
+
pandas==2.2.2
|
| 4 |
+
numpy==1.26.4
|
| 5 |
+
scipy==1.12.0
|
| 6 |
+
pymc==5.15.0
|
| 7 |
+
arviz==0.18.0
|
| 8 |
+
scikit-learn==1.5.1
|
| 9 |
+
plotly==5.23.0
|
| 10 |
+
gradio==4.44.0
|
| 11 |
+
pydantic==2.8.2
|
| 12 |
+
orjson==3.10.7
|
| 13 |
+
python-multipart==0.0.9
|
utils.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import json
|
| 3 |
+
from typing import Any, Dict
|
| 4 |
+
|
| 5 |
+
def to_jsonable(obj: Any) -> Any:
|
| 6 |
+
"""Safely convert pandas/numpy objects to JSON-able python types. """
|
| 7 |
+
try:
|
| 8 |
+
import numpy as np
|
| 9 |
+
import pandas as pd
|
| 10 |
+
except Exception:
|
| 11 |
+
np = None
|
| 12 |
+
pd = None
|
| 13 |
+
|
| 14 |
+
if pd is not None and isinstance(obj, pd.DataFrame):
|
| 15 |
+
return obj.to_dict(orient="records")
|
| 16 |
+
if pd is not None and isinstance(obj, pd.Series):
|
| 17 |
+
return obj.to_dict()
|
| 18 |
+
if np is not None and isinstance(obj, (np.integer,)):
|
| 19 |
+
return int(obj)
|
| 20 |
+
if np is not None and isinstance(obj, (np.floating,)):
|
| 21 |
+
f = float(obj)
|
| 22 |
+
# JSON NaNを避ける
|
| 23 |
+
if f !=f:
|
| 24 |
+
return None
|
| 25 |
+
return f
|
| 26 |
+
if isinstance(obj, (set,)):
|
| 27 |
+
return list(obj)
|
| 28 |
+
if isinstance(obj,(bytes, bytearray)):
|
| 29 |
+
return obj.decode("utf-8", errors="ignore")
|
| 30 |
+
return obj
|
| 31 |
+
|
| 32 |
+
def dumps(d: Dict[str, Any]) -> str:
|
| 33 |
+
return json.dumps(d, ensure_ascii=False, default=to_jsonable)
|