Spaces:
Running
Running
| """ | |
| CUPED – Controlled-experiment Using Pre-Experiment Data. | |
| Reference: Deng, Xu, Kohavi, Walker (2013) "Improving the Sensitivity of | |
| Online Controlled Experiments by Utilizing Pre-Experiment Data." | |
| Microsoft Research. KDD 2013. | |
| Key idea | |
| -------- | |
| The post-experiment metric Y is correlated with a pre-experiment covariate X | |
| (e.g., last month's conversion rate). We subtract the part of Y that is | |
| "predictable" from X, leaving a lower-variance residual. This reduces the | |
| required sample size to a (1 – ρ²) fraction of the original, where ρ = Corr(Y, X). | |
| Usage | |
| ----- | |
| from src.cuped import CUPEDResult, cuped_ttest | |
| result = cuped_ttest(df, metric_col="converted", pre_metric_col="pre_conv") | |
| """ | |
| from __future__ import annotations | |
| import numpy as np | |
| import pandas as pd | |
| from dataclasses import dataclass, asdict | |
| from scipy import stats | |
@dataclass
class CUPEDResult:
    """Side-by-side summary of a raw and a CUPED-adjusted two-sample t-test.

    Instances are built with keyword arguments (one per field below) and can
    be flattened to a plain dictionary with :meth:`to_dict`.  The class must
    be a dataclass: both keyword construction and ``asdict`` rely on the
    generated ``__init__`` and field metadata.
    """

    theta: float  # coefficient from regressing Y (post) on X (pre): Cov(Y, X) / Var(X)
    corr_pre_post: float  # Pearson ρ between pre and post metric
    variance_reduction_pct: float  # % reduction in metric variance

    # Raw (unadjusted) test
    raw_mean_control: float
    raw_mean_treatment: float
    raw_t_stat: float
    raw_p_value: float
    raw_significant: bool

    # CUPED-adjusted test
    cuped_mean_control: float
    cuped_mean_treatment: float
    cuped_t_stat: float
    cuped_p_value: float
    cuped_significant: bool

    # Effective sample size saving
    sample_size_reduction_pct: float  # how much smaller an experiment could be
    alpha: float  # significance level the *_significant flags were evaluated at

    def to_dict(self) -> dict:
        """Return all fields as a ``dict`` keyed by field name, in declaration order."""
        return asdict(self)
def _cuped_adjust(
    y: np.ndarray,
    x: np.ndarray,
    x_global_mean: float,
) -> tuple[np.ndarray, float]:
    """
    Strip from ``y`` the part that is linearly explained by ``x``.

    Parameters
    ----------
    y             : outcome values
    x             : covariate values, paired element-wise with ``y``
    x_global_mean : value subtracted from ``x`` to centre it before scaling

    Returns
    -------
    (y_adjusted, theta) where
        theta      = Cov(Y, X) / Var(X), both read from ``np.cov(y, x)``;
                     0.0 when that variance is not strictly positive
        y_adjusted = y - theta * (x - x_global_mean)
    """
    joint_cov = np.cov(y, x)
    var_x = joint_cov[1, 1]
    if var_x > 0:
        theta = joint_cov[0, 1] / var_x
    else:
        # No usable spread in the covariate: fall back to "no adjustment".
        theta = 0.0
    centred_x = x - x_global_mean
    return y - theta * centred_x, theta
def cuped_ttest(
    df: pd.DataFrame,
    metric_col: str,
    pre_metric_col: str,
    treatment_col: str = "treatment",
    alpha: float = 0.05,
) -> CUPEDResult:
    """
    Run a t-test on both raw and CUPED-adjusted metrics.

    Parameters
    ----------
    df            : DataFrame with columns [metric_col, pre_metric_col, treatment_col]
    metric_col    : post-experiment outcome (e.g., "revenue", "converted")
    pre_metric_col: pre-experiment covariate (same metric from before the experiment)
    treatment_col : 0 = control, 1 = treatment
    alpha         : significance level

    Returns
    -------
    CUPEDResult holding theta, the pre/post correlation, the variance
    reduction, and the means / t statistic / p-value / significance flag of
    both the raw and the adjusted test.

    Notes
    -----
    * Both tests call ``stats.ttest_ind(control, treatment)`` with its default
      settings, so a treatment mean above the control mean yields a negative
      t statistic.
    * Theta, the covariate mean, the variance reduction and the correlation
      are all computed on every row of ``df``.
    * When the pre/post correlation is undefined (``np.corrcoef`` returns a
      non-finite value, e.g. because one column has zero variance) it is
      reported as 0.0, and so is ``sample_size_reduction_pct``.  This matches
      the 0.0 fallbacks already used for theta and the variance reduction.
    """
    ctrl = df[df[treatment_col] == 0]
    trt = df[df[treatment_col] == 1]

    y_ctrl = ctrl[metric_col].values.astype(float)
    y_trt = trt[metric_col].values.astype(float)
    x_ctrl = ctrl[pre_metric_col].values.astype(float)
    x_trt = trt[pre_metric_col].values.astype(float)

    # Theta is estimated once on the pooled data and shared by both arms,
    # so the adjustment itself cannot differ between control and treatment.
    y_all = df[metric_col].values.astype(float)
    x_all = df[pre_metric_col].values.astype(float)
    x_global_mean = x_all.mean()
    y_all_adj, theta = _cuped_adjust(y_all, x_all, x_global_mean)

    # Apply same theta to each group (uses global X̄ to keep E[Y_adj] = E[Y])
    y_ctrl_adj = y_ctrl - theta * (x_ctrl - x_global_mean)
    y_trt_adj = y_trt - theta * (x_trt - x_global_mean)

    # Variance reduction, measured on the pooled metric before vs. after adjustment
    var_raw = np.var(y_all)
    var_cuped = np.var(y_all_adj)
    var_reduction = (1.0 - var_cuped / var_raw) * 100.0 if var_raw > 0 else 0.0

    # Correlation of pre and post metric.  A zero-variance column makes
    # np.corrcoef divide 0 by 0; silence that floating-point warning and
    # report "no correlation" instead of letting NaN leak into the result.
    with np.errstate(divide="ignore", invalid="ignore"):
        corr = float(np.corrcoef(y_all, x_all)[0, 1])
    if not np.isfinite(corr):
        corr = 0.0

    # Raw t-test
    t_raw, p_raw = stats.ttest_ind(y_ctrl, y_trt)

    # CUPED t-test
    t_cup, p_cup = stats.ttest_ind(y_ctrl_adj, y_trt_adj)

    # Sample size reduction (CUPED needs (1-ρ²) fraction of original n)
    ss_reduction = corr ** 2 * 100.0

    return CUPEDResult(
        theta=round(float(theta), 6),
        corr_pre_post=round(corr, 4),
        variance_reduction_pct=round(float(var_reduction), 2),
        raw_mean_control=round(float(y_ctrl.mean()), 6),
        raw_mean_treatment=round(float(y_trt.mean()), 6),
        raw_t_stat=round(float(t_raw), 4),
        raw_p_value=round(float(p_raw), 6),
        raw_significant=bool(p_raw < alpha),
        cuped_mean_control=round(float(y_ctrl_adj.mean()), 6),
        cuped_mean_treatment=round(float(y_trt_adj.mean()), 6),
        cuped_t_stat=round(float(t_cup), 4),
        cuped_p_value=round(float(p_cup), 6),
        cuped_significant=bool(p_cup < alpha),
        sample_size_reduction_pct=round(ss_reduction, 2),
        alpha=alpha,
    )
def simulate_cuped_benefit(
    n_per_group: int,
    baseline_rate: float,
    true_effect: float,
    corr: float,  # correlation between pre and post metric
    n_sims: int = 2000,
    alpha: float = 0.05,
    seed: int = 42,
) -> dict:
    """
    Simulate A/B experiments at a given pre-post correlation to show
    how CUPED variance reduction improves power.

    Each simulated experiment draws a standard-normal pre-experiment metric
    for ``2 * n_per_group`` users and builds the post-experiment metric as
    ``corr * pre + sqrt(1 - corr**2) * noise``.  The post metric therefore
    has unit variance and a population correlation of exactly ``corr`` with
    the pre metric.  The second half of the users is the treatment arm and
    has ``true_effect`` added to its post metric.

    Parameters
    ----------
    n_per_group   : users per arm in every simulated experiment
    baseline_rate : accepted for interface compatibility; not used by the
                    simulation
    true_effect   : additive shift applied to the treatment arm (in units of
                    the post metric's standard deviation, which is 1)
    corr          : target Pearson correlation between pre and post metric;
                    must lie in [-1, 1]
    n_sims        : number of simulated experiments; must be >= 1
    alpha         : significance level for both tests
    seed          : seed for ``np.random.default_rng``

    Returns
    -------
    dict with the inputs echoed back, the theoretical variance reduction
    (``corr**2`` as a percentage), the share of simulations in which the raw
    and the CUPED-adjusted t-test rejected at ``alpha``, and the relative
    gain in rejections.

    Raises
    ------
    ValueError if ``corr`` is outside [-1, 1] (or NaN) or ``n_sims < 1``.
    """
    # The negated form also rejects NaN, for which every comparison is False.
    if not -1.0 <= corr <= 1.0:
        raise ValueError(f"corr must lie in [-1, 1], got {corr!r}")
    if n_sims < 1:
        raise ValueError(f"n_sims must be a positive integer, got {n_sims!r}")

    rng = np.random.default_rng(seed)
    n_total = n_per_group * 2
    noise_scale = np.sqrt(1 - corr ** 2)  # loop-invariant
    raw_sig = 0
    cuped_sig = 0

    for _ in range(n_sims):
        # Pre-experiment metric
        pre = rng.standard_normal(n_total)

        # Post-experiment metric: loading `corr` on the unit-variance pre
        # metric plus independent noise scaled to keep Var(post) = 1 gives
        # Corr(pre, post) = corr, which is what the returned
        # "pre_post_correlation" / "theoretical_variance_reduction_pct" report.
        noise = rng.standard_normal(n_total)
        post_base = corr * pre + noise_scale * noise

        ctrl_post = post_base[:n_per_group]
        trt_post = post_base[n_per_group:] + true_effect  # add treatment effect
        ctrl_pre = pre[:n_per_group]
        trt_pre = pre[n_per_group:]

        # Raw test
        _, p_raw = stats.ttest_ind(ctrl_post, trt_post)

        # CUPED adjustment: theta from the pooled arms, applied to each arm
        y_all = np.concatenate([ctrl_post, trt_post])
        x_all = np.concatenate([ctrl_pre, trt_pre])
        x_mean = x_all.mean()
        cov = np.cov(y_all, x_all)
        theta = cov[0, 1] / cov[1, 1] if cov[1, 1] > 0 else 0.0

        ctrl_adj = ctrl_post - theta * (ctrl_pre - x_mean)
        trt_adj = trt_post - theta * (trt_pre - x_mean)
        _, p_cup = stats.ttest_ind(ctrl_adj, trt_adj)

        if p_raw < alpha:
            raw_sig += 1
        if p_cup < alpha:
            cuped_sig += 1

    return {
        "n_per_group": n_per_group,
        "true_effect": true_effect,
        "pre_post_correlation": corr,
        "theoretical_variance_reduction_pct": round(corr ** 2 * 100, 2),
        "raw_power": round(raw_sig / n_sims, 4),
        "cuped_power": round(cuped_sig / n_sims, 4),
        # Relative gain in rejections; max(..., 1) avoids dividing by zero
        # when the raw test never rejects.
        "power_gain_pct": round((cuped_sig - raw_sig) / max(raw_sig, 1) * 100, 1),
    }