ab-testing-causal / src /cuped.py
fikri0o0's picture
2026-06-04: Initial deployment — A/B Testing & Causal Inference Simulator
4256820
"""
CUPED – Controlled-experiment Using Pre-Existing Data.
Reference: Deng, Xu, Kohavi, Walker (2013) "Improving the Sensitivity of
Online Controlled Experiments by Utilizing Pre-Experiment Data."
Microsoft Research. KDD 2013.
Key idea
--------
The post-experiment metric Y is correlated with a pre-experiment covariate X
(e.g., last month's conversion rate). We subtract the part of Y that is
"predictable" from X, leaving a lower-variance residual. This reduces the
variance, and hence the required sample size, by a fraction ρ² (down to
(1 – ρ²) of the original), where ρ = Corr(Y, X).
Usage
-----
from src.cuped import CUPEDResult, cuped_ttest
result = cuped_ttest(df, metric_col="converted", pre_metric_col="pre_conv")
"""
from __future__ import annotations
import numpy as np
import pandas as pd
from dataclasses import dataclass, asdict
from scipy import stats
@dataclass
class CUPEDResult:
    """Side-by-side summary of a raw t-test and its CUPED-adjusted twin."""

    # --- CUPED diagnostics -------------------------------------------------
    theta: float  # slope of Y regressed on X, i.e. Cov(Y, X) / Var(X)
    corr_pre_post: float  # Pearson ρ between the pre- and post-period metric
    variance_reduction_pct: float  # % drop in metric variance after adjustment

    # --- Unadjusted (raw) comparison ---------------------------------------
    raw_mean_control: float
    raw_mean_treatment: float
    raw_t_stat: float
    raw_p_value: float
    raw_significant: bool

    # --- CUPED-adjusted comparison -----------------------------------------
    cuped_mean_control: float
    cuped_mean_treatment: float
    cuped_t_stat: float
    cuped_p_value: float
    cuped_significant: bool

    # --- Efficiency gain ---------------------------------------------------
    sample_size_reduction_pct: float  # % by which the experiment could shrink
    alpha: float  # significance level the *_significant flags were judged at

    def to_dict(self) -> dict:
        """Return all fields as a plain dictionary keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping
def _cuped_adjust(
y: np.ndarray,
x: np.ndarray,
x_global_mean: float,
) -> tuple[np.ndarray, float]:
"""
Return (y_adjusted, theta) where:
y_adj = y - theta * (x - E[X])
theta = Cov(Y, X) / Var(X)
"""
cov_matrix = np.cov(y, x)
theta = cov_matrix[0, 1] / cov_matrix[1, 1] if cov_matrix[1, 1] > 0 else 0.0
y_adj = y - theta * (x - x_global_mean)
return y_adj, theta
def cuped_ttest(
    df: pd.DataFrame,
    metric_col: str,
    pre_metric_col: str,
    treatment_col: str = "treatment",
    alpha: float = 0.05,
) -> CUPEDResult:
    """
    Run a two-sample t-test on both the raw and the CUPED-adjusted metric.

    Parameters
    ----------
    df            : DataFrame with columns [metric_col, pre_metric_col, treatment_col]
    metric_col    : post-experiment outcome (e.g., "revenue", "converted")
    pre_metric_col: pre-experiment covariate (same metric from before the experiment)
    treatment_col : 0 = control, 1 = treatment
    alpha         : significance level

    Returns
    -------
    CUPEDResult with theta, the pre/post correlation, the variance and
    sample-size reductions, and the raw and adjusted test outcomes.

    Notes
    -----
    * Theta, the correlation and the variance reduction are computed on every
      row of ``df``; rows whose treatment value is neither 0 nor 1 therefore
      influence those quantities but are excluded from both t-tests.
    * Both tests call ``scipy.stats.ttest_ind(control, treatment)`` with
      SciPy's defaults, so a positive t-statistic means the control mean is
      larger than the treatment mean.
    * When the metric or the covariate has no variation the Pearson
      correlation is undefined; it is reported as 0.0 (and so is the
      sample-size reduction), matching the 0.0 fallbacks used for theta and
      the variance reduction.
    """
    y_all = df[metric_col].values.astype(float)
    x_all = df[pre_metric_col].values.astype(float)

    ctrl_mask = (df[treatment_col] == 0).to_numpy()
    trt_mask = (df[treatment_col] == 1).to_numpy()

    y_ctrl = y_all[ctrl_mask]
    y_trt = y_all[trt_mask]

    # Theta is estimated once on the pooled data, and the covariate is centred
    # on the global mean so the adjusted metric keeps the same overall mean.
    x_global_mean = x_all.mean()
    y_all_adj, theta = _cuped_adjust(y_all, x_all, x_global_mean)

    # Slicing the pooled adjusted vector applies the same theta and the same
    # centring to each arm.
    y_ctrl_adj = y_all_adj[ctrl_mask]
    y_trt_adj = y_all_adj[trt_mask]

    # Variance reduction achieved by the adjustment.
    var_raw = np.var(y_all)
    var_cuped = np.var(y_all_adj)
    var_reduction = (1.0 - var_cuped / var_raw) * 100.0 if var_raw > 0 else 0.0

    # Pearson correlation between the pre and post metric. A constant column
    # makes np.corrcoef divide by zero and return NaN; report 0.0 instead.
    with np.errstate(divide="ignore", invalid="ignore"):
        corr = float(np.corrcoef(y_all, x_all)[0, 1])
    if not np.isfinite(corr):
        corr = 0.0

    # Raw and CUPED-adjusted tests (control passed first).
    t_raw, p_raw = stats.ttest_ind(y_ctrl, y_trt)
    t_cup, p_cup = stats.ttest_ind(y_ctrl_adj, y_trt_adj)

    # CUPED needs a (1 - ρ²) fraction of the original n, i.e. a ρ² saving.
    ss_reduction = corr ** 2 * 100.0

    return CUPEDResult(
        theta=round(float(theta), 6),
        corr_pre_post=round(corr, 4),
        variance_reduction_pct=round(var_reduction, 2),
        raw_mean_control=round(float(y_ctrl.mean()), 6),
        raw_mean_treatment=round(float(y_trt.mean()), 6),
        raw_t_stat=round(float(t_raw), 4),
        raw_p_value=round(float(p_raw), 6),
        raw_significant=bool(p_raw < alpha),
        cuped_mean_control=round(float(y_ctrl_adj.mean()), 6),
        cuped_mean_treatment=round(float(y_trt_adj.mean()), 6),
        cuped_t_stat=round(float(t_cup), 4),
        cuped_p_value=round(float(p_cup), 6),
        cuped_significant=bool(p_cup < alpha),
        sample_size_reduction_pct=round(ss_reduction, 2),
        alpha=alpha,
    )
def simulate_cuped_benefit(
    n_per_group: int,
    baseline_rate: float,
    true_effect: float,
    corr: float,  # correlation between pre and post metric
    n_sims: int = 2000,
    alpha: float = 0.05,
    seed: int = 42,
) -> dict:
    """
    Simulate A/B experiments at a given pre-post correlation to show
    how CUPED variance reduction improves power.

    Each simulated experiment draws a standard-normal pre-period metric and a
    unit-variance post-period metric built as
        post = corr * pre + sqrt(1 - corr²) * noise
    so that Corr(pre, post) equals ``corr``. The second half of the users is
    the treatment arm and receives ``true_effect`` added to its post metric.

    Parameters
    ----------
    n_per_group  : users per arm in every simulated experiment
    baseline_rate: accepted for interface compatibility; not used by the
                   simulation
    true_effect  : additive shift applied to the treatment arm's post metric
    corr         : target Pearson correlation between pre and post metric,
                   must lie in [-1, 1]
    n_sims       : number of simulated experiments (at least 1)
    alpha        : significance level for both tests
    seed         : seed for the NumPy random generator

    Returns
    -------
    dict with the inputs echoed back, the theoretical variance reduction
    (corr² as a percentage), and the rejection rate ("power") of the raw and
    the CUPED-adjusted t-test, plus the relative gain between them.

    Raises
    ------
    ValueError if ``corr`` is outside [-1, 1] (including NaN) or ``n_sims``
    is smaller than 1.
    """
    if not -1.0 <= corr <= 1.0:
        raise ValueError(f"corr must be within [-1, 1], got {corr!r}")
    if n_sims < 1:
        raise ValueError(f"n_sims must be at least 1, got {n_sims!r}")

    rng = np.random.default_rng(seed)

    # Loop invariants.
    n_total = n_per_group * 2
    noise_scale = np.sqrt(1.0 - corr ** 2)

    raw_sig = 0
    cuped_sig = 0
    for _ in range(n_sims):
        # Pre-experiment metric and independent post-period noise.
        pre = rng.standard_normal(n_total)
        noise = rng.standard_normal(n_total)

        # Post-experiment metric whose correlation with `pre` is exactly `corr`.
        post_base = corr * pre + noise_scale * noise

        ctrl_post = post_base[:n_per_group]
        trt_post = post_base[n_per_group:] + true_effect  # add treatment effect
        ctrl_pre = pre[:n_per_group]
        trt_pre = pre[n_per_group:]

        # Raw test
        _, p_raw = stats.ttest_ind(ctrl_post, trt_post)

        # CUPED adjustment with theta estimated on the pooled sample.
        y_all = np.concatenate([ctrl_post, trt_post])
        x_mean = pre.mean()
        cov = np.cov(y_all, pre)
        theta = cov[0, 1] / cov[1, 1] if cov[1, 1] > 0 else 0.0
        ctrl_adj = ctrl_post - theta * (ctrl_pre - x_mean)
        trt_adj = trt_post - theta * (trt_pre - x_mean)
        _, p_cup = stats.ttest_ind(ctrl_adj, trt_adj)

        if p_raw < alpha:
            raw_sig += 1
        if p_cup < alpha:
            cuped_sig += 1

    return {
        "n_per_group": n_per_group,
        "true_effect": true_effect,
        "pre_post_correlation": corr,
        "theoretical_variance_reduction_pct": round(corr ** 2 * 100, 2),
        "raw_power": round(raw_sig / n_sims, 4),
        "cuped_power": round(cuped_sig / n_sims, 4),
        # max(..., 1) avoids dividing by zero when the raw test never rejects.
        "power_gain_pct": round((cuped_sig - raw_sig) / max(raw_sig, 1) * 100, 1),
    }