Spaces:

fikri0o0
/

ab-testing-causal

Running

App Files Files Community

ab-testing-causal / src /cuped.py

fikri0o0

2026-06-04: Initial deployment — A/B Testing & Causal Inference Simulator

4256820 2 days ago

raw

history blame contribute delete

6.78 kB

	"""
	CUPED – Controlled-experiment Using Pre-Existing Data.

	Reference: Deng, Xu, Kohavi, Walker (2013) "Improving the Sensitivity of
	Online Controlled Experiments by Utilizing Pre-Experiment Data."
	Microsoft Research. KDD 2013.

	Key idea
	--------
	The post-experiment metric Y is correlated with a pre-experiment covariate X
	(e.g., last month's conversion rate). We subtract the part of Y that is
	"predictable" from X, leaving a lower-variance residual. The residual's
	variance, and hence the required sample size, shrinks to a fraction
	(1 – ρ²) of the original (a reduction of ρ²), where ρ = Corr(Y, X).

	Usage
	-----
	from src.cuped import CUPEDResult, cuped_ttest
	result = cuped_ttest(df, metric_col="converted", pre_metric_col="pre_conv")
	"""
	from __future__ import annotations

	import numpy as np
	import pandas as pd
	from dataclasses import dataclass, asdict

	from scipy import stats


	@dataclass
	class CUPEDResult:
	    """
	    Side-by-side summary of a raw and a CUPED-adjusted two-sample t-test.

	    Field order is part of the interface: instances may be built
	    positionally, and ``to_dict`` emits keys in declaration order.
	    """

	    # --- Adjustment diagnostics -------------------------------------------
	    theta: float  # slope from regressing Y on X, i.e. Cov(Y, X) / Var(X)
	    corr_pre_post: float  # Pearson ρ between the pre and post metric
	    variance_reduction_pct: float  # % drop in metric variance after CUPED

	    # --- Unadjusted (raw) test --------------------------------------------
	    raw_mean_control: float
	    raw_mean_treatment: float
	    raw_t_stat: float
	    raw_p_value: float
	    raw_significant: bool

	    # --- CUPED-adjusted test ----------------------------------------------
	    cuped_mean_control: float
	    cuped_mean_treatment: float
	    cuped_t_stat: float
	    cuped_p_value: float
	    cuped_significant: bool

	    # --- Effective sample size saving -------------------------------------
	    sample_size_reduction_pct: float  # how much smaller an experiment could be

	    # Significance level the two ``*_significant`` flags refer to
	    alpha: float

	    def to_dict(self) -> dict:
	        """Return every field as a plain ``{field name: value}`` dict."""
	        as_mapping = asdict(self)
	        return as_mapping


	def _cuped_adjust(
	    y: np.ndarray,
	    x: np.ndarray,
	    x_global_mean: float,
	) -> tuple[np.ndarray, float]:
	    """
	    Apply the CUPED correction to an outcome vector.

	    The adjusted outcome is ``y - theta * (x - x_global_mean)`` with
	    ``theta = Cov(y, x) / Var(x)`` read off the sample covariance matrix.

	    Parameters
	    ----------
	    y : outcome values
	    x : covariate values, aligned element-wise with ``y``
	    x_global_mean : value the covariate is centred on

	    Returns
	    -------
	    (y_adjusted, theta). ``theta`` falls back to 0.0, leaving ``y``
	    unchanged, when the sample variance of ``x`` is not strictly positive.
	    """
	    sample_cov = np.cov(y, x)
	    var_x = sample_cov[1, 1]

	    if var_x > 0:
	        theta = sample_cov[0, 1] / var_x
	    else:
	        # Degenerate covariate: nothing to regress on, so apply no correction.
	        theta = 0.0

	    centred_x = x - x_global_mean
	    return y - theta * centred_x, theta


	def cuped_ttest(
	    df: pd.DataFrame,
	    metric_col: str,
	    pre_metric_col: str,
	    treatment_col: str = "treatment",
	    alpha: float = 0.05,
	) -> CUPEDResult:
	    """
	    Run a t-test on both raw and CUPED-adjusted metrics.

	    Parameters
	    ----------
	    df : DataFrame with columns [metric_col, pre_metric_col, treatment_col]
	    metric_col : post-experiment outcome (e.g., "revenue", "converted")
	    pre_metric_col: pre-experiment covariate (same metric from before the experiment)
	    treatment_col : 0 = control, 1 = treatment
	    alpha : significance level

	    Returns
	    -------
	    CUPEDResult with theta, the pre/post correlation, variance and sample-size
	    reductions, and the group means, t statistic, p-value and significance
	    flag for both the raw and the adjusted comparison. Values are rounded
	    for display.

	    Notes
	    -----
	    * Both tests call ``scipy.stats.ttest_ind(control, treatment)`` with its
	      defaults, so a higher treatment mean yields a negative t statistic.
	    * Theta, the covariate mean, the correlation and the variance reduction
	      are computed over every row of ``df``; the group arrays use only rows
	      whose ``treatment_col`` equals 0 or 1.
	    * If either metric has zero variance, the correlation is undefined and is
	      reported as 0.0 (no benefit from CUPED), consistent with theta and the
	      variance reduction, rather than NaN.
	    * Missing values are not filtered here.
	    """
	    ctrl = df[df[treatment_col] == 0]
	    trt = df[df[treatment_col] == 1]

	    y_ctrl = ctrl[metric_col].values.astype(float)
	    y_trt = trt[metric_col].values.astype(float)
	    x_ctrl = ctrl[pre_metric_col].values.astype(float)
	    x_trt = trt[pre_metric_col].values.astype(float)

	    # Theta is estimated on the full dataset (pooled) — avoids bias
	    y_all = df[metric_col].values.astype(float)
	    x_all = df[pre_metric_col].values.astype(float)
	    x_global_mean = x_all.mean()

	    # Keep the pooled adjusted vector too; it feeds the variance comparison.
	    y_all_adj, theta = _cuped_adjust(y_all, x_all, x_global_mean)

	    # Apply same theta to each group (uses global X̄ to keep E[Y_adj] = E[Y])
	    y_ctrl_adj = y_ctrl - theta * (x_ctrl - x_global_mean)
	    y_trt_adj = y_trt - theta * (x_trt - x_global_mean)

	    # Variance reduction
	    var_raw = np.var(y_all)
	    var_cuped = np.var(y_all_adj)
	    var_reduction = (1.0 - var_cuped / var_raw) * 100.0 if var_raw > 0 else 0.0

	    # Correlation of pre and post metric. np.corrcoef divides by both
	    # standard deviations, so a constant column would give NaN (and a
	    # RuntimeWarning); report "no correlation" instead.
	    if var_raw > 0 and np.var(x_all) > 0:
	        corr = float(np.corrcoef(y_all, x_all)[0, 1])
	    else:
	        corr = 0.0

	    # Raw t-test
	    t_raw, p_raw = stats.ttest_ind(y_ctrl, y_trt)

	    # CUPED t-test
	    t_cup, p_cup = stats.ttest_ind(y_ctrl_adj, y_trt_adj)

	    # Sample size reduction (CUPED needs (1-ρ²) fraction of original n)
	    ss_reduction = corr ** 2 * 100.0

	    return CUPEDResult(
	        theta=round(float(theta), 6),
	        corr_pre_post=round(corr, 4),
	        variance_reduction_pct=round(var_reduction, 2),
	        raw_mean_control=round(float(y_ctrl.mean()), 6),
	        raw_mean_treatment=round(float(y_trt.mean()), 6),
	        raw_t_stat=round(float(t_raw), 4),
	        raw_p_value=round(float(p_raw), 6),
	        raw_significant=bool(p_raw < alpha),
	        cuped_mean_control=round(float(y_ctrl_adj.mean()), 6),
	        cuped_mean_treatment=round(float(y_trt_adj.mean()), 6),
	        cuped_t_stat=round(float(t_cup), 4),
	        cuped_p_value=round(float(p_cup), 6),
	        cuped_significant=bool(p_cup < alpha),
	        sample_size_reduction_pct=round(ss_reduction, 2),
	        alpha=alpha,
	    )


	def simulate_cuped_benefit(
	    n_per_group: int,
	    baseline_rate: float,
	    true_effect: float,
	    corr: float,  # correlation between pre and post metric
	    n_sims: int = 2000,
	    alpha: float = 0.05,
	    seed: int = 42,
	) -> dict:
	    """
	    Simulate A/B experiments at a given pre-post correlation to show
	    how CUPED variance reduction improves power.

	    Each simulated experiment draws a unit-variance pre-experiment metric
	    and builds the post-experiment metric as
	    ``corr * pre + sqrt(1 - corr**2) * noise``, so that Corr(pre, post)
	    equals ``corr`` and the post metric also has unit variance. The
	    treatment half then has ``true_effect`` added to its post metric.

	    Parameters
	    ----------
	    n_per_group : users per arm (at least 2)
	    baseline_rate : currently unused; kept so existing callers keep working
	    true_effect : additive shift applied to the treatment arm, in units of
	        the (unit-variance) post metric
	    corr : population correlation between pre and post metric, in [-1, 1]
	    n_sims : number of simulated experiments (at least 1)
	    alpha : significance level for both tests
	    seed : seed for the random generator, making results reproducible

	    Returns
	    -------
	    dict with the inputs echoed back, the theoretical variance reduction
	    (``corr**2`` as a percentage), the share of simulations in which the raw
	    and CUPED-adjusted t-tests were significant, and the relative power gain.

	    Raises
	    ------
	    ValueError
	        If ``n_per_group < 2``, ``n_sims < 1`` or ``corr`` is outside [-1, 1].
	    """
	    if n_per_group < 2:
	        raise ValueError("n_per_group must be at least 2")
	    if n_sims < 1:
	        raise ValueError("n_sims must be at least 1")
	    if not -1.0 <= corr <= 1.0:
	        raise ValueError("corr must lie in [-1, 1]")

	    rng = np.random.default_rng(seed)
	    n_total = 2 * n_per_group
	    # Loop-invariant: weight of the part of `post` that is independent of `pre`.
	    residual_scale = np.sqrt(1.0 - corr ** 2)
	    raw_sig = 0
	    cuped_sig = 0

	    for _ in range(n_sims):
	        # Pre-experiment metric: unit variance, so that the mix below has
	        # exactly the requested correlation with it.
	        pre = rng.standard_normal(n_total)

	        # Post-experiment metric correlated with pre
	        noise = rng.standard_normal(n_total)
	        post = corr * pre + residual_scale * noise
	        post[n_per_group:] += true_effect  # add treatment effect

	        # Raw test (first half = control, second half = treatment)
	        _, p_raw = stats.ttest_ind(post[:n_per_group], post[n_per_group:])

	        # CUPED adjustment with theta pooled over both arms
	        cov = np.cov(post, pre)
	        theta = cov[0, 1] / cov[1, 1] if cov[1, 1] > 0 else 0.0
	        post_adj = post - theta * (pre - pre.mean())
	        _, p_cup = stats.ttest_ind(post_adj[:n_per_group], post_adj[n_per_group:])

	        raw_sig += int(p_raw < alpha)
	        cuped_sig += int(p_cup < alpha)

	    return {
	        "n_per_group": n_per_group,
	        "true_effect": true_effect,
	        "pre_post_correlation": corr,
	        "theoretical_variance_reduction_pct": round(corr ** 2 * 100, 2),
	        "raw_power": round(raw_sig / n_sims, 4),
	        "cuped_power": round(cuped_sig / n_sims, 4),
	        # Relative gain; the denominator is floored at one significant run
	        # to avoid dividing by zero when the raw test never rejects.
	        "power_gain_pct": round((cuped_sig - raw_sig) / max(raw_sig, 1) * 100, 1),
	    }