Spaces:

fikri0o0
/

ab-testing-causal

Running

File size: 6,778 Bytes
"""
CUPED – Controlled-experiment Using Pre-Existing Data.

Reference: Deng, Xu, Kohavi, Walker (2013) "Improving the Sensitivity of
Online Controlled Experiments by Utilizing Pre-Experiment Data."
Microsoft Research. KDD 2013.

Key idea
--------
The post-experiment metric Y is correlated with a pre-experiment covariate X
(e.g., last month's conversion rate). We subtract the part of Y that is
"predictable" from X, leaving a lower-variance residual. This scales the
metric variance (and hence the required sample size) by a factor of
(1 – ρ²), i.e. a reduction of ρ², where ρ = Corr(Y, X).

Usage
-----
    from src.cuped import CUPEDResult, cuped_ttest
    result = cuped_ttest(df, metric_col="converted", pre_metric_col="pre_conv")
"""
from __future__ import annotations

import numpy as np
import pandas as pd
from dataclasses import dataclass, asdict

from scipy import stats


@dataclass
class CUPEDResult:
    """
    Container for the outcome of a raw vs. CUPED-adjusted two-sample t-test.

    The field comments describe the values as populated by ``cuped_ttest`` in
    this module. Field order matters: the dataclass-generated ``__init__``
    accepts the fields positionally in the order declared here.
    """
    theta: float                  # regression coefficient of Y on X: Cov(Y, X) / Var(X)
    corr_pre_post: float          # Pearson ρ between pre and post metric
    variance_reduction_pct: float # % reduction in metric variance (raw vs. adjusted)

    # Raw (unadjusted) test
    raw_mean_control: float
    raw_mean_treatment: float
    raw_t_stat: float             # from ttest_ind(control, treatment)
    raw_p_value: float
    raw_significant: bool         # raw_p_value < alpha

    # CUPED-adjusted test
    cuped_mean_control: float
    cuped_mean_treatment: float
    cuped_t_stat: float           # from ttest_ind(control_adj, treatment_adj)
    cuped_p_value: float
    cuped_significant: bool       # cuped_p_value < alpha

    # Effective sample size saving
    sample_size_reduction_pct: float   # how much smaller an experiment could be (ρ² × 100)

    alpha: float                  # significance level the two flags were evaluated at

    def to_dict(self) -> dict:
        """Return all fields as a plain ``dict`` keyed by field name."""
        return asdict(self)


def _cuped_adjust(
    y: np.ndarray,
    x: np.ndarray,
    x_global_mean: float,
) -> tuple[np.ndarray, float]:
    """
    Strip from ``y`` the part that is linearly explained by the covariate ``x``.

    Parameters
    ----------
    y             : outcome values
    x             : covariate values, aligned element-wise with ``y``
    x_global_mean : value used to centre ``x`` before the adjustment

    Returns
    -------
    (y_adjusted, theta) where
        theta      = Cov(Y, X) / Var(X), or 0.0 when Var(X) is not positive
        y_adjusted = y - theta * (x - x_global_mean)
    """
    sample_cov = np.cov(y, x)
    covariate_var = sample_cov[1, 1]

    # A covariate without positive variance carries no information: fall back
    # to a zero slope, which leaves y unchanged.
    if covariate_var > 0:
        slope = sample_cov[0, 1] / covariate_var
    else:
        slope = 0.0

    centred_x = x - x_global_mean
    return y - slope * centred_x, slope


def cuped_ttest(
    df: pd.DataFrame,
    metric_col: str,
    pre_metric_col: str,
    treatment_col: str = "treatment",
    alpha: float = 0.05,
) -> CUPEDResult:
    """
    Run a t-test on both raw and CUPED-adjusted metrics.

    Parameters
    ----------
    df            : DataFrame with columns [metric_col, pre_metric_col, treatment_col]
    metric_col    : post-experiment outcome (e.g., "revenue", "converted")
    pre_metric_col: pre-experiment covariate (same metric from before the experiment)
    treatment_col : 0 = control, 1 = treatment; rows holding any other value
                    are ignored entirely (they enter neither the tests nor
                    the pooled statistics)
    alpha         : significance level

    Returns
    -------
    CUPEDResult with the raw and adjusted group means, t statistics, p-values
    and significance flags, plus theta, the pre/post correlation and the
    implied variance / sample-size reductions (values are rounded).

    Notes
    -----
    Both tests call ``scipy.stats.ttest_ind(control, treatment)`` with its
    default settings, so the t statistics are computed with control as the
    first sample. When the metric or the covariate has zero variance, the
    correlation and the sample-size reduction are reported as 0.0, matching
    the zero fallback already used for theta and the variance reduction.
    """
    arm = df[treatment_col]
    in_ctrl = arm == 0
    in_trt = arm == 1

    ctrl = df[in_ctrl]
    trt = df[in_trt]
    # Pool only rows that belong to one of the two arms. Using every row of
    # `df` would let unassigned or mislabelled rows shift theta and the
    # centring mean even though they never take part in the tests.
    pooled = df[in_ctrl | in_trt]

    y_ctrl = ctrl[metric_col].values.astype(float)
    y_trt = trt[metric_col].values.astype(float)
    x_ctrl = ctrl[pre_metric_col].values.astype(float)
    x_trt = trt[pre_metric_col].values.astype(float)

    # Theta is estimated once on the pooled arms and shared by both groups.
    y_all = pooled[metric_col].values.astype(float)
    x_all = pooled[pre_metric_col].values.astype(float)
    x_global_mean = x_all.mean()

    y_all_adj, theta = _cuped_adjust(y_all, x_all, x_global_mean)

    # Apply same theta to each group (uses global X̄ to keep E[Y_adj] = E[Y])
    y_ctrl_adj = y_ctrl - theta * (x_ctrl - x_global_mean)
    y_trt_adj = y_trt - theta * (x_trt - x_global_mean)

    # Variance reduction, measured on the pooled sample
    var_raw = np.var(y_all)
    var_cuped = np.var(y_all_adj)
    var_reduction = (1.0 - var_cuped / var_raw) * 100.0 if var_raw > 0 else 0.0

    # Correlation of pre and post metric. A constant column makes the
    # correlation undefined (0/0); report 0.0 in that case. NaN is kept when
    # it stems from non-finite data or from fewer than two observations.
    with np.errstate(divide="ignore", invalid="ignore"):
        corr = float(np.corrcoef(y_all, x_all)[0, 1])
    if (
        np.isnan(corr)
        and y_all.size >= 2
        and np.isfinite(y_all).all()
        and np.isfinite(x_all).all()
    ):
        corr = 0.0

    # Raw t-test
    t_raw, p_raw = stats.ttest_ind(y_ctrl, y_trt)

    # CUPED t-test
    t_cup, p_cup = stats.ttest_ind(y_ctrl_adj, y_trt_adj)

    # Sample size reduction (CUPED needs (1-ρ²) fraction of original n)
    ss_reduction = corr ** 2 * 100.0

    return CUPEDResult(
        theta=round(float(theta), 6),
        corr_pre_post=round(corr, 4),
        variance_reduction_pct=round(var_reduction, 2),
        raw_mean_control=round(float(y_ctrl.mean()), 6),
        raw_mean_treatment=round(float(y_trt.mean()), 6),
        raw_t_stat=round(float(t_raw), 4),
        raw_p_value=round(float(p_raw), 6),
        raw_significant=bool(p_raw < alpha),
        cuped_mean_control=round(float(y_ctrl_adj.mean()), 6),
        cuped_mean_treatment=round(float(y_trt_adj.mean()), 6),
        cuped_t_stat=round(float(t_cup), 4),
        cuped_p_value=round(float(p_cup), 6),
        cuped_significant=bool(p_cup < alpha),
        sample_size_reduction_pct=round(ss_reduction, 2),
        alpha=alpha,
    )


def simulate_cuped_benefit(
    n_per_group: int,
    baseline_rate: float,
    true_effect: float,
    corr: float,          # correlation between pre and post metric
    n_sims: int = 2000,
    alpha: float = 0.05,
    seed: int = 42,
) -> dict:
    """
    Simulate A/B experiments at a given pre-post correlation to show
    how CUPED variance reduction improves power.

    Each simulated experiment draws a unit-variance covariate ``pre`` and an
    outcome ``post = corr * pre + sqrt(1 - corr²) * noise`` so that
    Corr(pre, post) equals ``corr`` and Var(post) = 1. ``true_effect`` is
    added to the outcome of the treatment half.

    Parameters
    ----------
    n_per_group  : users per arm (at least 2)
    baseline_rate: currently unused; kept so existing callers keep working
                   (the simulated metric is standard-normal, not a rate)
    true_effect  : additive shift applied to the treatment outcome
    corr         : target pre/post correlation, must lie in [-1, 1]
    n_sims       : number of simulated experiments (at least 1)
    alpha        : significance level used to count detections
    seed         : seed for ``numpy.random.default_rng``

    Returns
    -------
    dict with the inputs echoed back, the theoretical variance reduction
    (ρ² × 100) and the power estimates for raw and CUPED-adjusted tests.
    ``power_gain_pct`` is the relative increase in the number of significant
    runs; its denominator is floored at 1 so that zero raw detections do not
    cause a division by zero.

    Raises
    ------
    ValueError if ``corr`` is outside [-1, 1] (or NaN), ``n_per_group`` < 2,
    or ``n_sims`` < 1.
    """
    if not -1.0 <= corr <= 1.0:
        raise ValueError(f"corr must lie in [-1, 1], got {corr!r}")
    if n_per_group < 2:
        raise ValueError(f"n_per_group must be at least 2, got {n_per_group!r}")
    if n_sims < 1:
        raise ValueError(f"n_sims must be at least 1, got {n_sims!r}")

    rng = np.random.default_rng(seed)
    n_total = n_per_group * 2
    # Loop-invariant: scale of the part of the outcome independent of `pre`.
    residual_scale = np.sqrt(1.0 - corr ** 2)
    raw_sig = 0
    cuped_sig = 0

    for _ in range(n_sims):
        # Pre-experiment covariate with unit variance. Adding extra noise on
        # top of it would shrink the realised correlation below `corr`.
        pre = rng.standard_normal(n_total)

        # Post-experiment metric with Corr(pre, post) == corr
        noise = rng.standard_normal(n_total)
        post_base = corr * pre + residual_scale * noise

        ctrl_post = post_base[:n_per_group]
        trt_post = post_base[n_per_group:] + true_effect  # add treatment effect

        ctrl_pre = pre[:n_per_group]
        trt_pre = pre[n_per_group:]

        # Raw test
        _, p_raw = stats.ttest_ind(ctrl_post, trt_post)

        # CUPED adjustment: theta and X̄ come from the pooled sample
        y_all = np.concatenate([ctrl_post, trt_post])
        x_mean = pre.mean()
        cov = np.cov(y_all, pre)
        theta = cov[0, 1] / cov[1, 1] if cov[1, 1] > 0 else 0.0

        ctrl_adj = ctrl_post - theta * (ctrl_pre - x_mean)
        trt_adj = trt_post - theta * (trt_pre - x_mean)
        _, p_cup = stats.ttest_ind(ctrl_adj, trt_adj)

        if p_raw < alpha:
            raw_sig += 1
        if p_cup < alpha:
            cuped_sig += 1

    return {
        "n_per_group": n_per_group,
        "true_effect": true_effect,
        "pre_post_correlation": corr,
        "theoretical_variance_reduction_pct": round(corr ** 2 * 100, 2),
        "raw_power": round(raw_sig / n_sims, 4),
        "cuped_power": round(cuped_sig / n_sims, 4),
        "power_gain_pct": round((cuped_sig - raw_sig) / max(raw_sig, 1) * 100, 1),
    }