File size: 6,778 Bytes
4256820
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
"""
CUPED – Controlled-experiment Using Pre-Experiment Data.

Reference: Deng, Xu, Kohavi, Walker (2013) "Improving the Sensitivity of
Online Controlled Experiments by Utilizing Pre-Experiment Data."
Microsoft Research. KDD 2013.

Key idea
--------
The post-experiment metric Y is correlated with a pre-experiment covariate X
(e.g., last month's conversion rate). We subtract the part of Y that is
"predictable" from X, leaving a lower-variance residual. The residual variance,
and hence the required sample size, shrinks to (1 – ρ²) of the original
(i.e., a reduction of ρ²), where ρ = Corr(Y, X).

Usage
-----
    from src.cuped import CUPEDResult, cuped_ttest
    result = cuped_ttest(df, metric_col="converted", pre_metric_col="pre_conv")
"""
from __future__ import annotations

import numpy as np
import pandas as pd
from dataclasses import dataclass, asdict

from scipy import stats


@dataclass
class CUPEDResult:
    """Side-by-side summary of a raw t-test and its CUPED-adjusted counterpart.

    Field order matters: it defines the positional constructor and the key
    order of ``to_dict()``.
    """

    # -- Adjustment diagnostics --
    theta: float                   # slope of Y regressed on X: Cov(Y, X) / Var(X)
    corr_pre_post: float           # Pearson correlation (rho) of pre vs. post metric
    variance_reduction_pct: float  # percentage drop in metric variance

    # -- Test on the unadjusted metric --
    raw_mean_control: float
    raw_mean_treatment: float
    raw_t_stat: float
    raw_p_value: float
    raw_significant: bool

    # -- Test on the CUPED-adjusted metric --
    cuped_mean_control: float
    cuped_mean_treatment: float
    cuped_t_stat: float
    cuped_p_value: float
    cuped_significant: bool

    # -- Effective sample-size saving --
    sample_size_reduction_pct: float  # how much smaller an experiment could be

    # Significance level the two ``*_significant`` flags were evaluated at.
    alpha: float

    def to_dict(self) -> dict:
        """Return every field as a plain ``dict`` keyed by field name."""
        as_mapping = asdict(self)
        return as_mapping


def _cuped_adjust(
    y: np.ndarray,
    x: np.ndarray,
    x_global_mean: float,
) -> tuple[np.ndarray, float]:
    """
    Remove from ``y`` the component that is linearly explained by ``x``.

    Returns (y_adjusted, theta) where:
        theta = Cov(Y, X) / Var(X)      (0.0 when Var(X) is not positive)
        y_adj = y - theta * (x - x_global_mean)
    """
    joint_cov = np.cov(y, x)
    cov_yx = joint_cov[0, 1]
    var_x = joint_cov[1, 1]

    # A covariate with no positive variance carries no signal: skip the adjustment.
    if var_x > 0:
        slope = cov_yx / var_x
    else:
        slope = 0.0

    centered_x = x - x_global_mean
    return y - slope * centered_x, slope


def cuped_ttest(
    df: pd.DataFrame,
    metric_col: str,
    pre_metric_col: str,
    treatment_col: str = "treatment",
    alpha: float = 0.05,
) -> CUPEDResult:
    """
    Run a two-sample t-test on both the raw and the CUPED-adjusted metric.

    Parameters
    ----------
    df            : DataFrame with columns [metric_col, pre_metric_col, treatment_col]
    metric_col    : post-experiment outcome (e.g., "revenue", "converted")
    pre_metric_col: pre-experiment covariate (same metric from before the experiment)
    treatment_col : 0 = control, 1 = treatment; rows carrying any other label are
                    ignored everywhere, including the pooled theta / mean estimates
    alpha         : significance level

    Returns
    -------
    CUPEDResult holding rounded statistics for both tests. The t-statistics come
    from ``scipy.stats.ttest_ind(control, treatment)`` with its default settings,
    so they are signed as control minus treatment.

    Notes
    -----
    When the pre/post correlation is undefined (e.g. one of the two metrics is
    constant), ``corr_pre_post`` and ``sample_size_reduction_pct`` are reported
    as 0.0, matching the theta = 0 / variance-reduction = 0 fallbacks.
    """
    is_ctrl = df[treatment_col] == 0
    is_trt = df[treatment_col] == 1
    ctrl = df[is_ctrl]
    trt = df[is_trt]
    # Pool only the rows that actually take part in the test, so rows with
    # stray or missing treatment labels cannot influence theta or the mean of X.
    pooled = df[is_ctrl | is_trt]

    y_ctrl = ctrl[metric_col].values.astype(float)
    y_trt = trt[metric_col].values.astype(float)
    x_ctrl = ctrl[pre_metric_col].values.astype(float)
    x_trt = trt[pre_metric_col].values.astype(float)

    # Theta is estimated once on the pooled arms and shared by both groups.
    y_all = pooled[metric_col].values.astype(float)
    x_all = pooled[pre_metric_col].values.astype(float)
    x_global_mean = x_all.mean()

    y_all_adj, theta = _cuped_adjust(y_all, x_all, x_global_mean)

    # Same theta and the same global mean of X for each arm, so the adjustment
    # shifts both groups consistently.
    y_ctrl_adj = y_ctrl - theta * (x_ctrl - x_global_mean)
    y_trt_adj = y_trt - theta * (x_trt - x_global_mean)

    # Variance reduction achieved on the pooled metric.
    var_raw = np.var(y_all)
    var_cuped = np.var(y_all_adj)
    var_reduction = (1.0 - var_cuped / var_raw) * 100.0 if var_raw > 0 else 0.0

    # Correlation of pre and post metric. np.corrcoef yields NaN when either
    # series has zero variance; report "no correlation" instead of NaN.
    with np.errstate(divide="ignore", invalid="ignore"):
        corr = float(np.corrcoef(y_all, x_all)[0, 1])
    if not np.isfinite(corr):
        corr = 0.0

    # Raw t-test
    t_raw, p_raw = stats.ttest_ind(y_ctrl, y_trt)

    # CUPED t-test
    t_cup, p_cup = stats.ttest_ind(y_ctrl_adj, y_trt_adj)

    # Sample size reduction (CUPED needs (1-ρ²) fraction of original n)
    ss_reduction = corr ** 2 * 100.0

    return CUPEDResult(
        theta=round(float(theta), 6),
        corr_pre_post=round(corr, 4),
        variance_reduction_pct=round(var_reduction, 2),
        raw_mean_control=round(float(y_ctrl.mean()), 6),
        raw_mean_treatment=round(float(y_trt.mean()), 6),
        raw_t_stat=round(float(t_raw), 4),
        raw_p_value=round(float(p_raw), 6),
        raw_significant=bool(p_raw < alpha),
        cuped_mean_control=round(float(y_ctrl_adj.mean()), 6),
        cuped_mean_treatment=round(float(y_trt_adj.mean()), 6),
        cuped_t_stat=round(float(t_cup), 4),
        cuped_p_value=round(float(p_cup), 6),
        cuped_significant=bool(p_cup < alpha),
        sample_size_reduction_pct=round(ss_reduction, 2),
        alpha=alpha,
    )


def simulate_cuped_benefit(
    n_per_group: int,
    baseline_rate: float,
    true_effect: float,
    corr: float,          # correlation between pre and post metric
    n_sims: int = 2000,
    alpha: float = 0.05,
    seed: int = 42,
) -> dict:
    """
    Simulate A/B experiments at a given pre-post correlation to show
    how CUPED variance reduction improves power.

    Parameters
    ----------
    n_per_group  : number of simulated users in each arm
    baseline_rate: currently unused; kept so existing callers keep working
    true_effect  : additive shift applied to the treatment arm's post metric
                   (the post metric has unit variance, so this is in SD units)
    corr         : population correlation between pre and post metric, in [-1, 1]
    n_sims       : number of simulated experiments
    alpha        : significance level applied to each simulated test
    seed         : seed for ``np.random.default_rng``

    Returns
    -------
    dict with the inputs echoed back, the theoretical variance reduction
    (corr² as a percentage), the share of simulations in which the raw and the
    CUPED-adjusted t-test were significant, and the relative gain between them.

    Raises
    ------
    ValueError if ``corr`` is not within [-1, 1].
    """
    if not -1.0 <= corr <= 1.0:
        raise ValueError(f"corr must lie in [-1, 1], got {corr!r}")

    rng = np.random.default_rng(seed)
    n_total = 2 * n_per_group
    # Weight of the independent noise that keeps Var(post) = 1.
    residual_scale = np.sqrt(1.0 - corr ** 2)
    raw_sig = 0
    cuped_sig = 0

    for _ in range(n_sims):
        # Pre-experiment metric: standard normal, so that the construction
        # below gives Corr(pre, post) = corr exactly, as the outputs report.
        pre = rng.standard_normal(n_total)

        # Post-experiment metric: unit variance, correlated with pre.
        noise = rng.standard_normal(n_total)
        post_base = corr * pre + residual_scale * noise

        ctrl_post = post_base[:n_per_group]
        trt_post = post_base[n_per_group:] + true_effect  # add treatment effect

        ctrl_pre = pre[:n_per_group]
        trt_pre = pre[n_per_group:]

        # Raw test
        _, p_raw = stats.ttest_ind(ctrl_post, trt_post)

        # CUPED adjustment with theta estimated on the pooled arms
        y_all = np.concatenate([ctrl_post, trt_post])
        x_mean = pre.mean()
        cov = np.cov(y_all, pre)
        theta = cov[0, 1] / cov[1, 1] if cov[1, 1] > 0 else 0.0

        ctrl_adj = ctrl_post - theta * (ctrl_pre - x_mean)
        trt_adj = trt_post - theta * (trt_pre - x_mean)
        _, p_cup = stats.ttest_ind(ctrl_adj, trt_adj)

        if p_raw < alpha:
            raw_sig += 1
        if p_cup < alpha:
            cuped_sig += 1

    return {
        "n_per_group": n_per_group,
        "true_effect": true_effect,
        "pre_post_correlation": corr,
        "theoretical_variance_reduction_pct": round(corr ** 2 * 100, 2),
        "raw_power": round(raw_sig / n_sims, 4),
        "cuped_power": round(cuped_sig / n_sims, 4),
        # Denominator is floored at 1 so a raw power of zero cannot divide by zero.
        "power_gain_pct": round((cuped_sig - raw_sig) / max(raw_sig, 1) * 100, 1),
    }