File size: 5,510 Bytes
8e50444
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import sys, os
import numpy as np, pandas as pd
import warnings; warnings.filterwarnings('ignore')

sys.path.insert(0, os.path.dirname(__file__))
from backtesting.engines.v30_causal_engine import get_data, evaluate_slice, V30_PARAMS
from backtesting.audits.v53_causal_audit import run_v53_causal

def test_2_1_strict_train_test(dc, spy, vf, daily_ret):
    """Compare Sharpe on the 2008-2018 window against the 2019-2025 window.

    A single backtest is produced by ``run_v53_causal`` with ``V30_PARAMS``;
    both windows are then scored from that same result via
    ``evaluate_slice`` and the ``'sharpe'`` entries are compared.

    Args:
        dc, spy, vf, daily_ret: Forwarded unchanged to ``run_v53_causal``.

    Returns:
        None. The two Sharpe values, their difference and a verdict line are
        printed to stdout.
    """
    print("--- Test 2.1: Strict Train/Test Split ---")

    # One run over everything; the windows below are slices of this result.
    # NOTE(review): whether V30_PARAMS were chosen using only the train
    # window cannot be determined from this function -- confirm.
    backtest = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)

    # In-sample window first, then out-of-sample.
    train_sharpe = evaluate_slice(backtest, "2008-01-01", "2018-12-31")['sharpe']
    test_sharpe = evaluate_slice(backtest, "2019-01-01", "2025-12-31")['sharpe']

    print(f"Train Sharpe (2008-2018): {train_sharpe:.4f}")
    print(f"Test Sharpe  (2019-2025): {test_sharpe:.4f}")

    sharpe_gap = test_sharpe - train_sharpe
    print(f"Difference: {sharpe_gap:+.4f}")

    # Positive gap beyond the band -> strong pass; inside the band
    # (inclusive on both edges) -> pass; anything else, including a NaN
    # gap, falls through to fail.
    if sharpe_gap > 0.20:
        verdict = "Result: STRONG PASS (Exceptionally robust, test improved)"
    elif sharpe_gap >= -0.20:
        verdict = "Result: PASS (Robust, not overfit)"
    else:
        verdict = "Result: FAIL (Likely overfit, test decayed > 0.20)"
    print(verdict)

def test_2_2_start_date_sensitivity(dc, spy, vf, daily_ret):
    """Measure how much the full-period Sharpe moves when leading rows are dropped.

    For each offset in ``range(0, 60, 3)`` the first ``offset`` rows are cut
    from ``dc``, ``spy`` and ``daily_ret`` (``vf`` is passed as-is), the
    backtest is re-run with ``V30_PARAMS`` and the 2008-2025 ``'sharpe'`` is
    recorded. Summary statistics of those Sharpe values are printed, and the
    check passes when max - min is below 0.20.

    Args:
        dc, spy, daily_ret: Objects supporting ``.iloc`` row slicing.
        vf: Forwarded unchanged to ``run_v53_causal``.

    Returns:
        None. Progress dots, the statistics and a verdict line are printed.
    """
    print("\n--- Test 2.2: Start Date Sensitivity (Path Dependency Test) ---")

    print("Running 20 start date offsets (3-day steps) to sample distribution...", end="", flush=True)
    collected = []
    for skip_rows in range(0, 60, 3):
        # Trim the same number of leading rows from every row-indexed input.
        trimmed_dc, trimmed_spy, trimmed_ret = (
            frame.iloc[skip_rows:] for frame in (dc, spy, daily_ret)
        )
        run = run_v53_causal(trimmed_dc, trimmed_spy, vf, trimmed_ret, **V30_PARAMS)
        collected.append(evaluate_slice(run, "2008-01-01", "2025-12-31")['sharpe'])
        print(".", end="", flush=True)
    print()

    sharpe_arr = np.asarray(collected)
    mean_sharpe = sharpe_arr.mean()
    std_sharpe = sharpe_arr.std()  # population std (ddof=0), like np.std
    low_sharpe = sharpe_arr.min()
    high_sharpe = sharpe_arr.max()
    spread = high_sharpe - low_sharpe

    print(f"Mean Sharpe: {mean_sharpe:.4f}")
    print(f"Std Dev:     {std_sharpe:.4f}")
    print(f"Min Sharpe:  {low_sharpe:.4f}")
    print(f"Max Sharpe:  {high_sharpe:.4f}")
    print(f"Range:       {spread:.4f}")

    # A NaN spread compares False and therefore reports FAIL.
    verdict = (
        "Result: PASS (Low path dependency, robust)"
        if spread < 0.20
        else "Result: FAIL (High path dependency, unreliable)"
    )
    print(verdict)

def _extract_close_panel(raw):
    """Return the closing-price columns of a downloaded frame, one column per ticker.

    If ``raw`` has MultiIndex columns and ``"Close"`` is among the level-0
    labels, that sub-frame is selected; otherwise ``raw`` itself is used. Any
    remaining MultiIndex is flattened to its last level. A frame with flat
    columns is returned unchanged.
    """
    if not isinstance(raw.columns, pd.MultiIndex):
        return raw
    top_level = raw.columns.get_level_values(0).unique().tolist()
    closes = raw["Close"] if "Close" in top_level else raw
    if isinstance(closes.columns, pd.MultiIndex):
        # Work on a copy so relabelling does not mutate the caller's frame.
        closes = closes.copy()
        closes.columns = closes.columns.get_level_values(-1)
    return closes


def test_2_3_survivorship_bias(dc, spy, vf, daily_ret):
    """Estimate survivorship bias by adding a list of "poison" tickers to the universe.

    A baseline backtest is run on the inputs as given. Closing prices for
    ``POISON_TICKERS`` are then downloaded with yfinance, the tickers with
    more than 100 non-null closes are appended to a copy of ``dc`` (and to
    the universe list), returns are recomputed with ``pct_change()`` and the
    backtest is run again. The 2008-2025 ``'cagr'`` values are compared.

    The check passes when baseline CAGR minus poisoned CAGR is at most 5.0
    and the poisoned CAGR is positive. If no poison ticker is usable the
    poisoned run would equal the baseline, so the result is reported as
    inconclusive instead of a vacuous pass.

    Args:
        dc: Price frame whose columns are tickers; it is copied, not mutated.
        spy: Forwarded unchanged to ``run_v53_causal``.
        vf: Iterable of universe members; extended (de-duplicated, order
            preserved) with the usable poison tickers for the second run.
        daily_ret: Returns used for the baseline run only.

    Returns:
        None. All results are printed to stdout.
    """
    print("\n--- Test 2.3: Survivorship Bias Quantification ---")

    # Baseline
    c_base = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)
    m_base = evaluate_slice(c_base, "2008-01-01", "2025-12-31")
    base_cagr = m_base['cagr']
    print(f"Baseline Full Universe CAGR: {base_cagr:.1f}%")

    POISON_TICKERS = [
        "BBBY", "WISH", "CLOV", "WKHS", "RIDE", "NKLA", "QS", "HYLN",
        "SPCE", "SKLZ", "CLNE", "GOEV", "ARVL", "FSR", "PSFE", "OPEN",
        "SOFI", "BARK", "BIRD", "BYND", "PTON", "ZM", "DOCU", "TDOC",
        "FVRR", "UPST", "AFRM", "RKLB", "IONQ", "DNA", "LAZR", "VLDR",
        "MVIS", "WOOF", "SDC", "LMND", "ROOT", "COUR", "DNUT", "OLO",
        "TUYA", "PAYO", "BGRY", "SEER", "PRCH", "ACHR", "JOBY", "LILM",
        "EVGO", "CHPT"
    ]

    # Imported here so the other tests in this file do not require yfinance.
    import yfinance as yf
    print("Downloading poison ticker data...", end="", flush=True)
    poison_raw = yf.download(POISON_TICKERS, start="2006-01-01", end="2025-12-31", progress=False)
    poison_close = _extract_close_panel(poison_raw)

    # Keep only tickers that came back with a meaningful amount of history.
    valid_poison = [
        t for t in POISON_TICKERS
        if t in poison_close.columns and poison_close[t].notna().sum() > 100
    ]
    print(f" {len(valid_poison)} valid poison tickers downloaded.")

    if not valid_poison:
        # Without any added ticker the second run would just repeat the
        # baseline and the comparison below would pass with a ~0 difference.
        print("Result: INCONCLUSIVE (No poison tickers available; universe unchanged, nothing was tested)")
        return

    dc_poisoned = dc.copy()
    for t in valid_poison:
        if t not in dc_poisoned.columns:
            # ffill carries the last available close forward, so a ticker
            # whose data stops shows a flat price from then on.
            # NOTE(review): confirm that is the intended treatment.
            dc_poisoned[t] = poison_close[t].reindex(dc_poisoned.index).ffill()

    # NOTE(review): returns are recomputed for every column here, while the
    # baseline used the caller's daily_ret -- confirm the two agree for the
    # original columns.
    daily_ret_poisoned = dc_poisoned.pct_change()
    poison_universe = list(dict.fromkeys(list(vf) + valid_poison))

    c_poison = run_v53_causal(dc_poisoned, spy, poison_universe, daily_ret_poisoned, **V30_PARAMS)
    m_poison = evaluate_slice(c_poison, "2008-01-01", "2025-12-31")
    poison_cagr = m_poison['cagr']
    print(f"Poison Universe CAGR: {poison_cagr:.1f}%")

    diff = base_cagr - poison_cagr
    print(f"CAGR Difference (Bias Est): {diff:+.1f}%")

    # Acceptable if within 5% of original (framework kill-test 2)
    if diff <= 5.0 and poison_cagr > 0:
        print("Result: PASS (Strategy survived poison universe)")
    else:
        print("Result: FAIL (Alpha relies heavily on universe selection/survivorship)")

if __name__ == "__main__":
    # Script entry point: print the phase banner, load the shared inputs once
    # via get_data(), then pass the same four objects to each Phase 2 check
    # in order (2.1, 2.2, 2.3).
    banner_rule = "========================================"
    print(banner_rule)
    print(" V53 FRAMEWORK VALIDATION - PHASE 2")
    print(banner_rule)
    dc, spy, vf, daily_ret = get_data()

    for phase_check in (
        test_2_1_strict_train_test,
        test_2_2_start_date_sensitivity,
        test_2_3_survivorship_bias,
    ):
        phase_check(dc, spy, vf, daily_ret)