Spaces:
Running
Running
File size: 5,510 Bytes
8e50444 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 | import sys, os
import numpy as np, pandas as pd
import warnings; warnings.filterwarnings('ignore')
sys.path.insert(0, os.path.dirname(__file__))
from backtesting.engines.v30_causal_engine import get_data, evaluate_slice, V30_PARAMS
from backtesting.audits.v53_causal_audit import run_v53_causal
def test_2_1_strict_train_test(dc, spy, vf, daily_ret):
    """Compare the Sharpe ratio of an early window against a later one.

    Runs ``run_v53_causal`` once with ``V30_PARAMS`` and scores the same
    result with ``evaluate_slice`` over 2008-2018 ("train") and 2019-2025
    ("test"). Both Sharpe values, their signed difference and a verdict are
    printed; nothing is returned.

    Verdict rules (difference = test - train):
      * more than +0.20        -> STRONG PASS
      * within [-0.20, +0.20]  -> PASS
      * anything else          -> FAIL

    Args:
        dc, spy, vf, daily_ret: Forwarded unchanged to ``run_v53_causal``.
    """
    print("--- Test 2.1: Strict Train/Test Split ---")

    # A single full run; both windows are sliced out of the same result.
    result = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)

    sharpe_in_sample = evaluate_slice(result, "2008-01-01", "2018-12-31")['sharpe']
    sharpe_out_of_sample = evaluate_slice(result, "2019-01-01", "2025-12-31")['sharpe']

    print(f"Train Sharpe (2008-2018): {sharpe_in_sample:.4f}")
    print(f"Test Sharpe (2019-2025): {sharpe_out_of_sample:.4f}")

    delta = sharpe_out_of_sample - sharpe_in_sample
    print(f"Difference: {delta:+.4f}")

    # Checked from best to worst; a NaN delta fails both comparisons and
    # therefore lands in the FAIL branch.
    if delta > 0.20:
        print("Result: STRONG PASS (Exceptionally robust, test improved)")
    elif delta >= -0.20:
        print("Result: PASS (Robust, not overfit)")
    else:
        print("Result: FAIL (Likely overfit, test decayed > 0.20)")
def test_2_2_start_date_sensitivity(dc, spy, vf, daily_ret):
    """Measure how much the Sharpe ratio moves when the data start is shifted.

    For each offset in 0, 3, ..., 57 (20 values) the first ``offset`` rows of
    ``dc``, ``spy`` and ``daily_ret`` are dropped positionally, the backtest
    is re-run with ``V30_PARAMS``, and the 2008-2025 Sharpe is recorded.
    ``vf`` is passed through unsliced. The mean, standard deviation, minimum,
    maximum and range of the collected Sharpe values are printed, followed by
    PASS when the range is below 0.20 and FAIL otherwise. Nothing is returned.

    Args:
        dc, spy, daily_ret: Objects supporting ``.iloc`` slicing; the trimmed
            views are handed to ``run_v53_causal``.
        vf: Forwarded unchanged to ``run_v53_causal``.
    """
    print("\n--- Test 2.2: Start Date Sensitivity (Path Dependency Test) ---")
    print("Running 20 start date offsets (3-day steps) to sample distribution...", end="", flush=True)

    sharpe_samples = []
    for skip_rows in range(0, 60, 3):
        run = run_v53_causal(
            dc.iloc[skip_rows:],
            spy.iloc[skip_rows:],
            vf,
            daily_ret.iloc[skip_rows:],
            **V30_PARAMS,
        )
        metrics = evaluate_slice(run, "2008-01-01", "2025-12-31")
        sharpe_samples.append(metrics['sharpe'])
        # One progress dot per completed run.
        print(".", end="", flush=True)
    print()

    lowest = np.min(sharpe_samples)
    highest = np.max(sharpe_samples)
    spread = highest - lowest

    print(f"Mean Sharpe: {np.mean(sharpe_samples):.4f}")
    print(f"Std Dev: {np.std(sharpe_samples):.4f}")
    print(f"Min Sharpe: {lowest:.4f}")
    print(f"Max Sharpe: {highest:.4f}")
    print(f"Range: {spread:.4f}")

    print(
        "Result: PASS (Low path dependency, robust)"
        if spread < 0.20
        else "Result: FAIL (High path dependency, unreliable)"
    )
def test_2_3_survivorship_bias(dc, spy, vf, daily_ret):
    """Estimate survivorship bias by injecting "poison" tickers into the universe.

    Steps:
      1. Run the baseline backtest and read its 2008-2025 CAGR.
      2. Download closing prices for ``POISON_TICKERS`` with yfinance.
      3. Keep tickers that have more than 100 non-NaN closes, add the ones
         missing from ``dc`` as new columns, and recompute daily returns for
         the widened price table with ``pct_change()``.
      4. Re-run the backtest on the widened universe and compare CAGR.

    Prints PASS when the CAGR drop is at most 5.0 and the poisoned CAGR is
    positive, FAIL otherwise, and INCONCLUSIVE when no usable poison data
    could be obtained (in which case the comparison is skipped, because the
    "poisoned" run would be identical to the baseline). Nothing is returned.

    Args:
        dc: Price table supporting ``.copy()``, ``.columns``, ``.index`` and
            ``.pct_change()``; one column per ticker.
        spy: Forwarded unchanged to ``run_v53_causal``.
        vf: Iterable of universe members; extended with the valid poison
            tickers (order preserved, duplicates dropped).
        daily_ret: Returns used for the baseline run only.
    """
    print("\n--- Test 2.3: Survivorship Bias Quantification ---")

    # Baseline
    c_base = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)
    m_base = evaluate_slice(c_base, "2008-01-01", "2025-12-31")
    base_cagr = m_base['cagr']
    print(f"Baseline Full Universe CAGR: {base_cagr:.1f}%")

    POISON_TICKERS = [
        "BBBY", "WISH", "CLOV", "WKHS", "RIDE", "NKLA", "QS", "HYLN",
        "SPCE", "SKLZ", "CLNE", "GOEV", "ARVL", "FSR", "PSFE", "OPEN",
        "SOFI", "BARK", "BIRD", "BYND", "PTON", "ZM", "DOCU", "TDOC",
        "FVRR", "UPST", "AFRM", "RKLB", "IONQ", "DNA", "LAZR", "VLDR",
        "MVIS", "WOOF", "SDC", "LMND", "ROOT", "COUR", "DNUT", "OLO",
        "TUYA", "PAYO", "BGRY", "SEER", "PRCH", "ACHR", "JOBY", "LILM",
        "EVGO", "CHPT"
    ]

    # Imported here so the other tests in this module do not need yfinance.
    import yfinance as yf

    print("Downloading poison ticker data...", end="", flush=True)
    poison_raw = yf.download(POISON_TICKERS, start="2006-01-01", end="2025-12-31", progress=False)

    # Reduce the download to a table with one closing-price column per ticker.
    if isinstance(poison_raw.columns, pd.MultiIndex):
        lvl0 = poison_raw.columns.get_level_values(0).unique().tolist()
        poison_close = poison_raw["Close"] if "Close" in lvl0 else poison_raw
        if isinstance(poison_close.columns, pd.MultiIndex):
            # Still two-level: label columns by the innermost level.
            poison_close.columns = poison_close.columns.get_level_values(-1)
    else:
        poison_close = poison_raw

    valid_poison = [t for t in POISON_TICKERS if t in poison_close.columns and poison_close[t].notna().sum() > 100]
    print(f" {len(valid_poison)} valid poison tickers downloaded.")

    # Without any poison data the second run would equal the baseline and the
    # comparison below would report a meaningless 0.0 difference as a PASS.
    if not valid_poison:
        print("Result: INCONCLUSIVE (no poison ticker data available; comparison skipped)")
        return

    dc_poisoned = dc.copy()
    for t in valid_poison:
        if t not in dc_poisoned.columns:
            # NOTE(review): ffill carries the last close forward, so a ticker
            # that stops trading appears flat afterwards rather than as a
            # loss -- confirm this is the intended treatment of delistings.
            dc_poisoned[t] = poison_close[t].reindex(dc_poisoned.index).ffill()

    # Returns are recomputed for every column, not only the added ones.
    daily_ret_poisoned = dc_poisoned.pct_change()
    # dict.fromkeys de-duplicates while keeping first-seen order.
    poison_universe = list(dict.fromkeys(list(vf) + valid_poison))

    c_poison = run_v53_causal(dc_poisoned, spy, poison_universe, daily_ret_poisoned, **V30_PARAMS)
    m_poison = evaluate_slice(c_poison, "2008-01-01", "2025-12-31")
    poison_cagr = m_poison['cagr']
    print(f"Poison Universe CAGR: {poison_cagr:.1f}%")

    diff = base_cagr - poison_cagr
    print(f"CAGR Difference (Bias Est): {diff:+.1f}%")

    # Acceptable if within 5% of original (framework kill-test 2)
    if diff <= 5.0 and poison_cagr > 0:
        print("Result: PASS (Strategy survived poison universe)")
    else:
        print("Result: FAIL (Alpha relies heavily on universe selection/survivorship)")
if __name__ == "__main__":
    # Script entry point: print the banner, load the shared inputs once via
    # get_data(), then run the three Phase 2 checks in order on those inputs.
    banner_rule = "========================================"
    print(banner_rule)
    print(" V53 FRAMEWORK VALIDATION - PHASE 2")
    print(banner_rule)

    dc, spy, vf, daily_ret = get_data()

    phase2_checks = (
        test_2_1_strict_train_test,
        test_2_2_start_date_sensitivity,
        test_2_3_survivorship_bias,
    )
    for run_check in phase2_checks:
        run_check(dc, spy, vf, daily_ret)
|