"""Phase 2 validation checks for the V53 causal backtest.

Runs three robustness checks against ``run_v53_causal`` and prints a
PASS/FAIL verdict for each:

* 2.1 - train/test Sharpe comparison over two fixed date windows,
* 2.2 - sensitivity of the Sharpe ratio to the first row of input data,
* 2.3 - CAGR change after adding a list of "poison" tickers to the universe.
"""

import os
import sys
import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

# The project imports below are resolved relative to this script's directory,
# so the path entry must be in place before they execute.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from backtesting.engines.v30_causal_engine import (  # noqa: E402
    V30_PARAMS,
    evaluate_slice,
    get_data,
)
from backtesting.audits.v53_causal_audit import run_v53_causal  # noqa: E402

# Evaluation window shared by the full-sample checks (2.2 and 2.3).
_FULL_PERIOD = ("2008-01-01", "2025-12-31")

# Tickers added to the universe by the survivorship-bias check (2.3).
POISON_TICKERS = [
    "BBBY", "WISH", "CLOV", "WKHS", "RIDE", "NKLA", "QS", "HYLN", "SPCE",
    "SKLZ", "CLNE", "GOEV", "ARVL", "FSR", "PSFE", "OPEN", "SOFI", "BARK",
    "BIRD", "BYND", "PTON", "ZM", "DOCU", "TDOC", "FVRR", "UPST", "AFRM",
    "RKLB", "IONQ", "DNA", "LAZR", "VLDR", "MVIS", "WOOF", "SDC", "LMND",
    "ROOT", "COUR", "DNUT", "OLO", "TUYA", "PAYO", "BGRY", "SEER", "PRCH",
    "ACHR", "JOBY", "LILM", "EVGO", "CHPT",
]


def test_2_1_strict_train_test(dc, spy, vf, daily_ret):
    """Compare the Sharpe ratio of a train window against a test window.

    A single backtest is run over the supplied data and then evaluated on
    2008-01-01..2018-12-31 ("train") and 2019-01-01..2025-12-31 ("test").
    The verdict is based on ``test_sharpe - train_sharpe``:

    * within +/-0.20 -> PASS,
    * above +0.20    -> STRONG PASS,
    * below -0.20    -> FAIL.

    Args:
        dc: Price data passed straight through to ``run_v53_causal``.
        spy: Benchmark data passed straight through to ``run_v53_causal``.
        vf: Universe passed straight through to ``run_v53_causal``.
        daily_ret: Daily returns passed straight through to
            ``run_v53_causal``.

    Returns:
        None. Results are printed to stdout.
    """
    print("--- Test 2.1: Strict Train/Test Split ---")

    # One full backtest; both windows are slices of the same result.
    c = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)

    train_sharpe = evaluate_slice(c, "2008-01-01", "2018-12-31")['sharpe']
    test_sharpe = evaluate_slice(c, "2019-01-01", "2025-12-31")['sharpe']

    print(f"Train Sharpe (2008-2018): {train_sharpe:.4f}")
    print(f"Test Sharpe (2019-2025): {test_sharpe:.4f}")

    diff = test_sharpe - train_sharpe
    print(f"Difference: {diff:+.4f}")

    if abs(diff) <= 0.20:
        print("Result: PASS (Robust, not overfit)")
    elif diff > 0.20:
        print("Result: STRONG PASS (Exceptionally robust, test improved)")
    else:
        print("Result: FAIL (Likely overfit, test decayed > 0.20)")


def test_2_2_start_date_sensitivity(dc, spy, vf, daily_ret, *,
                                    n_offsets=20, step=3):
    """Measure how much the Sharpe ratio moves when leading rows are dropped.

    The backtest is re-run ``n_offsets`` times, each time dropping the first
    ``k * step`` rows (k = 0 .. n_offsets - 1) from ``dc``, ``spy`` and
    ``daily_ret``. Every run is evaluated over the same fixed window and the
    spread of the resulting Sharpe ratios is reported. The check passes when
    ``max - min`` is below 0.20.

    Args:
        dc: Price data; sliced positionally with ``.iloc``.
        spy: Benchmark data; sliced positionally with ``.iloc``.
        vf: Universe, passed through unchanged.
        daily_ret: Daily returns; sliced positionally with ``.iloc``.
        n_offsets: Number of start offsets to sample (default 20).
        step: Number of rows between consecutive offsets (default 3).

    Returns:
        None. Results are printed to stdout.

    Raises:
        ValueError: If ``n_offsets`` or ``step`` is smaller than 1.
    """
    if n_offsets < 1 or step < 1:
        raise ValueError("n_offsets and step must both be >= 1")

    print("\n--- Test 2.2: Start Date Sensitivity (Path Dependency Test) ---")
    print(
        f"Running {n_offsets} start date offsets ({step}-day steps) "
        "to sample distribution...",
        end="",
        flush=True,
    )

    sharpes = []
    for offset in range(0, n_offsets * step, step):
        c = run_v53_causal(
            dc.iloc[offset:],
            spy.iloc[offset:],
            vf,
            daily_ret.iloc[offset:],
            **V30_PARAMS,
        )
        sharpes.append(evaluate_slice(c, *_FULL_PERIOD)['sharpe'])
        # One dot per completed run as a progress indicator.
        print(".", end="", flush=True)
    print()

    s_mean = np.mean(sharpes)
    s_std = np.std(sharpes)
    s_min = np.min(sharpes)
    s_max = np.max(sharpes)
    s_range = s_max - s_min

    print(f"Mean Sharpe: {s_mean:.4f}")
    print(f"Std Dev: {s_std:.4f}")
    print(f"Min Sharpe: {s_min:.4f}")
    print(f"Max Sharpe: {s_max:.4f}")
    print(f"Range: {s_range:.4f}")

    if s_range < 0.20:
        print("Result: PASS (Low path dependency, robust)")
    else:
        print("Result: FAIL (High path dependency, unreliable)")


def _extract_close_prices(raw):
    """Return a frame of close prices with one flat column per ticker.

    When ``raw`` has MultiIndex columns and a top-level ``"Close"`` entry,
    that sub-frame is selected; otherwise ``raw`` is used as-is. Any
    remaining MultiIndex is flattened to its last level.
    """
    if not isinstance(raw.columns, pd.MultiIndex):
        return raw

    top_level = raw.columns.get_level_values(0).unique().tolist()
    closes = raw["Close"] if "Close" in top_level else raw
    if isinstance(closes.columns, pd.MultiIndex):
        # Copy first so the caller's frame keeps its original columns.
        closes = closes.copy()
        closes.columns = closes.columns.get_level_values(-1)
    return closes


def test_2_3_survivorship_bias(dc, spy, vf, daily_ret):
    """Estimate how much CAGR depends on the chosen ticker universe.

    A baseline backtest is compared with a second run whose universe is
    extended by ``POISON_TICKERS`` (prices downloaded via yfinance). The
    check passes when the baseline CAGR exceeds the poisoned CAGR by at most
    5.0 and the poisoned CAGR is still positive. If no poison ticker yields
    usable data, the check reports INCONCLUSIVE instead of passing trivially
    on an unchanged universe.

    Args:
        dc: Price frame with one column per ticker.
        spy: Benchmark data passed straight through to ``run_v53_causal``.
        vf: Iterable of tickers forming the baseline universe.
        daily_ret: Daily returns used for the baseline run.

    Returns:
        None. Results are printed to stdout.
    """
    print("\n--- Test 2.3: Survivorship Bias Quantification ---")

    # Baseline
    c_base = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)
    base_cagr = evaluate_slice(c_base, *_FULL_PERIOD)['cagr']
    print(f"Baseline Full Universe CAGR: {base_cagr:.1f}%")

    # Imported lazily so the other checks do not require yfinance.
    import yfinance as yf

    print("Downloading poison ticker data...", end="", flush=True)
    poison_raw = yf.download(
        POISON_TICKERS, start="2006-01-01", end="2025-12-31", progress=False
    )
    poison_close = _extract_close_prices(poison_raw)

    # Keep only tickers with more than 100 non-missing prices.
    valid_poison = [
        t for t in POISON_TICKERS
        if t in poison_close.columns and poison_close[t].notna().sum() > 100
    ]
    print(f" {len(valid_poison)} valid poison tickers downloaded.")

    if not valid_poison:
        # Without any poison data the second run would equal the baseline
        # and the comparison below would "pass" without testing anything.
        print("Result: INCONCLUSIVE (no poison ticker data available)")
        return

    # NOTE(review): forward-filling keeps a ticker at its last known price
    # once its data ends, i.e. zero return afterwards - confirm this is the
    # intended treatment for delisted names.
    new_columns = {
        t: poison_close[t].reindex(dc.index).ffill()
        for t in valid_poison
        if t not in dc.columns
    }
    if new_columns:
        # Single concat instead of column-by-column insertion, which
        # fragments the frame.
        dc_poisoned = pd.concat(
            [dc, pd.DataFrame(new_columns, index=dc.index)], axis=1
        )
    else:
        dc_poisoned = dc.copy()

    # NOTE(review): returns are recomputed for every column here, whereas the
    # baseline uses the caller-supplied daily_ret; confirm get_data() derives
    # daily_ret the same way, otherwise part of the CAGR gap is not caused by
    # the poison tickers.
    daily_ret_poisoned = dc_poisoned.pct_change()

    # Ordered, de-duplicated union of the baseline universe and poison names.
    poison_universe = list(dict.fromkeys(list(vf) + valid_poison))

    c_poison = run_v53_causal(
        dc_poisoned, spy, poison_universe, daily_ret_poisoned, **V30_PARAMS
    )
    poison_cagr = evaluate_slice(c_poison, *_FULL_PERIOD)['cagr']
    print(f"Poison Universe CAGR: {poison_cagr:.1f}%")

    diff = base_cagr - poison_cagr
    print(f"CAGR Difference (Bias Est): {diff:+.1f}%")

    # Acceptable if within 5% of original (framework kill-test 2)
    if diff <= 5.0 and poison_cagr > 0:
        print("Result: PASS (Strategy survived poison universe)")
    else:
        print("Result: FAIL (Alpha relies heavily on universe selection/survivorship)")


def main():
    """Load the backtest inputs once and run all three Phase 2 checks."""
    print("========================================")
    print(" V53 FRAMEWORK VALIDATION - PHASE 2")
    print("========================================")

    dc, spy, vf, daily_ret = get_data()

    test_2_1_strict_train_test(dc, spy, vf, daily_ret)
    test_2_2_start_date_sensitivity(dc, spy, vf, daily_ret)
    test_2_3_survivorship_bias(dc, spy, vf, daily_ret)


if __name__ == "__main__":
    main()