Spaces:
Running
Running
| import sys, os | |
| import numpy as np, pandas as pd | |
| import warnings; warnings.filterwarnings('ignore') | |
| sys.path.insert(0, os.path.dirname(__file__)) | |
| from backtesting.engines.v30_causal_engine import get_data, evaluate_slice, V30_PARAMS | |
| from backtesting.audits.v53_causal_audit import run_v53_causal | |
def test_2_1_strict_train_test(dc, spy, vf, daily_ret):
    """Compare the Sharpe ratio of the 2008-2018 slice against 2019-2025.

    The backtest is run once through ``run_v53_causal`` with ``V30_PARAMS``;
    both windows are then scored with ``evaluate_slice`` on that single
    result. The two Sharpe values, their difference (test minus train) and a
    verdict line are printed; nothing is returned.

    Args:
        dc, spy, vf, daily_ret: Forwarded unchanged to ``run_v53_causal``
            (the script entry point obtains them from ``get_data()``).
    """
    print("--- Test 2.1: Strict Train/Test Split ---")

    # One full backtest; the train/test windows are slices of the same run.
    backtest = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)

    in_sample = evaluate_slice(backtest, "2008-01-01", "2018-12-31")['sharpe']
    out_of_sample = evaluate_slice(backtest, "2019-01-01", "2025-12-31")['sharpe']

    print(f"Train Sharpe (2008-2018): {in_sample:.4f}")
    print(f"Test Sharpe (2019-2025): {out_of_sample:.4f}")

    gap = out_of_sample - in_sample
    print(f"Difference: {gap:+.4f}")

    # Within the +/-0.20 band is a plain pass. Outside it, only an improvement
    # counts as a (strong) pass; anything else, including NaN, is a fail.
    if abs(gap) <= 0.20:
        verdict = "Result: PASS (Robust, not overfit)"
    else:
        verdict = (
            "Result: STRONG PASS (Exceptionally robust, test improved)"
            if gap > 0.20
            else "Result: FAIL (Likely overfit, test decayed > 0.20)"
        )
    print(verdict)
def test_2_2_start_date_sensitivity(dc, spy, vf, daily_ret):
    """Measure how much the Sharpe ratio moves when leading rows are dropped.

    For each offset in ``range(0, 60, 3)`` the first ``offset`` rows of
    ``dc``, ``spy`` and ``daily_ret`` are removed positionally (``.iloc``),
    the backtest is re-run with ``V30_PARAMS``, and the 2008-01-01 to
    2025-12-31 Sharpe is collected. Summary statistics and a verdict based on
    the max-min range (threshold 0.20) are printed; nothing is returned.

    Args:
        dc, spy, daily_ret: Objects supporting ``.iloc`` slicing; sliced per
            offset before being passed to ``run_v53_causal``.
        vf: Passed to ``run_v53_causal`` unchanged on every run.
    """
    print("\n--- Test 2.2: Start Date Sensitivity (Path Dependency Test) ---")
    print("Running 20 start date offsets (3-day steps) to sample distribution...", end="", flush=True)

    observed = []
    for skip in range(0, 60, 3):
        run = run_v53_causal(
            dc.iloc[skip:],
            spy.iloc[skip:],
            vf,
            daily_ret.iloc[skip:],
            **V30_PARAMS,
        )
        observed.append(evaluate_slice(run, "2008-01-01", "2025-12-31")['sharpe'])
        # One dot per completed run as a progress indicator.
        print(".", end="", flush=True)
    print()

    lowest = np.min(observed)
    highest = np.max(observed)
    spread = highest - lowest

    print(f"Mean Sharpe: {np.mean(observed):.4f}")
    print(f"Std Dev: {np.std(observed):.4f}")
    print(f"Min Sharpe: {lowest:.4f}")
    print(f"Max Sharpe: {highest:.4f}")
    print(f"Range: {spread:.4f}")

    print(
        "Result: PASS (Low path dependency, robust)"
        if spread < 0.20
        else "Result: FAIL (High path dependency, unreliable)"
    )
def test_2_3_survivorship_bias(dc, spy, vf, daily_ret):
    """Estimate survivorship bias by adding a "poison" basket to the universe.

    Steps:
      1. Run the baseline backtest and record its 2008-01-01 to 2025-12-31 CAGR.
      2. Download price history for the hard-coded ``POISON_TICKERS`` with
         ``yfinance`` and keep those with more than 100 non-null closes.
      3. Add the missing tickers as new columns of a copy of ``dc``
         (reindexed to ``dc``'s index and forward-filled), recompute returns,
         extend the universe, and re-run the backtest.
      4. Print both CAGRs, their difference, and a verdict.

    If no poison ticker yields usable data, the comparison would be the
    baseline against itself, so the test reports INCONCLUSIVE and returns
    early instead of printing a meaningless PASS.

    Args:
        dc: Price frame; copied, never mutated. Columns are ticker names.
        spy: Passed to ``run_v53_causal`` unchanged.
        vf: Iterable of universe tickers; extended (order-preserving,
            de-duplicated) with the valid poison tickers for the second run.
        daily_ret: Returns used for the baseline run only.

    Returns:
        None. All results are printed.
    """
    print("\n--- Test 2.3: Survivorship Bias Quantification ---")

    # Baseline
    c_base = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)
    m_base = evaluate_slice(c_base, "2008-01-01", "2025-12-31")
    base_cagr = m_base['cagr']
    print(f"Baseline Full Universe CAGR: {base_cagr:.1f}%")

    POISON_TICKERS = [
        "BBBY", "WISH", "CLOV", "WKHS", "RIDE", "NKLA", "QS", "HYLN",
        "SPCE", "SKLZ", "CLNE", "GOEV", "ARVL", "FSR", "PSFE", "OPEN",
        "SOFI", "BARK", "BIRD", "BYND", "PTON", "ZM", "DOCU", "TDOC",
        "FVRR", "UPST", "AFRM", "RKLB", "IONQ", "DNA", "LAZR", "VLDR",
        "MVIS", "WOOF", "SDC", "LMND", "ROOT", "COUR", "DNUT", "OLO",
        "TUYA", "PAYO", "BGRY", "SEER", "PRCH", "ACHR", "JOBY", "LILM",
        "EVGO", "CHPT",
    ]

    # Imported lazily so the other tests do not require yfinance.
    import yfinance as yf

    print("Downloading poison ticker data...", end="", flush=True)
    poison_raw = yf.download(POISON_TICKERS, start="2006-01-01", end="2025-12-31", progress=False)

    # A multi-ticker download has two column levels; reduce it to one close
    # price column per ticker.
    if isinstance(poison_raw.columns, pd.MultiIndex):
        lvl0 = poison_raw.columns.get_level_values(0).unique().tolist()
        poison_close = poison_raw["Close"] if "Close" in lvl0 else poison_raw
        if isinstance(poison_close.columns, pd.MultiIndex):
            poison_close.columns = poison_close.columns.get_level_values(-1)
    else:
        poison_close = poison_raw

    # Keep only tickers with a meaningful amount of history.
    valid_poison = [
        t for t in POISON_TICKERS
        if t in poison_close.columns and poison_close[t].notna().sum() > 100
    ]
    print(f" {len(valid_poison)} valid poison tickers downloaded.")

    # Without any poison data the second run would equal the baseline and the
    # difference would be exactly zero: a false PASS. Report it explicitly.
    if not valid_poison:
        print("Result: INCONCLUSIVE (No valid poison ticker data; universe unchanged, bias not measured)")
        return

    dc_poisoned = dc.copy()
    for t in valid_poison:
        if t not in dc_poisoned.columns:
            # Align to the backtest calendar; gaps are carried forward.
            dc_poisoned[t] = poison_close[t].reindex(dc_poisoned.index).ffill()

    # NOTE(review): returns are recomputed for the whole frame here, while the
    # baseline uses the daily_ret supplied by the caller. If that was not a
    # plain pct_change() of dc, the two runs differ in more than the universe.
    # Confirm against get_data().
    daily_ret_poisoned = dc_poisoned.pct_change()

    # dict.fromkeys de-duplicates while preserving order.
    poison_universe = list(dict.fromkeys(list(vf) + valid_poison))

    c_poison = run_v53_causal(dc_poisoned, spy, poison_universe, daily_ret_poisoned, **V30_PARAMS)
    m_poison = evaluate_slice(c_poison, "2008-01-01", "2025-12-31")
    poison_cagr = m_poison['cagr']
    print(f"Poison Universe CAGR: {poison_cagr:.1f}%")

    diff = base_cagr - poison_cagr
    print(f"CAGR Difference (Bias Est): {diff:+.1f}%")

    # Acceptable if within 5% of original (framework kill-test 2)
    if diff <= 5.0 and poison_cagr > 0:
        print("Result: PASS (Strategy survived poison universe)")
    else:
        print("Result: FAIL (Alpha relies heavily on universe selection/survivorship)")
if __name__ == "__main__":
    # Print the banner, load the shared inputs once, then run each Phase 2
    # check in order against the same data.
    banner_rule = "========================================"
    for banner_line in (banner_rule, " V53 FRAMEWORK VALIDATION - PHASE 2", banner_rule):
        print(banner_line)

    dc, spy, vf, daily_ret = get_data()

    for phase_2_check in (
        test_2_1_strict_train_test,
        test_2_2_start_date_sensitivity,
        test_2_3_survivorship_bias,
    ):
        phase_2_check(dc, spy, vf, daily_ret)