"""Tests 4.1 & 4.2: parameter robustness and overfit detection.

Test 4.1 evaluates the active strategy over a small parameter grid and checks
that the worst Sharpe ratio on the surface stays above a floor.  Test 4.2 takes
the best grid point and compares its Sharpe ratio on an early (train) window
against a later (test) window.
"""

import itertools
import os
import sys
import warnings

# Install the filter before the project imports below; keep this ordering.
warnings.filterwarnings('ignore')

from backtesting.framework.config import (
    STRATEGY_NAME,
    ACTIVE_STRATEGY_FN,
    ACTIVE_PARAMS,
    load_data,
)

try:
    from v30_causal_engine import evaluate_slice
except ImportError:
    from backtesting.v30_causal_engine import evaluate_slice


# Grid used when the caller does not supply one: rebalance frequency and
# short momentum lookback.
_DEFAULT_PARAM_GRID = {
    'rebal_days': [40, 60, 80],
    'mom_short': [15, 21, 30],
}

# Test 4.1 passes when the weakest Sharpe on the surface is at least this.
_MIN_SURFACE_SHARPE = 0.70

# Test 4.2 passes when (test Sharpe - train Sharpe) is at least this.
_MIN_OOS_LIFT = -0.15


def _strategy_curve(data, params):
    """Run the active strategy with *params* and return its curve.

    The strategy may return either the curve itself or a dict carrying it
    under the ``'curve'`` key; both shapes are accepted.
    """
    result = ACTIVE_STRATEGY_FN(*data, **params)
    if isinstance(result, dict) and 'curve' in result:
        return result['curve']
    return result


def run_test_4_1(param_grid=None):
    """Run Test 4.1 (parameter surface) and Test 4.2 (overfit check).

    Results are printed to stdout; nothing is returned.

    Args:
        param_grid: Optional mapping of parameter name -> iterable of
            candidate values.  Every combination (Cartesian product) is
            evaluated on top of ``ACTIVE_PARAMS``.  When ``None``, the
            default grid over ``rebal_days`` and ``mom_short`` is used.

    Raises:
        ValueError: If the grid produces no combinations (some parameter has
            an empty list of candidate values).
    """
    print("=" * 80)
    print(f" TEST 4.1 & 4.2: PARAMETER ROBUSTNESS & OVERFIT DETECTION - {STRATEGY_NAME}")
    print("=" * 80)

    if param_grid is None:
        param_grid = _DEFAULT_PARAM_GRID

    keys = list(param_grid)
    combos = list(itertools.product(*(param_grid[k] for k in keys)))
    if not combos:
        # Fail before the expensive data load; min()/max() below would
        # otherwise raise an opaque "empty sequence" error.
        raise ValueError(
            "param_grid yields no combinations; every parameter needs at "
            "least one candidate value"
        )

    dc, spy, vf, daily_ret = load_data()
    data = (dc, spy, vf, daily_ret)

    print(f"Evaluating {len(combos)} parameter combinations...")

    res = {}
    curves = {}
    for combo in combos:
        overrides = dict(zip(keys, combo))
        params = ACTIVE_PARAMS.copy()
        params.update(overrides)

        curve = _strategy_curve(data, params)
        metrics = evaluate_slice(curve, "2008-01-01", "2025-12-31")

        res[combo] = metrics['sharpe']
        # Kept so Test 4.2 can reuse the best curve instead of re-running
        # the backtest with identical parameters.
        curves[combo] = curve
        print(f" Params {overrides}: Sharpe {metrics['sharpe']:.4f}")

    sharpes = list(res.values())
    s_min, s_max = min(sharpes), max(sharpes)

    print("-" * 80)
    print(f" Surface Range: {s_min:.4f} to {s_max:.4f}")
    if s_min >= _MIN_SURFACE_SHARPE:
        print(" VERDICT: PASS (Smooth, robust parameter surface)")
    else:
        print(" VERDICT: WEAK PASS / FAIL (Surface contains weak spots or cliff edges)")

    print("\n--- Test 4.2: Single Parameter Overfit Detection ---")
    # NOTE(review): the best combination is chosen on the full 2008-2025
    # Sharpe, which overlaps the 2019-2025 test window below, so the test
    # window is not strictly out-of-sample with respect to this selection.
    # Confirm whether selection should be restricted to the train window.
    best_combo = max(res, key=res.get)
    best_params = dict(zip(keys, best_combo))
    print(f" Best Params found: {best_params} -> Evaluating Overfit Risk...")

    c_best = curves[best_combo]
    m_train = evaluate_slice(c_best, "2008-01-01", "2018-12-31")
    m_test = evaluate_slice(c_best, "2019-01-01", "2025-12-31")

    print(f" Train Sharpe (2008-2018): {m_train['sharpe']:.4f}")
    print(f" Test Sharpe (2019-2025): {m_test['sharpe']:.4f}")

    diff = m_test['sharpe'] - m_train['sharpe']
    print(f" Out-of-Sample Lift: {diff:+.4f}")
    print("-" * 80)

    # The requirement is that the test window did not collapse massively
    # compared to the train window.
    if diff < _MIN_OOS_LIFT:
        print(" VERDICT: FAIL (Pure overfit, optimization disappeared out-of-sample)")
    elif diff > 0:
        print(" VERDICT: PASS (Improvement is genuine out-of-sample)")
    else:
        print(" VERDICT: PASS (Optimization held out-of-sample within acceptable limits)")


if __name__ == "__main__":
    run_test_4_1()