Spaces:
Running
Running
| import sys, os | |
| import itertools | |
| import warnings; warnings.filterwarnings('ignore') | |
| from backtesting.framework.config import STRATEGY_NAME, ACTIVE_STRATEGY_FN, ACTIVE_PARAMS, load_data | |
| try: | |
| from v30_causal_engine import evaluate_slice | |
| except ImportError: | |
| from backtesting.v30_causal_engine import evaluate_slice | |
def _extract_curve(result):
    """Return the equity curve from a strategy result.

    The strategy callable is handled in two shapes: either it returns the
    curve directly, or a dict that carries the curve under the ``'curve'`` key.
    """
    if isinstance(result, dict) and 'curve' in result:
        return result['curve']
    return result


def run_test_4_1(param_grid=None):
    """Run Test 4.1 (parameter robustness) and Test 4.2 (overfit detection).

    Test 4.1 evaluates the active strategy on every combination of the
    parameter grid (each combination overrides the matching keys of
    ``ACTIVE_PARAMS``) over 2008-01-01..2025-12-31 and reports the range of
    Sharpe ratios.  The surface passes when the weakest Sharpe is >= 0.70.

    Test 4.2 takes the best-scoring combination and compares its Sharpe on
    2008-2018 ("train") against 2019-2025 ("test").  It fails when the test
    Sharpe is more than 0.15 below the train Sharpe.

    Args:
        param_grid: Optional mapping of parameter name -> iterable of values
            to sweep.  When ``None`` (the default), rebalance frequency and
            short momentum lookback are swept over a small built-in grid.

    Returns:
        None.  All results are printed to stdout.
    """
    print("=" * 80)
    print(f" TEST 4.1 & 4.2: PARAMETER ROBUSTNESS & OVERFIT DETECTION - {STRATEGY_NAME}")
    print("=" * 80)
    dc, spy, vf, daily_ret = load_data()

    # Default grid: rebalance frequency and short momentum lookback.
    if param_grid is None:
        param_grid = {
            'rebal_days': [40, 60, 80],
            'mom_short': [15, 21, 30],
        }

    keys = list(param_grid)
    combos = list(itertools.product(*(param_grid[k] for k in keys)))
    print(f"Evaluating {len(combos)} parameter combinations...")
    if not combos:
        # A grid entry with no candidate values yields an empty product;
        # min()/max() below would raise, so report and stop instead.
        print(" No parameter combinations to evaluate (a grid entry is empty).")
        return

    res = {}
    # Keep each curve so the best one can be reused in Test 4.2 instead of
    # re-running an identical backtest with the same parameters.
    curves = {}
    for combo in combos:
        overrides = dict(zip(keys, combo))
        p = ACTIVE_PARAMS.copy()
        p.update(overrides)
        c = _extract_curve(ACTIVE_STRATEGY_FN(dc, spy, vf, daily_ret, **p))
        m = evaluate_slice(c, "2008-01-01", "2025-12-31")
        res[combo] = m['sharpe']
        curves[combo] = c
        print(f" Params {overrides}: Sharpe {m['sharpe']:.4f}")

    sharpes = list(res.values())
    s_min, s_max = min(sharpes), max(sharpes)
    print("-" * 80)
    print(f" Surface Range: {s_min:.4f} to {s_max:.4f}")
    if s_min >= 0.70:
        print(" VERDICT: PASS (Smooth, robust parameter surface)")
    else:
        print(" VERDICT: WEAK PASS / FAIL (Surface contains weak spots or cliff edges)")

    print("\n--- Test 4.2: Single Parameter Overfit Detection ---")
    best_combo = max(res, key=res.get)
    best_params = dict(zip(keys, best_combo))
    print(f" Best Params found: {best_params} -> Evaluating Overfit Risk...")
    # Same parameters as in the grid loop, so the stored curve is reused
    # (assumes the strategy is deterministic for fixed inputs).
    c_best = curves[best_combo]
    m_train = evaluate_slice(c_best, "2008-01-01", "2018-12-31")
    m_test = evaluate_slice(c_best, "2019-01-01", "2025-12-31")
    print(f" Train Sharpe (2008-2018): {m_train['sharpe']:.4f}")
    print(f" Test Sharpe (2019-2025): {m_test['sharpe']:.4f}")
    diff = m_test['sharpe'] - m_train['sharpe']
    print(f" Out-of-Sample Lift: {diff:+.4f}")
    print("-" * 80)
    # The requirement is that Test didn't collapse massively compared to train.
    if diff < -0.15:
        print(" VERDICT: FAIL (Pure overfit, optimization disappeared out-of-sample)")
    elif diff > 0:
        print(" VERDICT: PASS (Improvement is genuine out-of-sample)")
    else:
        print(" VERDICT: PASS (Optimization held out-of-sample within acceptable limits)")
# Script entry point: run both tests only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    run_test_4_1()