"""V53 framework validation, phase 4: parameter-sensitivity checks.

Test 4.1 sweeps a grid of rebalance intervals and pick counts and reports the
resulting Sharpe surface. Test 4.2 checks whether the best cell of that
surface keeps its advantage over the base parameters in a later time window.
"""
import os
import sys
import warnings

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')

# The script's own directory must be on sys.path before the project imports
# below are resolved, so this statement has to stay ahead of them.
sys.path.insert(0, os.path.dirname(__file__))

from backtesting.engines.v30_causal_engine import get_data, evaluate_slice, V30_PARAMS
from backtesting.audits.v53_causal_audit import run_v53_causal


def _is_finite(value):
    """Return True when *value* is a real number (neither NaN nor +/-inf)."""
    return bool(np.isfinite(value))


def test_4_1_param_surface(dc, spy, vf, daily_ret):
    """Sweep rebalance interval x pick count and report the Sharpe surface.

    Every combination of the two grids is run through ``run_v53_causal`` with
    ``V30_PARAMS`` overridden for ``rebal_days`` and ``top_n``, then scored
    with ``evaluate_slice`` over 2008-01-01 .. 2025-12-31.

    Args:
        dc, spy, vf, daily_ret: Data objects as returned by ``get_data()``;
            forwarded untouched to ``run_v53_causal``.

    Returns:
        dict mapping ``(rebal_days, top_n)`` to the full-period Sharpe value
        reported by ``evaluate_slice`` (non-finite values are kept as-is).
    """
    print("--- Test 4.1: Parameter Sensitivity Surface ---")
    rebal_grid = [21, 30, 40, 60]
    pick_grid = [5, 10, 15, 20]

    print("Sweeping parameter grid...", end="", flush=True)
    results = {}
    for r in rebal_grid:
        for p in pick_grid:
            params = V30_PARAMS.copy()
            params['rebal_days'] = r
            params['top_n'] = p
            c = run_v53_causal(dc, spy, vf, daily_ret, **params)
            m = evaluate_slice(c, "2008-01-01", "2025-12-31")
            results[(r, p)] = m['sharpe']
            # One progress dot per grid cell; flushed so it shows immediately.
            print(".", end="", flush=True)
    print("\n")

    # NaN compares False against everything, so built-in min()/max() give
    # order-dependent answers and a NaN would slip past both thresholds below.
    # Rank only the finite cells and treat any non-finite cell as a failure.
    finite_sharpes = [s for s in results.values() if _is_finite(s)]
    bad_cells = [cell for cell, s in results.items() if not _is_finite(s)]
    min_s = min(finite_sharpes) if finite_sharpes else float("nan")
    max_s = max(finite_sharpes) if finite_sharpes else float("nan")

    for r in rebal_grid:
        cells = "".join(f"Pick{p:2d}: {results[(r, p)]:.2f} " for p in pick_grid)
        print(f"Rebal {r:2d}d | {cells}")

    print(f"\nSurface Range: {min_s:.2f} to {max_s:.2f}")
    if bad_cells:
        print(f"Non-finite Sharpe at (rebal_days, top_n): {bad_cells}")
        print("Result: FAIL (Non-finite Sharpe in surface)")
    elif min_s >= 0.70:
        print("Result: PASS (Smooth, robust surface)")
    elif min_s < 0.60:
        print("Result: FAIL (Cliff edge detected)")
    else:
        print("Result: WEAK PASS (Acceptable but contains weak spots)")

    return results


def test_4_2_single_param_overfit(dc, spy, vf, daily_ret, results):
    """Check whether the best grid cell is a genuine improvement or an overfit.

    The cell with the highest Sharpe in ``results`` is compared with the
    ``(60, 15)`` cell. If it is better by more than 0.10, the strategy is
    re-run with those parameters and scored on 2008-2018 and 2019-2025; the
    2019-2025 Sharpe is then compared with a ``V30_PARAMS`` run over the same
    window.

    Args:
        dc, spy, vf, daily_ret: Data objects as returned by ``get_data()``;
            forwarded untouched to ``run_v53_causal``.
        results: dict mapping ``(rebal_days, top_n)`` to a Sharpe value, as
            returned by ``test_4_1_param_surface``.

    Returns:
        None. The verdict is printed.

    Raises:
        ValueError: if ``results`` holds no finite Sharpe value to rank.
    """
    print("\n--- Test 4.2: Single Parameter Overfit Detection ---")

    # Find the parameter set that gave the highest full-period Sharpe.
    # Non-finite entries are excluded: max() over NaN is order-dependent.
    finite = {cell: s for cell, s in results.items() if _is_finite(s)}
    if not finite:
        raise ValueError("results contains no finite Sharpe values to rank")
    best_params = max(finite, key=finite.get)
    best_r, best_p = best_params
    best_s = finite[best_params]

    # NOTE(review): the base cell is hard-coded as (60, 15) while the baseline
    # re-run below uses V30_PARAMS - presumably the same configuration;
    # confirm that V30_PARAMS really is rebal_days=60 / top_n=15.
    base_s = results.get((60, 15), 0)

    print(f"Base Params (60d, 15 picks) Sharpe: {base_s:.4f}")
    print(f"Best Params ({best_r}d, {best_p} picks) Sharpe: {best_s:.4f}")

    if not _is_finite(base_s):
        # Without this guard the NaN difference would fall into the
        # "near optimal or stable" PASS branch.
        print("\nBase parameter Sharpe is not finite; improvement cannot be measured.")
        print("Result: FAIL (Base parameters produced no valid Sharpe)")
        return

    diff = best_s - base_s
    if diff <= 0.10:
        print("\nNo single parameter caused dramatic (>0.10) overfit behavior.")
        print("Result: PASS (Base parameters are near optimal or stable)")
        return

    print("\nSignificant improvement detected. Running Train/Test Split on best params...")
    params = V30_PARAMS.copy()
    params['rebal_days'] = best_r
    params['top_n'] = best_p

    # NOTE(review): when `results` comes from test 4.1 the best cell was picked
    # on 2008-2025, which overlaps the 2019-2025 "test" window below, so this
    # comparison is not strictly out-of-sample - confirm that is intended.
    c = run_v53_causal(dc, spy, vf, daily_ret, **params)
    m_train = evaluate_slice(c, "2008-01-01", "2018-12-31")
    m_test = evaluate_slice(c, "2019-01-01", "2025-12-31")

    print(f"Train Sharpe (2008-2018): {m_train['sharpe']:.4f}")
    print(f"Test Sharpe (2019-2025): {m_test['sharpe']:.4f}")

    # Original baseline test sharpe
    c_orig = run_v53_causal(dc, spy, vf, daily_ret, **V30_PARAMS)
    m_orig_test = evaluate_slice(c_orig, "2019-01-01", "2025-12-31")
    orig_test_s = m_orig_test['sharpe']

    test_diff = m_test['sharpe'] - orig_test_s
    print(f"Out-of-sample edge over base params: {test_diff:+.4f}")

    if not _is_finite(test_diff):
        # A NaN edge fails both comparisons below and would read as WEAK PASS.
        print("Result: INCONCLUSIVE (Out-of-sample Sharpe is not finite)")
    elif test_diff > 0.10:
        print("Result: PASS (Improvement is genuine out-of-sample)")
    elif test_diff < 0:
        print("Result: FAIL (Pure overfit, optimization disappeared out-of-sample)")
    else:
        print("Result: WEAK PASS (Marginal out-of-sample improvement)")


def main():
    """Load the data once and run both phase-4 checks in order."""
    print("========================================")
    print(" V53 FRAMEWORK VALIDATION - PHASE 4")
    print("========================================")
    dc, spy, vf, daily_ret = get_data()

    res = test_4_1_param_surface(dc, spy, vf, daily_ret)
    test_4_2_single_param_overfit(dc, spy, vf, daily_ret, res)


if __name__ == "__main__":
    main()