stockproject / backtesting /framework /test_4_1_param_sweep.py
harshisageek's picture
deploy: clean history for HuggingFace
1cd56b6
import sys, os
import itertools
import warnings; warnings.filterwarnings('ignore')
from backtesting.framework.config import STRATEGY_NAME, ACTIVE_STRATEGY_FN, ACTIVE_PARAMS, load_data
# Import evaluate_slice from the bare module name first; if that module is not
# importable, fall back to the package-qualified path.
# NOTE(review): presumably this lets the script run both from inside the
# backtesting directory and from the project root - confirm.
try:
    from v30_causal_engine import evaluate_slice
except ImportError:
    from backtesting.v30_causal_engine import evaluate_slice
def run_test_4_1(param_grid=None):
    """Run Test 4.1 (parameter robustness) and Test 4.2 (overfit detection).

    Test 4.1 runs the active strategy once for every combination in
    ``param_grid`` (overrides applied on top of a copy of ``ACTIVE_PARAMS``),
    scores each resulting curve with ``evaluate_slice`` over 2008-01-01 to
    2025-12-31, and reports the min/max Sharpe across the surface. The surface
    passes when the lowest Sharpe is at least 0.70.

    Test 4.2 takes the best-scoring combination from the sweep and compares
    its Sharpe on 2008-2018 against 2019-2025. The check passes when the
    later window's Sharpe is no more than 0.15 below the earlier one.

    All results are printed; nothing is returned.

    Args:
        param_grid: Optional mapping of strategy parameter name to an iterable
            of candidate values. When ``None``, the default grid over
            ``rebal_days`` and ``mom_short`` is used.

    Raises:
        ValueError: If ``param_grid`` yields no combinations (i.e. some
            parameter has an empty list of candidate values).
    """
    full_start, full_end = "2008-01-01", "2025-12-31"
    train_start, train_end = "2008-01-01", "2018-12-31"
    test_start, test_end = "2019-01-01", "2025-12-31"

    print("=" * 80)
    print(f" TEST 4.1 & 4.2: PARAMETER ROBUSTNESS & OVERFIT DETECTION - {STRATEGY_NAME}")
    print("=" * 80)

    if param_grid is None:
        # Default grid: rebalance frequency and short momentum lookback.
        param_grid = {
            'rebal_days': [40, 60, 80],
            'mom_short': [15, 21, 30],
        }

    keys = list(param_grid)
    combos = list(itertools.product(*(param_grid[k] for k in keys)))
    if not combos:
        # Fail before the data load instead of hitting min() on an empty
        # sequence after the sweep.
        raise ValueError(
            "param_grid yields no combinations; every parameter needs at "
            "least one candidate value"
        )

    dc, spy, vf, daily_ret = load_data()

    print(f"Evaluating {len(combos)} parameter combinations...")

    sharpe_by_combo = {}
    # Track the best combination and its curve while sweeping so Test 4.2 can
    # reuse the curve instead of re-running the same backtest. Strict ">"
    # keeps the first maximum, matching max(..., key=...) tie-breaking.
    best_combo = None
    best_sharpe = None
    best_curve = None

    for combo in combos:
        overrides = dict(zip(keys, combo))
        params = ACTIVE_PARAMS.copy()
        params.update(overrides)

        result = ACTIVE_STRATEGY_FN(dc, spy, vf, daily_ret, **params)
        # The strategy may return either the curve itself or a dict that
        # carries it under the 'curve' key.
        if isinstance(result, dict) and 'curve' in result:
            curve = result['curve']
        else:
            curve = result

        sharpe = evaluate_slice(curve, full_start, full_end)['sharpe']
        sharpe_by_combo[combo] = sharpe
        print(f" Params {overrides}: Sharpe {sharpe:.4f}")

        if best_combo is None or sharpe > best_sharpe:
            best_combo, best_sharpe, best_curve = combo, sharpe, curve

    sharpes = list(sharpe_by_combo.values())
    s_min, s_max = min(sharpes), max(sharpes)

    print("-" * 80)
    print(f" Surface Range: {s_min:.4f} to {s_max:.4f}")
    if s_min >= 0.70:
        print(" VERDICT: PASS (Smooth, robust parameter surface)")
    else:
        print(" VERDICT: WEAK PASS / FAIL (Surface contains weak spots or cliff edges)")

    print("\n--- Test 4.2: Single Parameter Overfit Detection ---")
    # NOTE(review): the best combination is selected on the full 2008-2025
    # Sharpe, which includes the test window below. Selecting on the train
    # window only would be a stricter overfit check - confirm intent.
    best_params = dict(zip(keys, best_combo))
    print(f" Best Params found: {best_params} -> Evaluating Overfit Risk...")

    m_train = evaluate_slice(best_curve, train_start, train_end)
    m_test = evaluate_slice(best_curve, test_start, test_end)
    print(f" Train Sharpe (2008-2018): {m_train['sharpe']:.4f}")
    print(f" Test Sharpe (2019-2025): {m_test['sharpe']:.4f}")

    diff = m_test['sharpe'] - m_train['sharpe']
    print(f" Out-of-Sample Lift: {diff:+.4f}")
    print("-" * 80)

    # The requirement is that Test didn't collapse massively compared to train.
    if diff > 0:
        print(" VERDICT: PASS (Improvement is genuine out-of-sample)")
    elif diff >= -0.15:
        print(" VERDICT: PASS (Optimization held out-of-sample within acceptable limits)")
    else:
        print(" VERDICT: FAIL (Pure overfit, optimization disappeared out-of-sample)")
# Run the parameter sweep and overfit check when this file is executed as a script.
if __name__ == "__main__":
    run_test_4_1()