import sys, os
import itertools
import warnings; warnings.filterwarnings('ignore')

from backtesting.framework.config import STRATEGY_NAME, ACTIVE_STRATEGY_FN, ACTIVE_PARAMS, load_data

try:
    from v30_causal_engine import evaluate_slice
except ImportError:
    from backtesting.v30_causal_engine import evaluate_slice

# Acceptance thresholds and evaluation windows used by the verdicts below.
_MIN_SURFACE_SHARPE = 0.70    # Test 4.1: every grid point must reach this Sharpe
_MAX_OOS_SHARPE_DROP = 0.15   # Test 4.2: tolerated train -> test Sharpe decline
_FULL_WINDOW = ("2008-01-01", "2025-12-31")
_TRAIN_WINDOW = ("2008-01-01", "2018-12-31")
_TEST_WINDOW = ("2019-01-01", "2025-12-31")


def _unwrap_curve(result):
    """Return ``result['curve']`` if the strategy returned a dict holding one, else ``result``."""
    if isinstance(result, dict) and 'curve' in result:
        return result['curve']
    return result


def run_test_4_1(param_grid=None):
    """Run Test 4.1 (parameter robustness) and Test 4.2 (overfit detection).

    Test 4.1 sweeps every combination of ``param_grid`` on top of
    ``ACTIVE_PARAMS``, runs ``ACTIVE_STRATEGY_FN`` for each one, and scores the
    result with ``evaluate_slice`` over the full window.  The surface passes
    when the lowest Sharpe is at least 0.70.

    Test 4.2 takes the best-scoring combination and compares its Sharpe on the
    2008-2018 window against the 2019-2025 window.  It passes as long as the
    later window is no more than 0.15 Sharpe below the earlier one.

    All results are printed to stdout; nothing is returned.

    Args:
        param_grid: Optional mapping of parameter name -> iterable of values to
            sweep.  When ``None`` the default grid (rebalance frequency and
            short momentum lookback) is used.

    Raises:
        ValueError: If ``param_grid`` yields no parameter combinations (for
            example because one of its value lists is empty).
    """
    if param_grid is None:
        # Default grid: rebalance frequency and short momentum lookback.
        param_grid = {
            'rebal_days': [40, 60, 80],
            'mom_short': [15, 21, 30]
        }

    keys = list(param_grid.keys())
    combos = list(itertools.product(*(param_grid[k] for k in keys)))
    if not combos:
        # Fail early with a clear message instead of an opaque min()/max()
        # error after the (potentially slow) data load.
        raise ValueError("param_grid must yield at least one parameter combination")

    print("=" * 80)
    print(f" TEST 4.1 & 4.2: PARAMETER ROBUSTNESS & OVERFIT DETECTION - {STRATEGY_NAME}")
    print("=" * 80)

    dc, spy, vf, daily_ret = load_data()

    print(f"Evaluating {len(combos)} parameter combinations...")
    sharpe_by_combo = {}
    best_combo = None
    best_sharpe = None
    best_curve = None

    for combo in combos:
        overrides = dict(zip(keys, combo))
        params = ACTIVE_PARAMS.copy()
        params.update(overrides)

        curve = _unwrap_curve(ACTIVE_STRATEGY_FN(dc, spy, vf, daily_ret, **params))
        sharpe = evaluate_slice(curve, *_FULL_WINDOW)['sharpe']
        sharpe_by_combo[combo] = sharpe
        print(f"  Params {overrides}: Sharpe {sharpe:.4f}")

        # Keep the curve of the first-best combination so Test 4.2 does not
        # have to re-run the strategy.  The strict '>' matches the tie-breaking
        # of max(), which returns the first maximal item.
        if best_combo is None or sharpe > best_sharpe:
            best_combo, best_sharpe, best_curve = combo, sharpe, curve

    sharpes = list(sharpe_by_combo.values())
    s_min, s_max = min(sharpes), max(sharpes)

    print("-" * 80)
    print(f"  Surface Range: {s_min:.4f} to {s_max:.4f}")
    if s_min >= _MIN_SURFACE_SHARPE:
        print("  VERDICT: PASS (Smooth, robust parameter surface)")
    else:
        print("  VERDICT: WEAK PASS / FAIL (Surface contains weak spots or cliff edges)")

    print("\n--- Test 4.2: Single Parameter Overfit Detection ---")
    best_params = dict(zip(keys, best_combo))
    print(f"  Best Params found: {best_params} -> Evaluating Overfit Risk...")

    m_train = evaluate_slice(best_curve, *_TRAIN_WINDOW)
    m_test = evaluate_slice(best_curve, *_TEST_WINDOW)

    print(f"  Train Sharpe (2008-2018): {m_train['sharpe']:.4f}")
    print(f"  Test Sharpe  (2019-2025): {m_test['sharpe']:.4f}")

    diff = m_test['sharpe'] - m_train['sharpe']
    print(f"  Out-of-Sample Lift: {diff:+.4f}")

    print("-" * 80)
    # The requirement is that the test window did not collapse massively
    # compared to the train window.
    if diff < -_MAX_OOS_SHARPE_DROP:
        print("  VERDICT: FAIL (Pure overfit, optimization disappeared out-of-sample)")
    elif diff > 0:
        print("  VERDICT: PASS (Improvement is genuine out-of-sample)")
    else:
        print("  VERDICT: PASS (Optimization held out-of-sample within acceptable limits)")
# Script entry point: run the robustness / overfit checks only when this file
# is executed directly, not when it is imported.
if __name__ == "__main__":
    run_test_4_1()