Premchan369
/

alphaforge-quant-system

+"""Walk-Forward Validation Engine - The #1 Most Critical Missing Piece
+Walk-forward validation is the ONLY correct way to test time series strategies.
+Random train/test split = GUARANTEED data leakage and false results.
+Based on: Ong & Herremans 2023 (MTL-TSMOM), Lopez de Prado 2018
+"""
+import numpy as np
+import pandas as pd
+from typing import Dict, List, Tuple, Optional, Callable, Iterator
+from dataclasses import dataclass
+import warnings
+warnings.filterwarnings('ignore')
@dataclass
class WalkForwardConfig:
    """Configuration for walk-forward validation.

    All sizes are counts of observations (rows of the input array); the
    defaults correspond to daily bars.

    Raises:
        ValueError: If a window/step size is smaller than 1, a gap is
            negative, or ``n_splits`` is given but smaller than 1. Such
            values would otherwise surface later as a division by zero, a
            never-ending split generator, or overlapping train/test windows.
    """
    min_train_size: int = 252          # Minimum training observations (1 year of daily bars)
    test_size: int = 63                # Test window length (3 months)
    step_size: int = 21                # How far each fold moves forward (1 month)
    embargo_gap: int = 5               # Observations skipped between train and test (prevents leakage)
    purge_k: int = 0                   # Purge k overlapping observations
    n_splits: Optional[int] = None     # Maximum number of folds (derived from the data if None)

    def __post_init__(self) -> None:
        # Window and step sizes must be strictly positive.
        for name in ("min_train_size", "test_size", "step_size"):
            value = getattr(self, name)
            if value < 1:
                raise ValueError(f"{name} must be >= 1, got {value}")
        # Gaps may be zero but never negative (a negative gap means overlap).
        for name in ("embargo_gap", "purge_k"):
            value = getattr(self, name)
            if value < 0:
                raise ValueError(f"{name} must be >= 0, got {value}")
        if self.n_splits is not None and self.n_splits < 1:
            raise ValueError(f"n_splits must be >= 1 or None, got {self.n_splits}")
class PurgedKFoldCV:
    """
    Purged, forward-only K-fold splitter for time series.

    Inspired by Marcos Lopez de Prado (2018), "Advances in Financial Machine
    Learning". The data is cut into ``n_splits`` consecutive blocks. Each block
    is used once as the test set, and the training set is everything BEFORE
    that block minus the last ``purge_k`` observations, because observations
    right next to the boundary are autocorrelated with the test data and
    would leak information.

    Note that this splitter never trains on data that comes after the test
    block. As a consequence the first block has no usable history and is not
    emitted as a fold.
    """

    def __init__(self, n_splits: int = 5, purge_k: int = 10):
        """
        Args:
            n_splits: Number of consecutive blocks the data is divided into.
            purge_k: Number of observations dropped from the end of the
                training set, directly before the test block.

        Raises:
            ValueError: If ``n_splits`` < 1 or ``purge_k`` < 0.
        """
        if n_splits < 1:
            raise ValueError(f"n_splits must be >= 1, got {n_splits}")
        if purge_k < 0:
            raise ValueError(f"purge_k must be >= 0, got {purge_k}")
        self.n_splits = n_splits
        self.purge_k = purge_k

    def split(self, X: np.ndarray, y: Optional[np.ndarray] = None,
              groups: Optional[np.ndarray] = None) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Yield ``(train_indices, test_indices)`` pairs with purging.

        Only ``len(X)`` is used; ``y`` and ``groups`` are accepted for
        interface compatibility and ignored. Folds whose training set would
        be empty (the first block, and any block starting within ``purge_k``
        of the beginning) are skipped, so fewer than ``n_splits`` pairs may
        be produced.
        """
        n_samples = len(X)
        fold_size = n_samples // self.n_splits
        if fold_size == 0:
            # Fewer observations than blocks: no non-empty test set exists.
            return
        last_fold = self.n_splits - 1
        for i in range(self.n_splits):
            test_start = i * fold_size
            # The last block absorbs the n_samples % n_splits remainder so the
            # most recent observations are tested instead of silently dropped.
            test_end = n_samples if i == last_fold else test_start + fold_size
            # Everything before the test block, minus the purge gap.
            train_end = test_start - self.purge_k
            if train_end <= 0:
                continue
            yield np.arange(train_end), np.arange(test_start, test_end)

    def get_n_splits(self, X: np.ndarray, y=None, groups=None) -> int:
        """Return how many folds ``split(X)`` actually yields."""
        return sum(1 for _ in self.split(X))
class ExpandingWindowWalkForward:
    """
    Expanding window walk-forward with embargo gap.

    With 0-based, half-open index ranges and ``T = min_train_size + i * step_size``
    for fold ``i``:
    - Train on [0, T)
    - Embargo gap: [T, T + embargo_gap) is used by neither set
    - Test on [T + embargo_gap, T + embargo_gap + test_size)

    The training set GROWS over time (expanding window), simulating how
    you would actually trade: you start with less data, gain more over time.
    A fold is only emitted when its complete test window fits in the data.
    """

    def __init__(self, config: Optional["WalkForwardConfig"] = None):
        """
        Args:
            config: Window sizes to use; a default ``WalkForwardConfig`` is
                created when omitted.
        """
        self.config = config or WalkForwardConfig()

    def split(self, X: np.ndarray, y: Optional[np.ndarray] = None,
              groups: Optional[np.ndarray] = None) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Yield expanding-window ``(train_indices, test_indices)`` pairs.

        Only ``len(X)`` is used; ``y`` and ``groups`` are ignored. When
        ``config.n_splits`` is set it acts as an upper bound on the number of
        folds; otherwise every fold that fits is produced.

        Raises:
            ValueError: If ``config.step_size`` is smaller than 1.
        """
        n_samples = len(X)
        cfg = self.config
        if cfg.step_size < 1:
            raise ValueError(f"step_size must be >= 1, got {cfg.step_size}")

        # Room left after the first fold's train + embargo + test windows.
        slack = n_samples - cfg.min_train_size - cfg.embargo_gap - cfg.test_size
        if slack < 0:
            return
        # Fold i fits iff i * step_size <= slack, i.e. i in [0, slack // step].
        # (The "+ 1" keeps the fold whose test window ends exactly at the
        # last observation.)
        max_folds = slack // cfg.step_size + 1
        n_folds = max_folds if cfg.n_splits is None else min(cfg.n_splits, max_folds)

        for i in range(n_folds):
            train_end = cfg.min_train_size + i * cfg.step_size
            if train_end < 1:
                # No history to train on yet.
                continue
            # Embargo gap (prevents leakage)
            test_start = train_end + cfg.embargo_gap
            yield np.arange(train_end), np.arange(test_start, test_start + cfg.test_size)

    def get_n_splits(self, X: np.ndarray, y=None, groups=None) -> int:
        """Return how many folds ``split(X)`` yields."""
        return sum(1 for _ in self.split(X))
class SlidingWindowWalkForward:
    """
    Sliding window walk-forward (fixed-size training window).

    Unlike expanding window, the training set size stays constant.
    Old data drops off as new data comes in.

    Better for: Regime-changing markets where old data becomes irrelevant.
    Worse for: Early periods with limited data.
    """

    def __init__(self, train_size: int = 504, test_size: int = 63,
                 step_size: int = 21, embargo_gap: int = 5):
        """
        Args:
            train_size: Number of observations in every training window.
            test_size: Number of observations in every test window.
            step_size: How far both windows move between folds.
            embargo_gap: Observations skipped between train and test.

        Raises:
            ValueError: If a size is smaller than 1 or the gap is negative
                (``step_size <= 0`` would make ``split`` loop forever).
        """
        for name, value in (("train_size", train_size), ("test_size", test_size),
                            ("step_size", step_size)):
            if value < 1:
                raise ValueError(f"{name} must be >= 1, got {value}")
        if embargo_gap < 0:
            raise ValueError(f"embargo_gap must be >= 0, got {embargo_gap}")
        self.train_size = train_size
        self.test_size = test_size
        self.step_size = step_size
        self.embargo_gap = embargo_gap

    def split(self, X: np.ndarray, y: Optional[np.ndarray] = None,
              groups: Optional[np.ndarray] = None) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Yield sliding-window ``(train_indices, test_indices)`` pairs.

        Only ``len(X)`` is used; ``y`` and ``groups`` are ignored. A fold is
        emitted only while its full test window fits inside the data.
        """
        for k in range(self.get_n_splits(X)):
            train_start = k * self.step_size
            train_end = train_start + self.train_size
            test_start = train_end + self.embargo_gap
            yield (np.arange(train_start, train_end),
                   np.arange(test_start, test_start + self.test_size))

    def get_n_splits(self, X: np.ndarray, y=None, groups=None) -> int:
        """Return how many folds ``split(X)`` yields."""
        # Room left after the first fold's train + embargo + test windows;
        # fold k fits iff k * step_size <= slack.
        slack = len(X) - self.train_size - self.embargo_gap - self.test_size
        if slack < 0:
            return 0
        return slack // self.step_size + 1
class CombinatorialPurgedCV:
    """
    Combinatorial Purged Cross-Validation (CPCV).

    Based on Lopez de Prado (2018), "Advances in Financial Machine Learning":
    instead of sequential splits, the data is cut into ``n_splits``
    consecutive groups and every combination of ``n_test_splits`` groups is
    used once as the test set. This gives "n_splits choose n_test_splits"
    train/test pairs and therefore a distribution of performance numbers
    rather than a single one.

    Why this matters:
    - Standard walk-forward: You test on 5 periods. Maybe 4 are bull, 1 is bear.
      Your model looks great but fails in bear markets.
    - CPCV: You test on ALL combinations. Some train sets include bear, some don't.

    Note: training groups may lie AFTER test groups in time, so the training
    indices are generally not contiguous.
    """

    def __init__(self, n_splits: int = 6, n_test_splits: int = 2,
                 embargo_pct: float = 0.04):
        """
        Args:
            n_splits: Total number of groups to divide data into
            n_test_splits: How many groups form the test set
            embargo_pct: Fraction of ONE group's length that is removed from
                a training group on each side that borders a test group

        Raises:
            ValueError: If ``n_test_splits`` is not in ``[1, n_splits)`` (no
                training data would remain) or ``embargo_pct`` is negative
                (training groups would grow into the test region).
        """
        if not 1 <= n_test_splits < n_splits:
            raise ValueError(
                f"need 1 <= n_test_splits < n_splits, got "
                f"n_test_splits={n_test_splits}, n_splits={n_splits}"
            )
        if embargo_pct < 0:
            raise ValueError(f"embargo_pct must be >= 0, got {embargo_pct}")
        self.n_splits = n_splits
        self.n_test_splits = n_test_splits
        self.embargo_pct = embargo_pct
        from itertools import combinations
        self.test_combinations = list(combinations(range(n_splits), n_test_splits))

    def split(self, X: np.ndarray, y: Optional[np.ndarray] = None,
              groups: Optional[np.ndarray] = None) -> Iterator[Tuple[np.ndarray, np.ndarray]]:
        """Yield combinatorial purged ``(train_indices, test_indices)`` pairs.

        Only ``len(X)`` is used; ``y`` and ``groups`` are ignored. A
        combination whose training set ends up empty after the embargo is
        skipped, so fewer pairs than ``get_n_splits()`` may be produced.
        """
        n_samples = len(X)
        fold_size = n_samples // self.n_splits
        if fold_size == 0:
            # Fewer observations than groups: every group would be empty.
            return
        embargo_size = int(fold_size * self.embargo_pct)

        # Half-open [start, end) bounds per group. The last group absorbs the
        # n_samples % n_splits remainder so no observation is dropped.
        last = self.n_splits - 1
        bounds = [
            (g * fold_size, n_samples if g == last else (g + 1) * fold_size)
            for g in range(self.n_splits)
        ]

        for test_groups in self.test_combinations:
            in_test = set(test_groups)
            test_parts = [np.arange(*bounds[g]) for g in test_groups]

            train_parts = []
            for g in range(self.n_splits):
                if g in in_test:
                    continue
                start, end = bounds[g]
                # A test group directly before this one: trim the head.
                if g - 1 in in_test:
                    start = min(start + embargo_size, end)
                # A test group directly after this one: trim the tail.
                if g + 1 in in_test:
                    end = max(start, end - embargo_size)
                if start < end:
                    train_parts.append(np.arange(start, end))

            if train_parts:
                yield np.concatenate(train_parts), np.concatenate(test_parts)

    def get_n_splits(self, X=None, y=None, groups=None) -> int:
        """Return the number of test-group combinations."""
        return len(self.test_combinations)
class WalkForwardBacktest:
    """
    Walk-forward backtest driver.

    For every train/test split produced by the selected splitter, a fresh
    model is fitted on the training rows, evaluated on the test rows, and the
    per-fold metrics are summarised across folds.

    Usage:
        backtest = WalkForwardBacktest(config=WalkForwardConfig(min_train_size=504))
        results = backtest.run(X, y, model_factory=make_model, eval_fn=compute_metrics)

    where ``make_model()`` returns a new object with ``fit(X, y)`` and
    ``predict(X)``, and ``compute_metrics(pred, actual)`` returns a dict.
    """

    def __init__(self, config: Optional["WalkForwardConfig"] = None,
                 cv_type: str = 'expanding'):
        """
        Args:
            config: Window sizes; a default ``WalkForwardConfig`` is used
                when omitted.
            cv_type: One of 'expanding', 'sliding', 'purged', 'combinatorial'.

        Raises:
            ValueError: If ``cv_type`` is not one of the names above.
        """
        self.config = config or WalkForwardConfig()
        self.cv_type = cv_type
        # Always read from the resolved config: the raw argument may be None.
        cfg = self.config
        if cv_type == 'expanding':
            self.cv = ExpandingWindowWalkForward(cfg)
        elif cv_type == 'sliding':
            self.cv = SlidingWindowWalkForward(
                cfg.min_train_size, cfg.test_size,
                cfg.step_size, cfg.embargo_gap
            )
        elif cv_type == 'purged':
            self.cv = PurgedKFoldCV(n_splits=5, purge_k=cfg.embargo_gap)
        elif cv_type == 'combinatorial':
            self.cv = CombinatorialPurgedCV(n_splits=6, n_test_splits=2)
        else:
            raise ValueError(f"Unknown cv_type: {cv_type}")

    def run(self, X: np.ndarray, y: np.ndarray,
            model_factory: Callable,
            eval_fn: Callable[[np.ndarray, np.ndarray], Dict]) -> Dict:
        """
        Run walk-forward validation.

        Args:
            X: Features array
            y: Target array
            model_factory: Callable that returns a NEW model instance
            eval_fn: Callable(pred, actual) -> dict of metrics

        Returns:
            Dict with keys 'fold_results' (list of per-fold metric dicts,
            each extended with fold/size/index bookkeeping), 'aggregate'
            (summary statistics per numeric metric), 'cv_type' and 'config'.
            Splits with an empty train or test set are reported and skipped.
        """
        print(f"Running {self.cv_type} walk-forward validation...")
        print(f"Config: train_min={self.config.min_train_size}, "
              f"test={self.config.test_size}, step={self.config.step_size}, "
              f"embargo={self.config.embargo_gap}")

        # Materialise the splits once: this gives the fold count without
        # re-running the generator on every iteration and without requiring
        # the splitter to implement get_n_splits().
        splits = list(self.cv.split(X, y))
        n_folds = len(splits)

        fold_results = []
        for fold, (train_idx, test_idx) in enumerate(splits):
            print(f"\nFold {fold + 1}/{n_folds}")
            if len(train_idx) == 0 or len(test_idx) == 0:
                # Nothing to fit or nothing to score; indexing [0] would fail.
                print("  Skipped: empty train or test set")
                continue
            print(f"  Train: {len(train_idx)} samples ({train_idx[0]} to {train_idx[-1]})")
            print(f"  Test:  {len(test_idx)} samples ({test_idx[0]} to {test_idx[-1]})")

            # Split data
            X_train, X_test = X[train_idx], X[test_idx]
            y_train, y_test = y[train_idx], y[test_idx]

            # Train fresh model (NO LOOKAHEAD!)
            model = model_factory()
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            # Copy so the bookkeeping keys below never alter a dict that
            # eval_fn may keep or reuse between calls.
            metrics = dict(eval_fn(y_pred, y_test))
            metrics['fold'] = fold
            metrics['train_size'] = len(train_idx)
            metrics['test_size'] = len(test_idx)
            metrics['train_start'] = int(train_idx[0])
            metrics['train_end'] = int(train_idx[-1])
            metrics['test_start'] = int(test_idx[0])
            metrics['test_end'] = int(test_idx[-1])
            print(f"  Metrics: {metrics}")
            fold_results.append(metrics)

        return {
            'fold_results': fold_results,
            'aggregate': self._aggregate_results(fold_results),
            'cv_type': self.cv_type,
            'config': self.config
        }

    def _aggregate_results(self, fold_results: List[Dict]) -> Dict:
        """Compute aggregate statistics across folds.

        The metrics to summarise are the numeric entries of the FIRST fold,
        excluding the bookkeeping keys added by ``run``. Returns an empty
        dict when there are no folds.
        """
        if not fold_results:
            return {}

        bookkeeping = {'fold', 'train_start', 'train_end', 'test_start',
                       'test_end', 'train_size', 'test_size'}
        numeric_keys = [
            key for key, value in fold_results[0].items()
            if key not in bookkeeping and isinstance(value, (int, float, np.number))
        ]

        aggregate = {}
        for key in numeric_keys:
            # Folds that lack the metric or report None are left out.
            values = [r[key] for r in fold_results if r.get(key) is not None]
            if not values:
                continue
            values = np.array(values)
            aggregate[key] = {
                'mean': float(np.mean(values)),
                'std': float(np.std(values)),
                'min': float(np.min(values)),
                'max': float(np.max(values)),
                'median': float(np.median(values)),
                'pct_5th': float(np.percentile(values, 5)),
                'pct_95th': float(np.percentile(values, 95))
            }
        return aggregate
def honest_backtest_example():
    """Print the rank IC of a shuffled split next to walk-forward ICs.

    Builds a synthetic AR(1) series, uses its first five lags as features,
    fits a Ridge regression once on a random 70/30 split and once per fold of
    an expanding walk-forward split, and prints the Spearman correlations
    between predictions and targets for both schemes.
    """
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import train_test_split
    from scipy.stats import spearmanr

    # Synthetic autocorrelated series: y[t] = 0.7 * y[t-1] + 0.5 * noise
    np.random.seed(42)
    n_obs = 2000
    n_lags = 5
    y = np.zeros(n_obs)
    y[0] = np.random.randn()
    for t in range(1, n_obs):
        y[t] = 0.7 * y[t - 1] + np.random.randn() * 0.5

    # Column k holds the series shifted by k + 1 steps (rows without enough
    # history stay at zero).
    X = np.zeros((n_obs, n_lags))
    for col in range(n_lags):
        shift = col + 1
        X[shift:, col] = y[:-shift]
    X[0, :] = 0

    # Drop the warm-up rows whose lag features are incomplete.
    feats = X[n_lags:]
    target = y[n_lags:]

    # Scheme 1: random shuffle split (not appropriate for time series).
    feats_fit, feats_eval, target_fit, target_eval = train_test_split(
        feats, target, test_size=0.3, random_state=42
    )
    shuffled_model = Ridge().fit(feats_fit, target_fit)
    ic_bad, _ = spearmanr(shuffled_model.predict(feats_eval), target_eval)

    # Scheme 2: expanding walk-forward split with an embargo gap.
    splitter = ExpandingWindowWalkForward(
        WalkForwardConfig(min_train_size=500, test_size=200, step_size=200, embargo_gap=10)
    )
    ics_wf = []
    for fit_rows, eval_rows in splitter.split(feats, target):
        fold_model = Ridge().fit(feats[fit_rows], target[fit_rows])
        fold_ic, _ = spearmanr(fold_model.predict(feats[eval_rows]), target[eval_rows])
        ics_wf.append(fold_ic)

    banner = "=" * 60
    print(banner)
    print("THE WALK-FORWARD TRUTH BOMB")
    print(banner)
    print(f"Random split IC:      {ic_bad:.4f}  ← This is a LIE")
    print(f"Walk-forward IC:      {np.mean(ics_wf):.4f} ± {np.std(ics_wf):.4f}")
    print(f"Walk-forward range:   [{np.min(ics_wf):.4f}, {np.max(ics_wf):.4f}]")
    print()
    print("Random split looks great because future data leaked into training!")
    print("Walk-forward is honest because it only trains on PAST data.")
    print(f"Difference: {abs(ic_bad - np.mean(ics_wf)):.4f} — this is your FALSE HOPE.")
# Run the demonstration only when this file is executed as a script.
if __name__ == "__main__":
    honest_backtest_example()