""" Backtesting & Evaluation Module ================================= Proper financial evaluation metrics and backtesting framework. Metrics: - Direction Accuracy - Sharpe Ratio - Max Drawdown - Information Coefficient (IC) - Profit Factor - Calmar Ratio - Win Rate """ import numpy as np import pandas as pd import torch from typing import Dict, List, Tuple, Optional from dataclasses import dataclass @dataclass class BacktestResult: """Complete backtesting results.""" total_return: float annualized_return: float sharpe_ratio: float max_drawdown: float calmar_ratio: float profit_factor: float win_rate: float avg_win: float avg_loss: float num_trades: int direction_accuracy: Dict[str, float] information_coefficient: Dict[str, float] equity_curve: np.ndarray trade_log: List[Dict] daily_returns: np.ndarray class Evaluator: """ Comprehensive evaluation of trading predictions. Computes both statistical metrics (IC, direction accuracy) and simulated trading metrics (Sharpe, drawdown, profit factor). """ def __init__(self, prediction_horizons: List[int] = [1, 5, 20], trading_costs: float = 0.001, # 10 bps per trade initial_capital: float = 100000): self.prediction_horizons = prediction_horizons self.trading_costs = trading_costs self.initial_capital = initial_capital def evaluate_predictions( self, model: torch.nn.Module, test_loader: torch.utils.data.DataLoader, device: torch.device, ) -> Dict: """ Evaluate model predictions on test data. Returns comprehensive metrics. """ model.eval() all_preds = {'direction': [], 'returns': [], 'confidence': []} all_targets = {'direction': [], 'returns': []} with torch.no_grad(): for X_batch, y_batch in test_loader: X_batch = X_batch.to(device) outputs = model(X_batch) dir_probs = torch.sigmoid(outputs['direction_logits']).cpu().numpy() ret_preds = outputs['expected_return'].cpu().numpy() log_var = outputs['log_variance'].cpu().numpy() confidence = 1.0 / (1.0 + np.exp(log_var)) all_preds['direction'].append(dir_probs) all_preds['returns'].append(ret_preds) all_preds['confidence'].append(confidence) # Parse targets num_h = len(self.prediction_horizons) y_np = y_batch.numpy() directions = np.stack([y_np[:, i*2] for i in range(num_h)], axis=1) returns = np.stack([y_np[:, i*2+1] for i in range(num_h)], axis=1) all_targets['direction'].append(directions) all_targets['returns'].append(returns) # Concatenate for key in all_preds: all_preds[key] = np.concatenate(all_preds[key], axis=0) for key in all_targets: all_targets[key] = np.concatenate(all_targets[key], axis=0) results = {} # Per-horizon metrics for i, h in enumerate(self.prediction_horizons): horizon_results = self._evaluate_horizon( pred_direction=all_preds['direction'][:, i], pred_return=all_preds['returns'][:, i], pred_confidence=all_preds['confidence'][:, i], true_direction=all_targets['direction'][:, i], true_return=all_targets['returns'][:, i], horizon=h, ) results[f'horizon_{h}'] = horizon_results # Summary results['summary'] = { 'num_test_samples': len(all_preds['direction']), 'avg_direction_accuracy': np.mean([ results[f'horizon_{h}']['direction_accuracy'] for h in self.prediction_horizons ]), 'avg_ic': np.mean([ results[f'horizon_{h}']['information_coefficient'] for h in self.prediction_horizons ]), } return results def _evaluate_horizon( self, pred_direction: np.ndarray, pred_return: np.ndarray, pred_confidence: np.ndarray, true_direction: np.ndarray, true_return: np.ndarray, horizon: int, ) -> Dict: """Evaluate predictions for a single horizon.""" # Direction Accuracy pred_dir_binary = (pred_direction > 0.5).astype(float) direction_accuracy = np.mean(pred_dir_binary == true_direction) # Information Coefficient (Spearman rank correlation) ic = self._spearman_ic(pred_return, true_return) # Simulated trading trading_results = self._simulate_trading( pred_direction, pred_return, pred_confidence, true_return, horizon ) return { 'direction_accuracy': float(direction_accuracy), 'information_coefficient': float(ic), **trading_results, } def _spearman_ic(self, pred: np.ndarray, actual: np.ndarray) -> float: """Compute Information Coefficient (Spearman rank correlation).""" valid = np.isfinite(pred) & np.isfinite(actual) if valid.sum() < 3: return 0.0 pred_rank = self._rank(pred[valid]) actual_rank = self._rank(actual[valid]) n = len(pred_rank) d = pred_rank - actual_rank ic = 1 - (6 * np.sum(d**2)) / (n * (n**2 - 1) + 1e-8) return float(ic) def _rank(self, x: np.ndarray) -> np.ndarray: """Compute ranks of array elements.""" temp = x.argsort() ranks = np.empty_like(temp) ranks[temp] = np.arange(len(x)) return ranks.astype(float) def _simulate_trading( self, pred_direction: np.ndarray, pred_return: np.ndarray, pred_confidence: np.ndarray, true_return: np.ndarray, horizon: int, ) -> Dict: """ Simulate a simple long/short trading strategy. Strategy: - Go long when pred_direction > 0.5 and confidence > threshold - Go short when pred_direction < 0.5 and confidence > threshold - Position size proportional to confidence """ confidence_threshold = 0.55 capital = self.initial_capital equity_curve = [capital] trade_log = [] daily_returns = [] for i in range(0, len(pred_direction), max(horizon, 1)): if i >= len(pred_direction): break conf = pred_confidence[i] if conf < confidence_threshold: daily_returns.append(0.0) equity_curve.append(equity_curve[-1]) continue # Position sizing (confidence-weighted) position_weight = min(conf * 0.5, 0.25) # Max 25% position # Direction if pred_direction[i] > 0.5: position = position_weight # Long else: position = -position_weight # Short # Actual return (clipped for robustness) actual_ret = np.clip(true_return[i], -0.20, 0.20) # Trade PnL (including costs) trade_pnl = position * actual_ret - abs(position) * self.trading_costs capital *= (1 + trade_pnl) equity_curve.append(capital) daily_returns.append(trade_pnl) trade_log.append({ 'step': i, 'direction': 'LONG' if position > 0 else 'SHORT', 'confidence': float(conf), 'position_size': float(abs(position)), 'predicted_return': float(pred_return[i]), 'actual_return': float(actual_ret), 'pnl': float(trade_pnl), 'equity': float(capital), }) daily_returns = np.array(daily_returns) equity_curve = np.array(equity_curve) # Compute metrics total_return = (equity_curve[-1] / equity_curve[0]) - 1 # Annualized return (assuming 252 trading days) n_periods = len(daily_returns) if n_periods > 0 and total_return > -1: annualized_return = (1 + total_return) ** (252 / max(n_periods, 1)) - 1 else: annualized_return = -1.0 # Sharpe ratio if len(daily_returns) > 1 and np.std(daily_returns) > 0: sharpe = np.mean(daily_returns) / np.std(daily_returns) * np.sqrt(252) else: sharpe = 0.0 # Max drawdown running_max = np.maximum.accumulate(equity_curve) drawdowns = (running_max - equity_curve) / (running_max + 1e-8) max_drawdown = np.max(drawdowns) if len(drawdowns) > 0 else 0.0 # Calmar ratio calmar = annualized_return / (max_drawdown + 1e-8) if max_drawdown > 0 else 0.0 # Win/loss analysis wins = [t['pnl'] for t in trade_log if t['pnl'] > 0] losses = [t['pnl'] for t in trade_log if t['pnl'] <= 0] win_rate = len(wins) / max(len(trade_log), 1) avg_win = np.mean(wins) if wins else 0.0 avg_loss = np.mean(losses) if losses else 0.0 profit_factor = abs(sum(wins)) / (abs(sum(losses)) + 1e-8) if losses else float('inf') return { 'total_return': float(total_return), 'annualized_return': float(annualized_return), 'sharpe_ratio': float(sharpe), 'max_drawdown': float(max_drawdown), 'calmar_ratio': float(calmar), 'profit_factor': float(profit_factor), 'win_rate': float(win_rate), 'avg_win': float(avg_win), 'avg_loss': float(avg_loss), 'num_trades': len(trade_log), 'equity_curve': equity_curve.tolist(), 'daily_returns': daily_returns.tolist(), } def format_evaluation(results: Dict) -> str: """Format evaluation results for display.""" lines = ["═" * 70, " TRADING INTELLIGENCE SYSTEM - EVALUATION REPORT", "═" * 70] summary = results.get('summary', {}) lines.append(f" Test Samples: {summary.get('num_test_samples', 'N/A')}") lines.append(f" Avg Direction Accuracy: {summary.get('avg_direction_accuracy', 0):.1%}") lines.append(f" Avg Information Coefficient: {summary.get('avg_ic', 0):.4f}") lines.append("") for key in sorted(results.keys()): if not key.startswith('horizon_'): continue h = key.split('_')[1] hr = results[key] lines.extend([ f" ┌──────────── Horizon: {h}-day ────────────┐", f" │ Direction Accuracy: {hr.get('direction_accuracy', 0):.1%}", f" │ Information Coeff: {hr.get('information_coefficient', 0):.4f}", f" │ Total Return: {hr.get('total_return', 0):.2%}", f" │ Annualized Return: {hr.get('annualized_return', 0):.2%}", f" │ Sharpe Ratio: {hr.get('sharpe_ratio', 0):.2f}", f" │ Max Drawdown: {hr.get('max_drawdown', 0):.2%}", f" │ Calmar Ratio: {hr.get('calmar_ratio', 0):.2f}", f" │ Profit Factor: {hr.get('profit_factor', 0):.2f}", f" │ Win Rate: {hr.get('win_rate', 0):.1%}", f" │ Avg Win: {hr.get('avg_win', 0):.4f}", f" │ Avg Loss: {hr.get('avg_loss', 0):.4f}", f" │ Num Trades: {hr.get('num_trades', 0)}", f" └{'─' * 42}┘", "" ]) lines.append("═" * 70) return "\n".join(lines)