| """ |
| Backtesting & Evaluation Module |
| ================================= |
| Proper financial evaluation metrics and backtesting framework. |
| |
| Metrics: |
| - Direction Accuracy |
| - Sharpe Ratio |
| - Max Drawdown |
| - Information Coefficient (IC) |
| - Profit Factor |
| - Calmar Ratio |
| - Win Rate |
| """ |
|
|
| import numpy as np |
| import pandas as pd |
| import torch |
| from typing import Dict, List, Tuple, Optional |
| from dataclasses import dataclass |
|
|
|
|
@dataclass
class BacktestResult:
    """Container bundling every output of one backtest run.

    Fields are plain values; no validation or derived computation happens
    here. Field order is part of the positional constructor signature.
    """

    # --- return / risk figures -------------------------------------------
    total_return: float
    annualized_return: float
    sharpe_ratio: float
    max_drawdown: float
    calmar_ratio: float

    # --- per-trade statistics --------------------------------------------
    profit_factor: float
    win_rate: float
    avg_win: float
    avg_loss: float
    num_trades: int

    # --- prediction-quality metrics, keyed by a string label --------------
    direction_accuracy: Dict[str, float]
    information_coefficient: Dict[str, float]

    # --- raw series and the trade-by-trade record -------------------------
    equity_curve: np.ndarray
    trade_log: List[Dict]
    daily_returns: np.ndarray
|
|
|
|
class Evaluator:
    """
    Comprehensive evaluation of trading predictions.

    Computes both statistical metrics (IC, direction accuracy)
    and simulated trading metrics (Sharpe, drawdown, profit factor).
    """

    # Constants used by the simulated strategy and the annualisation maths.
    TRADING_DAYS_PER_YEAR = 252
    CONFIDENCE_THRESHOLD = 0.55   # minimum confidence required to trade
    CONFIDENCE_TO_WEIGHT = 0.5    # position weight = confidence * this ...
    MAX_POSITION_WEIGHT = 0.25    # ... capped at this fraction of capital
    RETURN_CLIP = 0.20            # realised returns are clipped to +/- this

    def __init__(self, prediction_horizons: Optional[List[int]] = None,
                 trading_costs: float = 0.001,
                 initial_capital: float = 100000):
        """
        Args:
            prediction_horizons: Horizons the model predicts, in the same
                order as the model's output columns. Defaults to [1, 5, 20].
            trading_costs: Proportional cost charged on the absolute
                position size of every trade.
            initial_capital: Starting equity for the simulated strategy.
        """
        # A fresh list per instance: a mutable default argument would be
        # shared (and could be mutated) across all Evaluator instances.
        if prediction_horizons is None:
            prediction_horizons = [1, 5, 20]
        self.prediction_horizons = list(prediction_horizons)
        self.trading_costs = trading_costs
        self.initial_capital = initial_capital

    def evaluate_predictions(
        self,
        model: torch.nn.Module,
        test_loader: torch.utils.data.DataLoader,
        device: torch.device,
    ) -> Dict:
        """
        Evaluate model predictions on test data.

        The model is called as ``model(X_batch)`` and must return a mapping
        with the keys ``'direction_logits'``, ``'expected_return'`` and
        ``'log_variance'``, each indexed as ``[sample, horizon]``. Target
        batches are read as interleaved columns
        ``[dir_h0, ret_h0, dir_h1, ret_h1, ...]``.

        Returns:
            Dict with one ``'horizon_<h>'`` entry per configured horizon
            (see ``_evaluate_horizon``) plus a ``'summary'`` entry holding
            the sample count and the cross-horizon averages of direction
            accuracy and IC.

        Raises:
            ValueError: If ``test_loader`` yields no batches.
        """
        model.eval()
        num_h = len(self.prediction_horizons)
        all_preds = {'direction': [], 'returns': [], 'confidence': []}
        all_targets = {'direction': [], 'returns': []}

        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch.to(device))

                dir_probs = torch.sigmoid(outputs['direction_logits']).cpu().numpy()
                ret_preds = outputs['expected_return'].cpu().numpy()
                # sigmoid(-log_var) == 1 / (1 + exp(log_var)), but does not
                # overflow for large log-variances.
                confidence = torch.sigmoid(-outputs['log_variance']).cpu().numpy()

                all_preds['direction'].append(dir_probs)
                all_preds['returns'].append(ret_preds)
                all_preds['confidence'].append(confidence)

                # .cpu() keeps this working if the loader yields targets
                # that already live on an accelerator.
                y_np = y_batch.detach().cpu().numpy()
                # Even columns are direction labels, odd columns are returns.
                all_targets['direction'].append(y_np[:, 0:2 * num_h:2])
                all_targets['returns'].append(y_np[:, 1:2 * num_h:2])

        if not all_preds['direction']:
            raise ValueError("test_loader yielded no batches; nothing to evaluate")

        for key in all_preds:
            all_preds[key] = np.concatenate(all_preds[key], axis=0)
        for key in all_targets:
            all_targets[key] = np.concatenate(all_targets[key], axis=0)

        results = {}

        for i, h in enumerate(self.prediction_horizons):
            results[f'horizon_{h}'] = self._evaluate_horizon(
                pred_direction=all_preds['direction'][:, i],
                pred_return=all_preds['returns'][:, i],
                pred_confidence=all_preds['confidence'][:, i],
                true_direction=all_targets['direction'][:, i],
                true_return=all_targets['returns'][:, i],
                horizon=h,
            )

        results['summary'] = {
            'num_test_samples': len(all_preds['direction']),
            'avg_direction_accuracy': float(np.mean([
                results[f'horizon_{h}']['direction_accuracy']
                for h in self.prediction_horizons
            ])),
            'avg_ic': float(np.mean([
                results[f'horizon_{h}']['information_coefficient']
                for h in self.prediction_horizons
            ])),
        }

        return results

    def _evaluate_horizon(
        self,
        pred_direction: np.ndarray,
        pred_return: np.ndarray,
        pred_confidence: np.ndarray,
        true_direction: np.ndarray,
        true_return: np.ndarray,
        horizon: int,
    ) -> Dict:
        """Evaluate predictions for a single horizon.

        Returns a dict with ``'direction_accuracy'`` (share of samples where
        the thresholded probability equals the label),
        ``'information_coefficient'`` (Spearman IC of predicted vs. realised
        returns) and every key produced by ``_simulate_trading``.
        """
        # Probabilities above 0.5 are read as an "up" call.
        pred_dir_binary = (pred_direction > 0.5).astype(float)
        direction_accuracy = np.mean(pred_dir_binary == true_direction)

        ic = self._spearman_ic(pred_return, true_return)

        trading_results = self._simulate_trading(
            pred_direction, pred_return, pred_confidence, true_return, horizon
        )

        return {
            'direction_accuracy': float(direction_accuracy),
            'information_coefficient': float(ic),
            **trading_results,
        }

    def _spearman_ic(self, pred: np.ndarray, actual: np.ndarray) -> float:
        """Compute Information Coefficient (Spearman rank correlation).

        Non-finite pairs are dropped first. The coefficient is the Pearson
        correlation of tie-averaged ranks, which stays correct when values
        repeat (the ``1 - 6*sum(d^2)/(n*(n^2-1))`` shortcut does not).

        Returns:
            The rank correlation in [-1, 1], or 0.0 when fewer than three
            valid pairs remain or either side has no rank variation.
        """
        pred = np.asarray(pred, dtype=float).ravel()
        actual = np.asarray(actual, dtype=float).ravel()

        valid = np.isfinite(pred) & np.isfinite(actual)
        if valid.sum() < 3:
            return 0.0

        pred_rank = self._rank(pred[valid])
        actual_rank = self._rank(actual[valid])

        pred_dev = pred_rank - pred_rank.mean()
        actual_dev = actual_rank - actual_rank.mean()
        denom = np.sqrt(np.sum(pred_dev ** 2) * np.sum(actual_dev ** 2))
        if denom == 0:
            # A constant series carries no ranking information.
            return 0.0

        return float(np.sum(pred_dev * actual_dev) / denom)

    def _rank(self, x: np.ndarray) -> np.ndarray:
        """Compute 0-based ranks of array elements, averaging ties.

        Without ties the result equals the plain argsort rank
        (0 .. n-1); tied values all receive the mean of the ranks they span.
        """
        x = np.asarray(x).ravel()
        if x.size == 0:
            return np.empty(0, dtype=float)

        _, inverse, counts = np.unique(x, return_inverse=True, return_counts=True)
        last = np.cumsum(counts) - 1      # rank of the last member of each tie group
        first = last - counts + 1         # rank of the first member of each tie group
        average = (first + last) / 2.0
        return average[inverse.ravel()]

    def _simulate_trading(
        self,
        pred_direction: np.ndarray,
        pred_return: np.ndarray,
        pred_confidence: np.ndarray,
        true_return: np.ndarray,
        horizon: int,
    ) -> Dict:
        """
        Simulate a simple long/short trading strategy.

        Strategy:
        - Go long when pred_direction > 0.5 and confidence > threshold
        - Go short when pred_direction <= 0.5 and confidence > threshold
        - Position weight is ``confidence * CONFIDENCE_TO_WEIGHT`` capped at
          ``MAX_POSITION_WEIGHT``. NOTE: with the current constants every
          trade that clears the threshold hits the cap, so sizing is
          effectively constant.

        One decision is taken every ``max(horizon, 1)`` samples so holding
        periods do not overlap. Each decision is one "period" in the
        returned series (the ``'daily_returns'`` key is kept for
        compatibility; entries are per-period returns). Annualisation
        assumes consecutive samples are consecutive trading days, so one
        period spans ``horizon`` days.

        Returns:
            Dict with total/annualised return, Sharpe, max drawdown, Calmar,
            profit factor, win rate, average win/loss, trade count, and the
            equity curve and per-period returns as lists.
        """
        step = max(horizon, 1)
        periods_per_year = self.TRADING_DAYS_PER_YEAR / step

        capital = float(self.initial_capital)
        equity_curve = [capital]
        trade_log = []
        period_returns = []

        for i in range(0, len(pred_direction), step):
            conf = pred_confidence[i]
            realised = true_return[i]

            # Stay flat when confidence is too low, or when the inputs are
            # non-finite (a single NaN would otherwise poison the curve).
            tradable = np.isfinite(conf) and np.isfinite(realised)
            if not tradable or conf < self.CONFIDENCE_THRESHOLD:
                period_returns.append(0.0)
                equity_curve.append(capital)
                continue

            position_weight = min(conf * self.CONFIDENCE_TO_WEIGHT,
                                  self.MAX_POSITION_WEIGHT)
            position = position_weight if pred_direction[i] > 0.5 else -position_weight

            # Clip extreme realised returns before applying them.
            actual_ret = np.clip(realised, -self.RETURN_CLIP, self.RETURN_CLIP)

            # Costs are charged on the absolute position size.
            trade_pnl = position * actual_ret - abs(position) * self.trading_costs

            capital *= (1 + trade_pnl)
            equity_curve.append(capital)
            period_returns.append(trade_pnl)

            trade_log.append({
                'step': i,
                'direction': 'LONG' if position > 0 else 'SHORT',
                'confidence': float(conf),
                'position_size': float(abs(position)),
                'predicted_return': float(pred_return[i]),
                'actual_return': float(actual_ret),
                'pnl': float(trade_pnl),
                'equity': float(capital),
            })

        period_returns = np.array(period_returns, dtype=float)
        equity_curve = np.array(equity_curve, dtype=float)

        total_return = (equity_curve[-1] / equity_curve[0]) - 1

        # Each period covers `step` trading days, so the sample spans
        # n_periods / periods_per_year years.
        n_periods = len(period_returns)
        if n_periods == 0:
            annualized_return = 0.0      # nothing happened: not a -100% loss
        elif total_return > -1:
            annualized_return = (1 + total_return) ** (periods_per_year / n_periods) - 1
        else:
            annualized_return = -1.0     # capital fully wiped out

        if n_periods > 1 and np.std(period_returns) > 0:
            sharpe = (np.mean(period_returns) / np.std(period_returns)
                      * np.sqrt(periods_per_year))
        else:
            sharpe = 0.0

        running_max = np.maximum.accumulate(equity_curve)
        drawdowns = (running_max - equity_curve) / (running_max + 1e-8)
        max_drawdown = np.max(drawdowns) if len(drawdowns) > 0 else 0.0

        calmar = annualized_return / (max_drawdown + 1e-8) if max_drawdown > 0 else 0.0

        wins = [t['pnl'] for t in trade_log if t['pnl'] > 0]
        losses = [t['pnl'] for t in trade_log if t['pnl'] <= 0]

        win_rate = len(wins) / max(len(trade_log), 1)
        avg_win = np.mean(wins) if wins else 0.0
        avg_loss = np.mean(losses) if losses else 0.0

        if not trade_log:
            profit_factor = 0.0          # no trades: no profit to speak of
        elif not losses:
            profit_factor = float('inf')
        else:
            profit_factor = abs(sum(wins)) / (abs(sum(losses)) + 1e-8)

        return {
            'total_return': float(total_return),
            'annualized_return': float(annualized_return),
            'sharpe_ratio': float(sharpe),
            'max_drawdown': float(max_drawdown),
            'calmar_ratio': float(calmar),
            'profit_factor': float(profit_factor),
            'win_rate': float(win_rate),
            'avg_win': float(avg_win),
            'avg_loss': float(avg_loss),
            'num_trades': len(trade_log),
            'equity_curve': equity_curve.tolist(),
            'daily_returns': period_returns.tolist(),
        }
| |
|
|
def format_evaluation(results: Dict) -> str:
    """Format evaluation results for display.

    Args:
        results: Mapping with an optional ``'summary'`` entry and any number
            of ``'horizon_<h>'`` entries. Missing metrics inside an entry
            are rendered as 0.

    Returns:
        A multi-line report: a header, the summary figures, then one
        section per horizon in ascending numeric horizon order.
    """
    # NOTE(review): the 'β' glyphs below look like mis-decoded box-drawing
    # characters; they are left untouched because they are runtime output.
    # Confirm the intended characters before changing them.
    lines = ["β" * 70, " TRADING INTELLIGENCE SYSTEM - EVALUATION REPORT", "β" * 70]

    summary = results.get('summary', {})
    lines.append(f" Test Samples: {summary.get('num_test_samples', 'N/A')}")
    lines.append(f" Avg Direction Accuracy: {summary.get('avg_direction_accuracy', 0):.1%}")
    lines.append(f" Avg Information Coefficient: {summary.get('avg_ic', 0):.4f}")
    lines.append("")

    def _horizon_order(key: str):
        # Plain string sorting would put 'horizon_20' before 'horizon_5'.
        # Numeric suffixes sort by value; anything else goes after them.
        suffix = key.split('_')[1]
        if suffix.isdigit():
            return (0, int(suffix), '')
        return (1, 0, suffix)

    horizon_keys = sorted(
        (key for key in results if key.startswith('horizon_')),
        key=_horizon_order,
    )

    for key in horizon_keys:
        h = key.split('_')[1]
        hr = results[key]

        lines.extend([
            f" βββββββββββββ Horizon: {h}-day βββββββββββββ",
            f" β Direction Accuracy: {hr.get('direction_accuracy', 0):.1%}",
            f" β Information Coeff: {hr.get('information_coefficient', 0):.4f}",
            f" β Total Return: {hr.get('total_return', 0):.2%}",
            f" β Annualized Return: {hr.get('annualized_return', 0):.2%}",
            f" β Sharpe Ratio: {hr.get('sharpe_ratio', 0):.2f}",
            f" β Max Drawdown: {hr.get('max_drawdown', 0):.2%}",
            f" β Calmar Ratio: {hr.get('calmar_ratio', 0):.2f}",
            f" β Profit Factor: {hr.get('profit_factor', 0):.2f}",
            f" β Win Rate: {hr.get('win_rate', 0):.1%}",
            f" β Avg Win: {hr.get('avg_win', 0):.4f}",
            f" β Avg Loss: {hr.get('avg_loss', 0):.4f}",
            f" β Num Trades: {hr.get('num_trades', 0)}",
            f" β{'β' * 42}β",
            ""
        ])

    lines.append("β" * 70)
    return "\n".join(lines)
|
|