# avinashhm -- "Add trading_intelligence/evaluation.py" (147319b, verified)
"""
Backtesting & Evaluation Module
=================================
Proper financial evaluation metrics and backtesting framework.
Metrics:
- Direction Accuracy
- Sharpe Ratio
- Max Drawdown
- Information Coefficient (IC)
- Profit Factor
- Calmar Ratio
- Win Rate
"""
import numpy as np
import pandas as pd
import torch
from typing import Dict, List, Tuple, Optional
from dataclasses import dataclass
@dataclass
class BacktestResult:
    """Complete backtesting results.

    Plain data container with no behaviour of its own. Nothing in the
    visible part of this file constructs it; the scalar field names mirror
    the keys of the dict returned by ``Evaluator._simulate_trading``, so it
    is presumably meant as a typed counterpart of that dict -- confirm
    against whatever populates it.
    """
    # --- return / risk summary (scalars) ---
    total_return: float
    annualized_return: float
    sharpe_ratio: float
    max_drawdown: float
    calmar_ratio: float
    profit_factor: float
    # --- per-trade statistics ---
    win_rate: float
    avg_win: float
    avg_loss: float
    num_trades: int
    # --- predictive-quality metrics ---
    # NOTE(review): typed as mappings, presumably keyed per prediction
    # horizon -- the key format is not established anywhere in this file.
    direction_accuracy: Dict[str, float]
    information_coefficient: Dict[str, float]
    # --- series / logs ---
    # NOTE(review): declared as ndarrays here, whereas
    # Evaluator._simulate_trading returns plain lists for the same names.
    equity_curve: np.ndarray
    trade_log: List[Dict]
    daily_returns: np.ndarray
class Evaluator:
    """
    Comprehensive evaluation of trading predictions.

    Computes both statistical metrics (IC, direction accuracy)
    and simulated trading metrics (Sharpe, drawdown, profit factor).

    The class-level constants below are the tuning knobs of the trading
    simulation; override them on a subclass or an instance to change the
    strategy without editing the method bodies.
    """

    # Horizons used when the caller does not supply any.
    DEFAULT_HORIZONS = (1, 5, 20)
    # Number of one-sample steps treated as one year when annualising.
    # Assumes consecutive samples are one trading day apart -- TODO confirm
    # against the dataset that feeds the test loader.
    TRADING_DAYS_PER_YEAR = 252
    # Minimum confidence required to open a position.
    CONFIDENCE_THRESHOLD = 0.55
    # Position weight is confidence * CONFIDENCE_TO_WEIGHT, capped below.
    CONFIDENCE_TO_WEIGHT = 0.5
    MAX_POSITION_WEIGHT = 0.25
    # Realised returns are clipped to +/- this value before computing PnL.
    RETURN_CLIP = 0.20

    def __init__(self, prediction_horizons: Optional[List[int]] = None,
                 trading_costs: float = 0.001,  # 10 bps per trade
                 initial_capital: float = 100000):
        """
        Args:
            prediction_horizons: Horizons the model predicts, in the column
                order of the model outputs and targets. ``None`` selects
                ``DEFAULT_HORIZONS`` (1, 5, 20).
            trading_costs: Cost charged per trade as a fraction of the
                position size.
            initial_capital: Starting value of the simulated equity curve.
        """
        if prediction_horizons is None:
            prediction_horizons = self.DEFAULT_HORIZONS
        # Copy so that neither a shared default nor the caller's list can be
        # mutated through this instance (or vice versa).
        self.prediction_horizons = list(prediction_horizons)
        self.trading_costs = trading_costs
        self.initial_capital = initial_capital

    def evaluate_predictions(
        self,
        model: torch.nn.Module,
        test_loader: torch.utils.data.DataLoader,
        device: torch.device,
    ) -> Dict:
        """
        Evaluate model predictions on test data.

        The model is called as ``model(X_batch)`` and must return a mapping
        with ``'direction_logits'``, ``'expected_return'`` and
        ``'log_variance'`` tensors; column ``i`` of each is used for the
        ``i``-th horizon. Targets are read with direction in column ``2*i``
        and return in column ``2*i + 1``.

        Returns:
            Dict with one ``'horizon_<h>'`` entry per horizon (see
            ``_evaluate_horizon``) and a ``'summary'`` entry holding the
            sample count and the averages of direction accuracy and IC.

        Raises:
            ValueError: if no horizons are configured or the loader yields
                no batches.
        """
        num_h = len(self.prediction_horizons)
        if num_h == 0:
            raise ValueError("prediction_horizons is empty; nothing to evaluate")

        # Target layout is interleaved: [dir_0, ret_0, dir_1, ret_1, ...].
        dir_cols = np.arange(num_h) * 2
        ret_cols = dir_cols + 1

        model.eval()
        preds = {'direction': [], 'returns': [], 'confidence': []}
        targets = {'direction': [], 'returns': []}

        with torch.no_grad():
            for X_batch, y_batch in test_loader:
                outputs = model(X_batch.to(device))

                preds['direction'].append(
                    torch.sigmoid(outputs['direction_logits']).cpu().numpy()
                )
                preds['returns'].append(outputs['expected_return'].cpu().numpy())
                # 1 / (1 + exp(log_var)) == sigmoid(-log_var); the sigmoid
                # form does not overflow for large log-variances.
                preds['confidence'].append(
                    torch.sigmoid(-outputs['log_variance']).cpu().numpy()
                )

                # detach/cpu so targets living on another device (or tracking
                # gradients) can still be converted.
                y_np = y_batch.detach().cpu().numpy()
                targets['direction'].append(y_np[:, dir_cols])
                targets['returns'].append(y_np[:, ret_cols])

        if not preds['direction']:
            raise ValueError("test_loader yielded no batches; nothing to evaluate")

        preds = {name: np.concatenate(chunks, axis=0) for name, chunks in preds.items()}
        targets = {name: np.concatenate(chunks, axis=0) for name, chunks in targets.items()}

        results = {}
        for i, h in enumerate(self.prediction_horizons):
            results[f'horizon_{h}'] = self._evaluate_horizon(
                pred_direction=preds['direction'][:, i],
                pred_return=preds['returns'][:, i],
                pred_confidence=preds['confidence'][:, i],
                true_direction=targets['direction'][:, i],
                true_return=targets['returns'][:, i],
                horizon=h,
            )

        results['summary'] = {
            'num_test_samples': len(preds['direction']),
            'avg_direction_accuracy': float(np.mean([
                results[f'horizon_{h}']['direction_accuracy']
                for h in self.prediction_horizons
            ])),
            'avg_ic': float(np.mean([
                results[f'horizon_{h}']['information_coefficient']
                for h in self.prediction_horizons
            ])),
        }
        return results

    def _evaluate_horizon(
        self,
        pred_direction: np.ndarray,
        pred_return: np.ndarray,
        pred_confidence: np.ndarray,
        true_direction: np.ndarray,
        true_return: np.ndarray,
        horizon: int,
    ) -> Dict:
        """
        Evaluate predictions for a single horizon.

        Returns a dict with ``'direction_accuracy'`` (share of samples where
        ``pred_direction > 0.5`` matches ``true_direction``),
        ``'information_coefficient'`` (Spearman rank correlation of predicted
        vs. true returns) and every key of ``_simulate_trading``.
        """
        if len(pred_direction) > 0:
            predicted_up = (pred_direction > 0.5).astype(float)
            direction_accuracy = float(np.mean(predicted_up == true_direction))
        else:
            # Avoid the NaN (and RuntimeWarning) of a mean over nothing.
            direction_accuracy = 0.0

        ic = self._spearman_ic(pred_return, true_return)

        trading_results = self._simulate_trading(
            pred_direction, pred_return, pred_confidence, true_return, horizon
        )
        return {
            'direction_accuracy': direction_accuracy,
            'information_coefficient': float(ic),
            **trading_results,
        }

    def _spearman_ic(self, pred: np.ndarray, actual: np.ndarray) -> float:
        """
        Compute Information Coefficient (Spearman rank correlation).

        Non-finite pairs are dropped first. The coefficient is the Pearson
        correlation of tie-averaged ranks, which equals the classic
        ``1 - 6*sum(d^2) / (n*(n^2-1))`` when there are no ties and stays
        correct when there are. Returns 0.0 when fewer than three valid
        pairs remain or when either side is constant (no ranking
        information).
        """
        pred = np.asarray(pred)
        actual = np.asarray(actual)
        valid = np.isfinite(pred) & np.isfinite(actual)
        if valid.sum() < 3:
            return 0.0

        pred_rank = self._rank(pred[valid])
        actual_rank = self._rank(actual[valid])
        pred_dev = pred_rank - pred_rank.mean()
        actual_dev = actual_rank - actual_rank.mean()

        denom = np.sqrt(np.sum(pred_dev ** 2) * np.sum(actual_dev ** 2))
        if denom == 0:
            return 0.0
        return float(np.sum(pred_dev * actual_dev) / denom)

    def _rank(self, x: np.ndarray) -> np.ndarray:
        """
        Compute 0-based ranks of array elements as floats.

        Equal values share the average of the ranks they occupy, so the
        result does not depend on the sort's tie order. For inputs without
        ties this is the plain ordinal rank ``0 .. n-1``.
        """
        x = np.asarray(x).ravel()
        if x.size == 0:
            return np.empty(0, dtype=float)

        order = np.argsort(x, kind="mergesort")
        ordinal = np.empty(x.size, dtype=float)
        ordinal[order] = np.arange(x.size, dtype=float)

        # Average the ordinal ranks inside each group of equal values.
        _, group, group_size = np.unique(x, return_inverse=True, return_counts=True)
        group = group.ravel()
        group_mean = np.bincount(group, weights=ordinal) / group_size
        return group_mean[group]

    def _simulate_trading(
        self,
        pred_direction: np.ndarray,
        pred_return: np.ndarray,
        pred_confidence: np.ndarray,
        true_return: np.ndarray,
        horizon: int,
    ) -> Dict:
        """
        Simulate a simple long/short trading strategy.

        Strategy:
        - One decision every ``max(horizon, 1)`` samples, so holding periods
          do not overlap.
        - Go long when pred_direction > 0.5 and confidence >= threshold.
        - Go short otherwise, when confidence >= threshold.
        - Position weight is ``min(confidence * 0.5, 0.25)``. With the
          default threshold of 0.55 the 25% cap always binds, so every
          trade is effectively sized at 25%.
        - Periods with confidence below the threshold, NaN confidence, or a
          non-finite realised return are recorded as flat (0.0 return).

        Annualisation uses ``252 / max(horizon, 1)`` periods per year
        because each simulated period spans ``horizon`` samples.

        ``pred_return`` is accepted for interface compatibility but does not
        influence the simulation.

        Returns:
            Dict with ``total_return``, ``annualized_return``,
            ``sharpe_ratio``, ``max_drawdown``, ``calmar_ratio``,
            ``profit_factor``, ``win_rate``, ``avg_win``, ``avg_loss``,
            ``num_trades``, ``equity_curve`` (list, starts at the initial
            capital) and ``daily_returns`` (list, one entry per period).
        """
        stride = max(horizon, 1)
        periods_per_year = self.TRADING_DAYS_PER_YEAR / stride

        capital = float(self.initial_capital)
        equity = [capital]
        period_returns = []
        trade_pnls = []

        for step in range(0, len(pred_direction), stride):
            conf = float(pred_confidence[step])
            realized = float(true_return[step])

            # `not (conf >= ...)` also rejects NaN confidence; a non-finite
            # return would otherwise turn the whole equity curve into NaN.
            if not (conf >= self.CONFIDENCE_THRESHOLD) or not np.isfinite(realized):
                period_returns.append(0.0)
                equity.append(capital)
                continue

            weight = min(conf * self.CONFIDENCE_TO_WEIGHT, self.MAX_POSITION_WEIGHT)
            position = weight if pred_direction[step] > 0.5 else -weight

            # Clip the realised return so a single outlier cannot dominate.
            realized = float(np.clip(realized, -self.RETURN_CLIP, self.RETURN_CLIP))
            pnl = position * realized - abs(position) * self.trading_costs

            capital *= 1 + pnl
            equity.append(capital)
            period_returns.append(pnl)
            trade_pnls.append(pnl)

        period_returns = np.asarray(period_returns, dtype=float)
        equity = np.asarray(equity, dtype=float)
        n_periods = len(period_returns)

        total_return = equity[-1] / equity[0] - 1

        if n_periods == 0:
            # Nothing was simulated: report a flat result, not a total loss.
            annualized_return = 0.0
        elif total_return > -1:
            annualized_return = (1 + total_return) ** (periods_per_year / n_periods) - 1
        else:
            annualized_return = -1.0

        return_std = np.std(period_returns) if n_periods > 1 else 0.0
        if return_std > 0:
            sharpe = np.mean(period_returns) / return_std * np.sqrt(periods_per_year)
        else:
            sharpe = 0.0

        running_max = np.maximum.accumulate(equity)
        drawdowns = (running_max - equity) / (running_max + 1e-8)
        max_drawdown = np.max(drawdowns) if len(drawdowns) > 0 else 0.0

        calmar = annualized_return / (max_drawdown + 1e-8) if max_drawdown > 0 else 0.0

        # A trade with exactly zero PnL counts as a loss.
        wins = [p for p in trade_pnls if p > 0]
        losses = [p for p in trade_pnls if p <= 0]
        win_rate = len(wins) / max(len(trade_pnls), 1)
        avg_win = np.mean(wins) if wins else 0.0
        avg_loss = np.mean(losses) if losses else 0.0
        if not trade_pnls:
            # No trades at all is not an infinitely profitable strategy.
            profit_factor = 0.0
        elif losses:
            profit_factor = abs(sum(wins)) / (abs(sum(losses)) + 1e-8)
        else:
            profit_factor = float('inf')

        return {
            'total_return': float(total_return),
            'annualized_return': float(annualized_return),
            'sharpe_ratio': float(sharpe),
            'max_drawdown': float(max_drawdown),
            'calmar_ratio': float(calmar),
            'profit_factor': float(profit_factor),
            'win_rate': float(win_rate),
            'avg_win': float(avg_win),
            'avg_loss': float(avg_loss),
            'num_trades': len(trade_pnls),
            'equity_curve': equity.tolist(),
            'daily_returns': period_returns.tolist(),
        }
def format_evaluation(results: Dict) -> str:
    """
    Format evaluation results for display.

    Args:
        results: Mapping with an optional ``'summary'`` entry and any number
            of ``'horizon_<h>'`` entries, each a mapping of metric name to
            value. Missing metrics are rendered as 0 (``'N/A'`` for the
            sample count).

    Returns:
        A multi-line report. Horizon sections are ordered by numeric
        horizon (1, 5, 20, ...); keys whose suffix is not an integer are
        listed afterwards in alphabetical order.
    """
    def _horizon_order(key: str):
        # Plain string sorting would put 'horizon_20' before 'horizon_5'.
        try:
            return (0, int(key.split('_')[1]), key)
        except ValueError:
            return (1, 0, key)

    rule = "═" * 70
    lines = [rule, " TRADING INTELLIGENCE SYSTEM - EVALUATION REPORT", rule]

    summary = results.get('summary', {})
    lines.append(f" Test Samples: {summary.get('num_test_samples', 'N/A')}")
    lines.append(f" Avg Direction Accuracy: {summary.get('avg_direction_accuracy', 0):.1%}")
    lines.append(f" Avg Information Coefficient: {summary.get('avg_ic', 0):.4f}")
    lines.append("")

    horizon_keys = sorted(
        (key for key in results if key.startswith('horizon_')),
        key=_horizon_order,
    )
    for key in horizon_keys:
        h = key.split('_')[1]
        hr = results[key]
        lines.extend([
            f" ┌──────────── Horizon: {h}-day ────────────┐",
            f" │ Direction Accuracy: {hr.get('direction_accuracy', 0):.1%}",
            f" │ Information Coeff: {hr.get('information_coefficient', 0):.4f}",
            f" │ Total Return: {hr.get('total_return', 0):.2%}",
            f" │ Annualized Return: {hr.get('annualized_return', 0):.2%}",
            f" │ Sharpe Ratio: {hr.get('sharpe_ratio', 0):.2f}",
            f" │ Max Drawdown: {hr.get('max_drawdown', 0):.2%}",
            f" │ Calmar Ratio: {hr.get('calmar_ratio', 0):.2f}",
            f" │ Profit Factor: {hr.get('profit_factor', 0):.2f}",
            f" │ Win Rate: {hr.get('win_rate', 0):.1%}",
            f" │ Avg Win: {hr.get('avg_win', 0):.4f}",
            f" │ Avg Loss: {hr.get('avg_loss', 0):.4f}",
            f" │ Num Trades: {hr.get('num_trades', 0)}",
            f" └{'─' * 42}┘",
            "",
        ])

    lines.append(rule)
    return "\n".join(lines)