Fix: trading_intelligence/training.py - all 174 tests passing

244371a verified 25 days ago

16 kB

	"""
	Training Pipeline
	==================
	End-to-end training with multi-task learning,
	data loading, and proper financial time-series splits.
	"""

	import torch
	import torch.nn as nn
	import torch.optim as optim
	from torch.utils.data import Dataset, DataLoader
	import numpy as np
	import pandas as pd
	from typing import Dict, List, Tuple, Optional
	import os
	import json
	import time

	from trading_intelligence.feature_engine import FeatureEngine
	from trading_intelligence.prediction_model import TradingTransformer, MultiTaskLoss


	class FinancialTimeSeriesDataset(Dataset):
	    """
	    PyTorch Dataset pairing feature windows with their target vectors.

	    The dataset only stores and indexes samples; it does no splitting or
	    shuffling of its own, so samples stay in the order the caller supplies.
	    """

	    def __init__(self, X: np.ndarray, y: np.ndarray):
	        """
	        Args:
	            X: (N, num_features, lookback_window) feature sequences
	            y: (N, num_targets) target values

	        Raises:
	            ValueError: If X and y do not hold the same number of samples.
	        """
	        # __len__ is driven by X alone, so a length mismatch would otherwise
	        # only show up later as an IndexError in the middle of an epoch.
	        if len(X) != len(y):
	            raise ValueError(
	                f"X and y must contain the same number of samples, "
	                f"got {len(X)} and {len(y)}"
	            )

	        # Both arrays are stored as float32 tensors.
	        self.X = torch.as_tensor(X, dtype=torch.float32)
	        self.y = torch.as_tensor(y, dtype=torch.float32)

	    def __len__(self) -> int:
	        """Return the number of samples."""
	        return len(self.X)

	    def __getitem__(self, idx):
	        """Return the ``(features, targets)`` pair stored at ``idx``."""
	        return self.X[idx], self.y[idx]


	class TrainingPipeline:
	    """
	    Complete training pipeline for the trading intelligence system.

	    Features:
	        1. Data loading and feature engineering
	        2. Walk-forward temporal splits
	        3. Multi-task training (direction + return + risk)
	        4. Learning rate scheduling
	        5. Early stopping with patience
	        6. Comprehensive logging
	    """

	    # Maps the short metric names used in per-epoch summaries to the keys of
	    # the dict returned by the loss function.
	    _LOSS_KEYS = {
	        'total': 'total_loss',
	        'direction': 'direction_loss',
	        'return': 'return_loss',
	        'risk': 'risk_loss',
	    }

	    def __init__(
	        self,
	        lookback_window: int = 60,
	        prediction_horizons: Optional[List[int]] = None,
	        d_model: int = 128,
	        n_heads: int = 8,
	        n_layers: int = 3,
	        d_ff: int = 256,
	        patch_len: int = 8,
	        stride: int = 4,
	        dropout: float = 0.1,
	        learning_rate: float = 1e-3,
	        batch_size: int = 64,
	        max_epochs: int = 100,
	        patience: int = 10,
	        device: str = 'auto',
	    ):
	        """
	        Store hyper-parameters and build the feature engine.

	        The model, loss, optimizer and scheduler are not created here; they
	        are built by ``prepare_data`` or ``load_model``.

	        Args:
	            lookback_window: Sequence length handed to the feature engine and
	                used as the model's ``seq_len``.
	            prediction_horizons: Horizons to predict; defaults to
	                ``[1, 5, 20]`` when omitted.
	            d_model, n_heads, n_layers, d_ff, patch_len, stride, dropout:
	                Forwarded unchanged to ``TradingTransformer``.
	            learning_rate: Learning rate for the AdamW optimizer.
	            batch_size: Batch size of every DataLoader created.
	            max_epochs: Upper bound on training epochs.
	            patience: Epochs without validation improvement before stopping.
	            device: Torch device string, or ``'auto'`` to use CUDA when it
	                is available and CPU otherwise.
	        """
	        self.lookback_window = lookback_window
	        # A list literal as the default would be shared by every instance;
	        # build a fresh list per pipeline instead.
	        self.prediction_horizons = (
	            [1, 5, 20] if prediction_horizons is None else list(prediction_horizons)
	        )
	        self.d_model = d_model
	        self.n_heads = n_heads
	        self.n_layers = n_layers
	        self.d_ff = d_ff
	        self.patch_len = patch_len
	        self.stride = stride
	        self.dropout = dropout
	        self.learning_rate = learning_rate
	        self.batch_size = batch_size
	        self.max_epochs = max_epochs
	        self.patience = patience

	        if device == 'auto':
	            self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	        else:
	            self.device = torch.device(device)

	        self.feature_engine = FeatureEngine(self.lookback_window, self.prediction_horizons)
	        self.model = None
	        self.loss_fn = None
	        self.optimizer = None
	        self.scheduler = None
	        self.training_history = []

	    @staticmethod
	    def _cpu_state(module: nn.Module) -> Dict[str, torch.Tensor]:
	        """Return a detached CPU copy of ``module``'s state dict."""
	        return {k: v.cpu().clone() for k, v in module.state_dict().items()}

	    def prepare_data(self, df: pd.DataFrame,
	                     train_ratio: float = 0.7,
	                     val_ratio: float = 0.15) -> Tuple[DataLoader, DataLoader, DataLoader]:
	        """
	        Prepare data with walk-forward temporal splits.

	        Besides returning the loaders, this stores ``norm_params``,
	        ``X_test``, ``y_test`` and ``num_channels`` on the instance and
	        (re)initializes the model, loss, optimizer and scheduler.

	        Args:
	            df: Raw OHLCV DataFrame
	            train_ratio: Fraction for training (earliest data)
	            val_ratio: Fraction for validation (middle)

	        Returns:
	            train_loader, val_loader, test_loader

	        Raises:
	            ValueError: If the training split ends up empty.
	        """
	        # Feature engineering
	        features_df = self.feature_engine.compute_all_features(df)

	        # Normalize features
	        # NOTE(review): normalization runs before the temporal split, so its
	        # statistics may include validation/test rows - confirm that
	        # normalize_features is look-ahead safe.
	        features_df, self.norm_params = self.feature_engine.normalize_features(features_df)

	        # Target columns, interleaved per horizon: direction then return
	        target_cols = []
	        for h in self.prediction_horizons:
	            target_cols.extend([f'target_direction_{h}', f'target_return_{h}'])

	        # Create sequences
	        X, y = self.feature_engine.create_sequences(features_df, target_cols=target_cols)

	        # Drop every sample containing a NaN/Inf in its features or targets
	        valid_mask = np.isfinite(X).all(axis=(1, 2)) & np.isfinite(y).all(axis=1)
	        X = X[valid_mask]
	        y = y[valid_mask]

	        print(f"Total valid samples: {len(X)}")
	        print(f"Features per sample: {X.shape[1]} channels x {X.shape[2]} timesteps")
	        print(f"Targets per sample: {y.shape[1]}")

	        # Temporal split (NO shuffling - preserves time order)
	        n = len(X)
	        train_end = int(n * train_ratio)
	        val_end = int(n * (train_ratio + val_ratio))

	        X_train, y_train = X[:train_end], y[:train_end]
	        X_val, y_val = X[train_end:val_end], y[train_end:val_end]
	        X_test, y_test = X[val_end:], y[val_end:]

	        print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

	        # Fail early with an actionable message rather than building a
	        # shuffled loader over zero training samples.
	        if len(X_train) == 0:
	            raise ValueError(
	                f"Training split is empty ({n} valid samples, "
	                f"train_ratio={train_ratio}); provide more data or a larger train_ratio."
	            )

	        # Store test data for evaluation
	        self.X_test = X_test
	        self.y_test = y_test

	        # Create DataLoaders
	        train_dataset = FinancialTimeSeriesDataset(X_train, y_train)
	        val_dataset = FinancialTimeSeriesDataset(X_val, y_val)
	        test_dataset = FinancialTimeSeriesDataset(X_test, y_test)

	        # Only the order of samples inside the training split is shuffled;
	        # the split boundaries above remain chronological.
	        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
	        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)
	        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)

	        # Initialize model with correct number of channels
	        self.num_channels = X.shape[1]
	        self._init_model()

	        return train_loader, val_loader, test_loader

	    def _init_model(self):
	        """
	        Initialize model, loss, optimizer, and scheduler.

	        Requires ``self.num_channels`` to be set. Any previously held model,
	        optimizer and scheduler are replaced.
	        """
	        self.model = TradingTransformer(
	            num_channels=self.num_channels,
	            seq_len=self.lookback_window,
	            patch_len=self.patch_len,
	            stride=self.stride,
	            d_model=self.d_model,
	            n_heads=self.n_heads,
	            n_layers=self.n_layers,
	            d_ff=self.d_ff,
	            num_horizons=len(self.prediction_horizons),
	            dropout=self.dropout,
	        ).to(self.device)

	        self.loss_fn = MultiTaskLoss(
	            num_horizons=len(self.prediction_horizons)
	        ).to(self.device)

	        total_params = sum(p.numel() for p in self.model.parameters())
	        print(f"Model initialized: {total_params:,} parameters")
	        print(f"Device: {self.device}")

	        # The loss function's own parameters are optimized jointly with the
	        # model's.
	        self.optimizer = optim.AdamW(
	            list(self.model.parameters()) + list(self.loss_fn.parameters()),
	            lr=self.learning_rate,
	            weight_decay=1e-4
	        )

	        self.scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
	            self.optimizer, T_0=10, T_mult=2
	        )

	    def _parse_targets(self, y_batch: torch.Tensor) -> Dict[str, torch.Tensor]:
	        """
	        Parse target tensor into direction and return components.

	        Args:
	            y_batch: (batch, 2 * num_horizons) tensor with columns
	                interleaved as ``[dir_h1, ret_h1, dir_h2, ret_h2, ...]``.

	        Returns:
	            Dict with ``'direction'`` and ``'returns'``, each of shape
	            (batch, num_horizons).
	        """
	        num_horizons = len(self.prediction_horizons)

	        # y layout: [dir_1, ret_1, dir_5, ret_5, dir_20, ret_20]
	        directions = torch.stack([y_batch[:, i * 2] for i in range(num_horizons)], dim=1)
	        returns = torch.stack([y_batch[:, i * 2 + 1] for i in range(num_horizons)], dim=1)

	        return {
	            'direction': directions,
	            'returns': returns,
	        }

	    def train_epoch(self, train_loader: DataLoader) -> Dict[str, float]:
	        """
	        Train for one epoch.

	        Returns:
	            Mean ``total``/``direction``/``return``/``risk`` loss over the
	            batches seen (all zeros when the loader yields no batches).
	        """
	        self.model.train()
	        epoch_losses = {name: 0.0 for name in self._LOSS_KEYS}
	        num_batches = 0

	        for X_batch, y_batch in train_loader:
	            X_batch = X_batch.to(self.device)
	            y_batch = y_batch.to(self.device)

	            # Forward pass
	            predictions = self.model(X_batch)
	            targets = self._parse_targets(y_batch)

	            # Compute loss
	            losses = self.loss_fn(predictions, targets)

	            # Backward pass
	            self.optimizer.zero_grad()
	            losses['total_loss'].backward()

	            # Gradient clipping (applied to the model's parameters only)
	            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)

	            self.optimizer.step()

	            # Accumulate
	            for name, loss_key in self._LOSS_KEYS.items():
	                epoch_losses[name] += losses[loss_key].item()
	            num_batches += 1

	        return {k: v / max(num_batches, 1) for k, v in epoch_losses.items()}

	    @torch.no_grad()
	    def validate(self, val_loader: DataLoader) -> Dict[str, float]:
	        """
	        Validate model.

	        Returns:
	            Mean losses as in ``train_epoch`` plus, when at least one batch
	            was seen, ``direction_acc_{h}`` for every prediction horizon.
	        """
	        self.model.eval()
	        epoch_losses = {name: 0.0 for name in self._LOSS_KEYS}
	        all_direction_preds = []
	        all_direction_targets = []
	        num_batches = 0

	        for X_batch, y_batch in val_loader:
	            X_batch = X_batch.to(self.device)
	            y_batch = y_batch.to(self.device)

	            predictions = self.model(X_batch)
	            targets = self._parse_targets(y_batch)

	            losses = self.loss_fn(predictions, targets)

	            for name, loss_key in self._LOSS_KEYS.items():
	                epoch_losses[name] += losses[loss_key].item()

	            # Track direction accuracy: a probability above 0.5 counts as "up"
	            dir_preds = (torch.sigmoid(predictions['direction_logits']) > 0.5).float()
	            all_direction_preds.append(dir_preds.cpu())
	            all_direction_targets.append(targets['direction'].cpu())
	            num_batches += 1

	        avg_losses = {k: v / max(num_batches, 1) for k, v in epoch_losses.items()}

	        # Direction accuracy per horizon
	        if all_direction_preds:
	            all_preds = torch.cat(all_direction_preds, dim=0)
	            all_targets = torch.cat(all_direction_targets, dim=0)
	            for i, h in enumerate(self.prediction_horizons):
	                acc = (all_preds[:, i] == all_targets[:, i]).float().mean().item()
	                avg_losses[f'direction_acc_{h}'] = acc

	        return avg_losses

	    def train(self, train_loader: DataLoader, val_loader: DataLoader) -> Dict:
	        """
	        Full training loop with early stopping.

	        Runs up to ``max_epochs`` epochs, stops once the validation loss has
	        not improved for ``patience`` consecutive epochs, then restores the
	        model and loss-function state from the best epoch.

	        Returns:
	            Dict with ``best_val_loss``, ``total_epochs`` (length of the
	            accumulated history) and ``history`` (per-epoch records).
	        """
	        best_val_loss = float('inf')
	        patience_counter = 0
	        best_model_state = None
	        best_loss_state = None

	        print(f"\n{'='*60}")
	        print(f"Starting Training ({self.max_epochs} max epochs)")
	        print(f"{'='*60}")

	        for epoch in range(self.max_epochs):
	            start = time.time()

	            # Train
	            train_losses = self.train_epoch(train_loader)

	            # Validate
	            val_metrics = self.validate(val_loader)

	            # Update scheduler
	            self.scheduler.step()

	            elapsed = time.time() - start

	            # Log
	            epoch_record = {
	                'epoch': epoch + 1,
	                'train_loss': train_losses['total'],
	                'val_loss': val_metrics['total'],
	                'train_dir_loss': train_losses['direction'],
	                'val_dir_loss': val_metrics['direction'],
	                'train_ret_loss': train_losses['return'],
	                'val_ret_loss': val_metrics['return'],
	                'lr': self.optimizer.param_groups[0]['lr'],
	                'elapsed': elapsed,
	            }
	            for h in self.prediction_horizons:
	                key = f'direction_acc_{h}'
	                if key in val_metrics:
	                    epoch_record[key] = val_metrics[key]

	            self.training_history.append(epoch_record)

	            # Print progress
	            acc_str = " | ".join(
	                f"DA-{h}d: {val_metrics.get(f'direction_acc_{h}', 0):.1%}"
	                for h in self.prediction_horizons
	            )
	            print(
	                f"Epoch {epoch+1:3d}/{self.max_epochs} | "
	                f"Train: {train_losses['total']:.4f} | "
	                f"Val: {val_metrics['total']:.4f} | "
	                f"{acc_str} | "
	                f"LR: {self.optimizer.param_groups[0]['lr']:.6f} | "
	                f"{elapsed:.1f}s"
	            )

	            # Early stopping
	            if val_metrics['total'] < best_val_loss:
	                best_val_loss = val_metrics['total']
	                patience_counter = 0
	                # Snapshot the loss function together with the model: both
	                # are trained by the same optimizer and saved in checkpoints.
	                best_model_state = self._cpu_state(self.model)
	                best_loss_state = self._cpu_state(self.loss_fn)
	            else:
	                patience_counter += 1
	                if patience_counter >= self.patience:
	                    print(f"\nEarly stopping at epoch {epoch+1} (patience={self.patience})")
	                    break

	        # Restore best model
	        if best_model_state is not None:
	            self.model.load_state_dict(best_model_state)
	            self.model.to(self.device)
	            if best_loss_state is not None:
	                self.loss_fn.load_state_dict(best_loss_state)
	                self.loss_fn.to(self.device)
	            print(f"Restored best model (val_loss={best_val_loss:.4f})")

	        return {
	            'best_val_loss': best_val_loss,
	            'total_epochs': len(self.training_history),
	            'history': self.training_history,
	        }

	    def save_model(self, path: str):
	        """
	        Save model and training artifacts.

	        Writes a single checkpoint containing the model and loss-function
	        state dicts, normalization parameters, feature names, architecture
	        config and training history. Missing parent directories are created.

	        Args:
	            path: Destination file path.

	        Raises:
	            RuntimeError: If no model has been initialized yet.
	        """
	        # Checked before touching the filesystem so a premature call leaves
	        # no stray directories behind.
	        if self.model is None or self.loss_fn is None:
	            raise RuntimeError(
	                "No model to save: call prepare_data() or load_model() first."
	            )

	        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)

	        save_dict = {
	            'model_state': self.model.state_dict(),
	            'loss_fn_state': self.loss_fn.state_dict(),
	            'norm_params': getattr(self, 'norm_params', {}),
	            'feature_names': self.feature_engine.feature_names,
	            'config': {
	                'lookback_window': self.lookback_window,
	                'prediction_horizons': self.prediction_horizons,
	                'num_channels': self.num_channels,
	                'd_model': self.d_model,
	                'n_heads': self.n_heads,
	                'n_layers': self.n_layers,
	                'd_ff': self.d_ff,
	                'patch_len': self.patch_len,
	                'stride': self.stride,
	                'dropout': self.dropout,
	            },
	            'training_history': self.training_history,
	        }

	        torch.save(save_dict, path)
	        print(f"Model saved to {path}")

	    def load_model(self, path: str):
	        """
	        Load model from checkpoint.

	        Architecture settings stored in the checkpoint override the values
	        given to the constructor; the model, loss, optimizer and scheduler
	        are then rebuilt and the saved weights loaded.

	        Args:
	            path: Checkpoint file written by ``save_model``.
	        """
	        # SECURITY: weights_only=False unpickles arbitrary Python objects.
	        # It is kept because the checkpoint stores non-tensor artifacts
	        # (norm_params, history); only load checkpoints from trusted sources.
	        checkpoint = torch.load(path, map_location=self.device, weights_only=False)
	        config = checkpoint['config']

	        engine_settings_before = (self.lookback_window, list(self.prediction_horizons))

	        # Restore all architecture params from checkpoint
	        self.num_channels = config['num_channels']
	        self.d_model = config.get('d_model', self.d_model)
	        self.n_heads = config.get('n_heads', self.n_heads)
	        self.n_layers = config.get('n_layers', self.n_layers)
	        self.d_ff = config.get('d_ff', self.d_ff)
	        self.patch_len = config.get('patch_len', self.patch_len)
	        self.stride = config.get('stride', self.stride)
	        self.dropout = config.get('dropout', self.dropout)
	        self.lookback_window = config.get('lookback_window', self.lookback_window)
	        if 'prediction_horizons' in config:
	            self.prediction_horizons = config['prediction_horizons']

	        # The feature engine was built from the constructor's window and
	        # horizons; rebuild it when the checkpoint disagrees so sequences it
	        # produces match what the restored model expects.
	        engine_settings_after = (self.lookback_window, list(self.prediction_horizons))
	        if engine_settings_after != engine_settings_before:
	            self.feature_engine = FeatureEngine(self.lookback_window, self.prediction_horizons)

	        self._init_model()

	        self.model.load_state_dict(checkpoint['model_state'])
	        self.loss_fn.load_state_dict(checkpoint['loss_fn_state'])
	        self.norm_params = checkpoint.get('norm_params', {})
	        self.feature_engine.feature_names = checkpoint.get('feature_names', [])
	        self.training_history = checkpoint.get('training_history', [])

	        print(f"Model loaded from {path}")