math-backend / dl_models.py
engineportf's picture
Upload folder using huggingface_hub
558db1e verified
Raw
History Blame Contribute Delete
9.41 kB
import numpy as np
import pandas as pd
import math
import logging
try:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
HAS_TORCH = True
except ImportError:
HAS_TORCH = False
logger = logging.getLogger("portfolio_engine")
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence-first tensor.

    The encoding table is computed once in ``__init__`` for ``max_len``
    positions and registered as the buffer ``pe`` with shape
    ``[max_len, 1, d_model]``. Even embedding indices hold sines and odd
    indices hold cosines.

    Args:
        d_model: Size of the embedding dimension. Odd sizes are supported.
        max_len: Number of positions to precompute.
    """

    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]

        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # An odd d_model has one fewer odd (cosine) slot than even (sine) slot,
        # so trim the angle table to the number of cosine slots available.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` with the position encoding added.

        Args:
            x: Tensor, shape [seq_len, batch_size, embedding_dim]

        Raises:
            RuntimeError: If ``seq_len`` exceeds the precomputed ``max_len``.
        """
        seq_len = x.size(0)
        if seq_len > self.pe.size(0):
            raise RuntimeError(
                f"Sequence length {seq_len} exceeds positional encoding "
                f"max_len {self.pe.size(0)}"
            )
        return x + self.pe[:seq_len]
class NoiseFilteredTransformer(nn.Module):
    """Conv1d front-end followed by a Transformer encoder and a scalar head.

    Pipeline: a width-3 convolution over the time axis projects the input
    features to ``d_model`` channels, sinusoidal positions are added, the
    sequence goes through a stack of encoder layers, the encoded sequence is
    mean-pooled over time, and a small MLP emits one value per sample.

    Args:
        num_features: Number of input features per time step.
        d_model: Channel width used by the convolution and the encoder.
        nhead: Number of attention heads in each encoder layer.
        num_layers: Number of stacked encoder layers.
        dropout: Dropout rate for the encoder layers and the output head.
    """

    def __init__(self, num_features, d_model=64, nhead=4, num_layers=2, dropout=0.1):
        super().__init__()
        self.d_model = d_model

        # Stage 1: convolutional noise filter. padding=1 with kernel_size=3
        # keeps the sequence length unchanged.
        self.noise_filter = nn.Conv1d(num_features, d_model, kernel_size=3, padding=1)
        self.filter_activation = nn.GELU()

        # Stage 2: fixed sinusoidal positions.
        self.pos_encoder = PositionalEncoding(d_model)

        # Stage 3: batch-first Transformer encoder stack.
        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=4 * d_model,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers)

        # Stage 4: regression head producing a single value.
        hidden = d_model // 2
        self.fc_out = nn.Sequential(
            nn.Linear(d_model, hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, 1),
        )

    def forward(self, src):
        """Map ``src`` of shape (batch, seq_len, num_features) to shape (batch,)."""
        # Conv1d wants channels before length: (batch, num_features, seq_len).
        channels_first = src.permute(0, 2, 1)
        filtered = self.filter_activation(self.noise_filter(channels_first))

        # Back to (batch, seq_len, d_model) for the batch-first encoder.
        tokens = filtered.permute(0, 2, 1)

        # The positional encoder is sequence-first, so swap the first two axes
        # around the call and restore them afterwards.
        tokens = self.pos_encoder(tokens.permute(1, 0, 2)).permute(1, 0, 2)

        encoded = self.transformer_encoder(tokens)

        # Average over the time axis, then project to one value per sample.
        pooled = encoded.mean(dim=1)
        return self.fc_out(pooled).squeeze(-1)
class CrossAssetSequenceDataset(Dataset):
    """Sliding-window dataset pooled over every frame in ``features_dict``.

    Each sample is ``seq_len`` consecutive feature rows of one frame; its
    target is the ``'target'`` value of the row immediately after the window
    (index ``i + seq_len`` for a window starting at ``i``).
    NOTE(review): confirm this one-row offset matches how ``'target'`` is
    defined upstream.

    Feature columns are all columns except ``'target'`` and ``'ret'``. Frames
    that are empty or have ``seq_len`` rows or fewer are skipped.

    Args:
        features_dict: Mapping of asset key to DataFrame.
        seq_len: Number of rows per window.
        scaler: Optional pre-fitted scaler exposing ``transform``. When it is
            ``None`` and ``is_train`` is true, a ``StandardScaler`` is fitted
            on the stacked features of all usable frames.
        is_train: When true, windows whose target is NaN are dropped. When
            false they are kept, so targets may contain NaN.

    Attributes set: ``seq_len``, ``samples`` (list of arrays), ``targets``
    (list of scalars) and ``scaler`` (the scaler in use, or ``None``).
    """

    def __init__(self, features_dict, seq_len=60, scaler=None, is_train=True):
        self.seq_len = seq_len
        self.samples = []
        self.targets = []
        # Always expose the caller's scaler, even if no frame turns out usable.
        self.scaler = scaler

        # Single pass to select usable frames and extract their raw features.
        usable = []
        for df in features_dict.values():
            if df.empty or len(df) <= seq_len:
                continue
            feats = df.drop(columns=['target', 'ret'], errors='ignore').values
            usable.append((df, feats))

        if not usable:
            return

        if scaler is None and is_train:
            # Fit one global scaler across all assets.
            self.scaler = StandardScaler()
            self.scaler.fit(np.vstack([feats for _, feats in usable]))

        for df, feats in usable:
            if self.scaler is not None:
                feats = self.scaler.transform(feats)
            targets = df['target'].values if 'target' in df.columns else np.zeros(len(df))

            # Overlapping windows; the last row can only ever be a target.
            for start in range(len(df) - seq_len):
                target = targets[start + seq_len]
                # Only keep non-NaN targets for training.
                if is_train and np.isnan(target):
                    continue
                self.samples.append(feats[start:start + seq_len])
                self.targets.append(target)

    def __len__(self):
        """Return the number of windows collected."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(window, target)`` as float32 tensors."""
        x = torch.tensor(self.samples[idx], dtype=torch.float32)
        y = torch.tensor(self.targets[idx], dtype=torch.float32)
        return x, y
def train_cross_asset_transformer(features_dict, seq_len=60, epochs=10, batch_size=256, device=None, silent=False):
    """
    Trains a global Cross-Asset Transformer sequence model.

    Every frame with more than ``seq_len + 21`` rows is split chronologically
    80/20 into train and validation parts. The scaler is fitted on the train
    part only and reused for validation. The weights from the epoch with the
    lowest validation loss are restored before returning.

    Args:
        features_dict: Mapping of asset key to feature DataFrame.
        seq_len: Window length passed to the dataset.
        epochs: Number of training epochs.
        batch_size: Batch size for both loaders.
        device: Torch device; defaults to CUDA when available, else CPU.
        silent: Suppress warning logs when true.

    Returns:
        ``(model, scaler)`` on success, or ``(None, None)`` when PyTorch is
        unavailable or no training windows could be built.
    """
    if not HAS_TORCH:
        if not silent:
            logger.warning("PyTorch not installed. Cannot train Transformer.")
        return None, None

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Split into train/val (80/20 by time for each asset).
    train_dict = {}
    val_dict = {}
    for t, df in features_dict.items():
        if len(df) > seq_len + 21:
            split_idx = int(len(df) * 0.8)
            train_dict[t] = df.iloc[:split_idx]
            val_dict[t] = df.iloc[split_idx:]

    train_dataset = CrossAssetSequenceDataset(train_dict, seq_len=seq_len, is_train=True)
    if len(train_dataset) == 0:
        if not silent:
            logger.warning("No training windows available. Cannot train Transformer.")
        return None, None

    val_dataset = CrossAssetSequenceDataset(val_dict, seq_len=seq_len, scaler=train_dataset.scaler, is_train=False)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    num_features = train_dataset.samples[0].shape[1]
    model = NoiseFilteredTransformer(
        num_features=num_features,
        d_model=64,
        nhead=4,
        num_layers=2,
        dropout=0.1
    ).to(device)

    criterion = nn.MSELoss()
    # Using AdamW with weight decay for better regularization
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

    best_val_loss = float('inf')
    best_state = None

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            # Weight by batch size so the epoch loss is a per-sample mean.
            train_loss += loss.item() * len(X_batch)
        train_loss /= len(train_loader.dataset)

        model.eval()
        val_loss = 0.0
        val_count = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                # The validation dataset is built with is_train=False, which
                # keeps NaN targets. Mask them out so one NaN cannot turn the
                # whole epoch's validation loss into NaN.
                valid = ~torch.isnan(y_batch)
                n_valid = int(valid.sum().item())
                if n_valid == 0:
                    continue
                preds = model(X_batch)
                loss = criterion(preds[valid], y_batch[valid])
                val_loss += loss.item() * n_valid
                val_count += n_valid

        if val_count > 0:
            val_loss /= val_count
        else:
            # No usable validation targets: fall back to the training loss.
            val_loss = train_loss

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # clone() is required: state_dict() tensors share storage with the
            # live parameters and .cpu() is a no-op for tensors already on the
            # CPU, so without a copy later optimizer steps would overwrite
            # this "best" snapshot.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    if best_state is not None:
        model.load_state_dict(best_state)

    return model, train_dataset.scaler
def predict_transformer(model, scaler, features_dict, seq_len=60, device=None):
    """
    Produce one prediction per asset from its most recent window.

    For every frame with at least ``seq_len`` rows, the last ``seq_len``
    feature rows (all columns except ``'target'`` and ``'ret'``) are scaled
    with ``scaler`` when one is given, run through ``model`` as a batch of
    one, and the scalar output is stored under the asset's key.

    The model is moved to ``device`` and switched to eval mode as a side
    effect.

    Returns:
        Dict mapping asset key to a float prediction. Empty when PyTorch is
        unavailable or ``model`` is ``None``.
    """
    if not HAS_TORCH or model is None:
        return {}

    target_device = (
        device
        if device is not None
        else torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    )
    model.to(target_device)
    model.eval()

    latest = {}
    with torch.no_grad():
        for ticker, frame in features_dict.items():
            if len(frame) >= seq_len:
                # Most recent `seq_len` rows of the feature columns.
                feature_matrix = frame.drop(columns=['target', 'ret'], errors='ignore').values
                window = feature_matrix[-seq_len:]
                if scaler is not None:
                    window = scaler.transform(window)

                # Shape (1, seq_len, features): a batch holding a single window.
                batch = torch.tensor(window, dtype=torch.float32).unsqueeze(0)
                latest[ticker] = model(batch.to(target_device)).item()

    return latest