Spaces:
Sleeping
Sleeping
import logging
import math

import numpy as np
import pandas as pd

# torch and scikit-learn are optional. When either import fails, HAS_TORCH is
# False and the public functions in this module return early instead of
# training or predicting.
try:
    import torch
    import torch.nn as nn
    from torch.utils.data import Dataset, DataLoader
    from sklearn.preprocessing import StandardScaler
    HAS_TORCH = True
except ImportError:
    HAS_TORCH = False

    class _MissingDependency:
        """Import-time placeholder for the ``torch`` / ``torch.nn`` modules.

        The classes defined later in this module name ``nn.Module`` as a base
        class and ``torch.Tensor`` in annotations, both of which are evaluated
        when the module is imported. Without these placeholders the module
        would raise ``NameError`` on import and the ``HAS_TORCH`` guard could
        never be reached.
        """

        Module = object
        Tensor = object

    torch = nn = _MissingDependency
    Dataset = object
    DataLoader = StandardScaler = None

logger = logging.getLogger("portfolio_engine")
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a sequence-first tensor.

    The encoding table is computed once for ``max_len`` positions and stored
    as a non-trainable buffer named ``pe`` with shape
    ``[max_len, 1, d_model]``. Even feature indices hold sines and odd feature
    indices hold cosines.
    """

    def __init__(self, d_model: int, max_len: int = 5000):
        """
        Args:
            d_model: Width of the embedding dimension (odd or even).
            max_len: Number of positions for which encodings are precomputed.
        """
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd (cosine) column than even
        # (sine) column, so the angle table is trimmed to the number of cosine
        # slots; otherwise the assignment fails with a shape mismatch.
        pe[:, 0, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x: Tensor, shape [seq_len, batch_size, embedding_dim]

        Returns:
            ``x`` with the encodings of its first ``seq_len`` positions added
            (broadcast over the batch dimension).
        """
        x = x + self.pe[:x.size(0)]
        return x
class NoiseFilteredTransformer(nn.Module):
    """Conv1d front-end followed by a Transformer encoder and a scalar head.

    Pipeline: a width-3 convolution over the time axis projects the raw
    features to ``d_model`` channels, sinusoidal positions are added, the
    sequence goes through a stack of encoder layers, the time axis is averaged
    away, and a two-layer MLP produces one value per sample.
    """

    def __init__(self, num_features, d_model=64, nhead=4, num_layers=2, dropout=0.1):
        """
        Args:
            num_features: Number of input features per time step.
            d_model: Channel width after the convolution and inside the encoder.
            nhead: Number of attention heads per encoder layer.
            num_layers: Number of stacked encoder layers.
            dropout: Dropout rate used in the encoder layers and the output head.
        """
        super().__init__()
        self.d_model = d_model

        # Stage 1 - smoothing convolution. kernel_size=3 with padding=1 keeps
        # the sequence length unchanged.
        self.noise_filter = nn.Conv1d(num_features, d_model, kernel_size=3, padding=1)
        self.filter_activation = nn.GELU()

        # Stage 2 - fixed sinusoidal position information.
        self.pos_encoder = PositionalEncoding(d_model)

        # Stage 3 - batch-first Transformer encoder stack.
        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=4 * d_model,
            dropout=dropout,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers)

        # Stage 4 - regression head reducing d_model -> d_model // 2 -> 1.
        hidden = d_model // 2
        self.fc_out = nn.Sequential(
            nn.Linear(d_model, hidden),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, 1),
        )

    def forward(self, src):
        """Map ``src`` of shape (batch, seq_len, num_features) to (batch,) outputs."""
        # Conv1d consumes (batch, channels, length), so move features to axis 1.
        channels_first = src.transpose(1, 2)
        filtered = self.filter_activation(self.noise_filter(channels_first))

        # (batch, d_model, seq_len) -> (seq_len, batch, d_model): the layout the
        # positional encoder works on. Afterwards swap back to batch-first for
        # the encoder stack.
        seq_first = filtered.permute(2, 0, 1)
        tokens = self.pos_encoder(seq_first).transpose(0, 1)

        encoded = self.transformer_encoder(tokens)

        # Average over the time axis, then project to a single value.
        pooled = encoded.mean(dim=1)
        return self.fc_out(pooled).squeeze(-1)
class CrossAssetSequenceDataset(Dataset):
    """Fixed-length feature windows pooled over several per-asset frames.

    Each item is ``(window, target)``: ``window`` holds ``seq_len`` consecutive
    feature rows and ``target`` is the ``'target'`` value of the row that
    immediately follows the window. The ``'target'`` and ``'ret'`` columns are
    never used as features.

    Attributes set by the constructor:
        seq_len: Window length.
        samples: List of 2-D feature windows.
        targets: List of scalar targets aligned with ``samples``.
        scaler: The scaler applied to the features, or ``None``.
    """

    def __init__(self, features_dict, seq_len=60, scaler=None, is_train=True):
        """
        Args:
            features_dict: Mapping of asset key -> DataFrame of features.
            seq_len: Number of rows per window; frames with ``seq_len`` rows
                or fewer are ignored.
            scaler: Pre-fitted object exposing ``transform``. When ``None`` and
                ``is_train`` is true, a ``StandardScaler`` is fitted on all
                usable rows; when ``None`` and ``is_train`` is false, features
                are left unscaled.
            is_train: When true, windows whose target is NaN are dropped.
        """
        self.seq_len = seq_len
        self.samples = []
        self.targets = []

        # Pass 1: keep the frames that can yield at least one window, together
        # with their raw feature matrices.
        usable = []
        for _, frame in features_dict.items():
            if frame.empty or len(frame) <= seq_len:
                continue
            matrix = frame.drop(columns=['target', 'ret'], errors='ignore').values
            usable.append((frame, matrix))

        if not usable:
            # Nothing to build; the scaler attribute is reset even if one was passed.
            self.scaler = None
            return

        if scaler is None and is_train:
            self.scaler = StandardScaler()
            self.scaler.fit(np.vstack([matrix for _, matrix in usable]))
        else:
            self.scaler = scaler

        # Pass 2: scale each frame and slice it into overlapping windows.
        for frame, matrix in usable:
            if self.scaler is not None:
                matrix = self.scaler.transform(matrix)

            if 'target' in frame.columns:
                labels = frame['target'].values
            else:
                labels = np.zeros(len(frame))

            # `end` is the index of the row right after the window; that row
            # supplies the target.
            for end in range(seq_len, len(frame)):
                label = labels[end]
                if is_train and np.isnan(label):
                    continue
                self.samples.append(matrix[end - seq_len:end])
                self.targets.append(label)

    def __len__(self):
        """Return the number of stored windows."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return window ``idx`` and its target as float32 tensors."""
        window = torch.tensor(self.samples[idx], dtype=torch.float32)
        label = torch.tensor(self.targets[idx], dtype=torch.float32)
        return window, label
def train_cross_asset_transformer(features_dict, seq_len=60, epochs=10, batch_size=256, device=None, silent=False):
    """
    Trains a global Cross-Asset Transformer sequence model.

    Every frame with more than ``seq_len + 21`` rows is split chronologically:
    the first 80% of rows go to training and the rest to validation. One model
    is fitted on the windows of all assets with MSE loss, AdamW, gradient-norm
    clipping at 1.0 and ``ReduceLROnPlateau`` (factor 0.5, patience 2) driven
    by the validation loss. The weights of the epoch with the lowest
    validation loss are restored before returning.

    Args:
        features_dict: Mapping of asset key -> DataFrame of features.
        seq_len: Window length fed to the model.
        epochs: Number of passes over the training windows.
        batch_size: Mini-batch size for both loaders.
        device: Torch device; defaults to CUDA when available, else CPU.
        silent: Suppresses the warning logged when torch is unavailable.

    Returns:
        ``(model, scaler)`` on success, where ``scaler`` was fitted on the
        training rows only; ``(None, None)`` when torch is unavailable or no
        training window could be built.
    """
    if not HAS_TORCH:
        if not silent:
            logger.warning("PyTorch not installed. Cannot train Transformer.")
        return None, None

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Split into train/val (80/20 by time for each asset)
    train_dict = {}
    val_dict = {}
    for t, df in features_dict.items():
        if len(df) > seq_len + 21:
            split_idx = int(len(df) * 0.8)
            train_dict[t] = df.iloc[:split_idx]
            val_dict[t] = df.iloc[split_idx:]

    train_dataset = CrossAssetSequenceDataset(train_dict, seq_len=seq_len, is_train=True)
    if len(train_dataset) == 0:
        return None, None

    # The validation set reuses the scaler fitted on the training rows.
    val_dataset = CrossAssetSequenceDataset(val_dict, seq_len=seq_len, scaler=train_dataset.scaler, is_train=False)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

    num_features = train_dataset.samples[0].shape[1]
    model = NoiseFilteredTransformer(
        num_features=num_features,
        d_model=64,
        nhead=4,
        num_layers=2,
        dropout=0.1
    ).to(device)

    criterion = nn.MSELoss()
    # Using AdamW with weight decay for better regularization
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)

    best_val_loss = float('inf')
    best_state = None

    for _ in range(epochs):
        model.train()
        train_loss = 0.0
        for X_batch, y_batch in train_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            preds = model(X_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            train_loss += loss.item() * len(X_batch)
        train_loss /= len(train_loader.dataset)

        model.eval()
        val_loss_sum = 0.0
        val_count = 0
        with torch.no_grad():
            for X_batch, y_batch in val_loader:
                X_batch, y_batch = X_batch.to(device), y_batch.to(device)
                # The validation dataset keeps windows whose target is NaN
                # (it is built with is_train=False). A single NaN would turn
                # the epoch loss into NaN, which breaks both the scheduler
                # and the best-checkpoint comparison, so score only the
                # windows with a finite target.
                finite = torch.isfinite(y_batch)
                n_valid = int(finite.sum().item())
                if n_valid == 0:
                    continue
                preds = model(X_batch)
                loss = criterion(preds[finite], y_batch[finite])
                val_loss_sum += loss.item() * n_valid
                val_count += n_valid

        # Without any scorable validation window, fall back to the training
        # loss so the scheduler and checkpoint selection still get a signal.
        val_loss = val_loss_sum / val_count if val_count > 0 else train_loss

        scheduler.step(val_loss)

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # clone() is required: on CPU `.cpu()` returns the very same
            # tensor, so without a copy the snapshot would keep tracking the
            # live weights and restoring it would be a no-op.
            best_state = {k: v.detach().cpu().clone() for k, v in model.state_dict().items()}

    if best_state is not None:
        model.load_state_dict(best_state)

    return model, train_dataset.scaler
def predict_transformer(model, scaler, features_dict, seq_len=60, device=None):
    """
    Infers the latest prediction for each asset using the trained Transformer.

    For every frame with at least ``seq_len`` rows, the last ``seq_len``
    feature rows (all columns except ``'target'`` and ``'ret'``) are scaled
    with ``scaler`` when one is given and passed through ``model`` as a batch
    of one.

    Returns:
        Dict of asset key -> float prediction. Empty when torch is
        unavailable or ``model`` is ``None``; assets with too few rows are
        omitted.
    """
    if model is None or not HAS_TORCH:
        return {}

    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.to(device)
    model.eval()

    def _latest_window(frame):
        """Return the most recent ``seq_len`` feature rows, scaled if possible."""
        window = frame.drop(columns=['target', 'ret'], errors='ignore').values[-seq_len:]
        if scaler is not None:
            window = scaler.transform(window)
        return window

    latest = {}
    with torch.no_grad():
        for asset, frame in features_dict.items():
            if len(frame) < seq_len:
                continue
            # Shape (1, seq_len, features): a batch holding a single window.
            batch = torch.tensor(_latest_window(frame), dtype=torch.float32).unsqueeze(0).to(device)
            latest[asset] = model(batch).item()
    return latest