import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import os
from datetime import datetime
import json


def get_model_device(model):
    return next(model.parameters()).device


class RGLRU(nn.Module):
    """Real-Gated Linear Recurrent Unit, applied one timestep at a time."""

    def __init__(self, hidden_size: int, c: float = 8.0):
        super().__init__()
        self.hidden_size = hidden_size
        self.c = c
        self.input_gate = nn.Linear(hidden_size, hidden_size, bias=False)
        self.recurrence_gate = nn.Linear(hidden_size, hidden_size, bias=False)
        self._base_param = nn.Parameter(torch.empty(hidden_size))
        nn.init.normal_(self._base_param, mean=0.0, std=1.0)  # ok to be any real

    def forward(self, x_t: torch.Tensor, state: torch.Tensor) -> torch.Tensor:
        batch_size, hidden_size = x_t.shape
        assert hidden_size == self.hidden_size
        assert state.shape[0] == batch_size
        i_t = torch.sigmoid(self.input_gate(x_t))
        r_t = torch.sigmoid(self.recurrence_gate(x_t))  # in (0, 1)
        eps = 1e-4
        base = torch.sigmoid(self._base_param).unsqueeze(0)  # shape (1, hidden)
        base = base.clamp(min=eps, max=1.0 - eps)
        # exponent = c * r_t (positive), safe because base is in (0, 1)
        a_t = base ** (self.c * r_t)  # shape (batch, hidden)
        # ensure numerical stability for sqrt
        one_minus_sq = torch.clamp(1.0 - a_t * a_t, min=0.0)
        multiplier = torch.sqrt(one_minus_sq)
        new_state = state * a_t + multiplier * (i_t * x_t)
        return new_state

    def init_state(self, batch_size: int, device: torch.device | None = None):
        if device is None:
            device = get_model_device(self)
        return torch.zeros(batch_size, self.hidden_size, device=device)
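

# The update above follows the RG-LRU recurrence from the Griffin/Hawk paper
# (De et al., 2024): with a = sigmoid(Lambda),
#     a_t = a^(c * r_t),   h_t = a_t * h_{t-1} + sqrt(1 - a_t^2) * (i_t * x_t),
# so every |a_t| < 1 and the sqrt term keeps the state's scale roughly
# constant. A minimal shape sanity check (illustrative only, not part of the
# original script; names are ad hoc):
#
#     cell = RGLRU(hidden_size=8)
#     h = cell.init_state(batch_size=2)
#     h = cell(torch.randn(2, 8), h)   # -> (2, 8)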


class CausalConv1d(nn.Module):
    """Depthwise (grouped) 1D convolution, run one timestep at a time."""

    def __init__(self, hidden_size, kernel_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.kernel_size = kernel_size
        self.conv = nn.Conv1d(
            hidden_size, hidden_size, kernel_size, groups=hidden_size, bias=True
        )

    def init_state(self, batch_size: int, device: torch.device | None = None):
        if device is None:
            device = get_model_device(self)
        return torch.zeros(
            batch_size, self.hidden_size, self.kernel_size - 1, device=device
        )

    def forward(self, x: torch.Tensor, state: torch.Tensor):
        # Prepend the cached history so the kernel sees kernel_size steps.
        x_with_state = torch.cat([state, x[:, :, None]], dim=-1)
        out = self.conv(x_with_state)
        # Slide the window: drop the oldest step, keep the rest as new state.
        new_state = x_with_state[:, :, 1:]
        return out.squeeze(-1), new_state
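

# Streaming view of a causal depthwise conv: `state` caches the last
# kernel_size - 1 inputs, so feeding one timestep at a time is equivalent to
# running nn.Conv1d over the full sequence with a left padding of
# kernel_size - 1. A shape sketch (illustrative only):
#
#     conv = CausalConv1d(hidden_size=8, kernel_size=4)
#     s = conv.init_state(batch_size=2)
#     y, s = conv(torch.randn(2, 8), s)   # y: (2, 8), s: (2, 8, 3)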


class Hawk(nn.Module):
    def __init__(self, hidden_size: int, conv_kernel_size: int = 4):
        super().__init__()
        self.conv_kernel_size = conv_kernel_size
        self.hidden_size = hidden_size
        self.gate_proj = nn.Linear(hidden_size, hidden_size, bias=False)
        self.recurrent_proj = nn.Linear(hidden_size, hidden_size, bias=False)
        self.conv = CausalConv1d(hidden_size, conv_kernel_size)
        self.rglru = RGLRU(hidden_size)
        self.out_proj = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(
        self, x: torch.Tensor, state: list[torch.Tensor]
    ) -> tuple[torch.Tensor, list[torch.Tensor]]:
        conv_state, rglru_state = state
        batch_size, hidden_size = x.shape
        assert batch_size == conv_state.shape[0] == rglru_state.shape[0]
        assert self.hidden_size == hidden_size == rglru_state.shape[1]
        gate = F.gelu(self.gate_proj(x))
        x = self.recurrent_proj(x)
        x, new_conv_state = self.conv(x, conv_state)
        new_rglru_state = self.rglru(x, rglru_state)
        gated = gate * new_rglru_state
        out = self.out_proj(gated)
        new_state = [new_conv_state, new_rglru_state]
        return out, new_state

    def init_state(
        self, batch_size: int, device: torch.device | None = None
    ) -> list[torch.Tensor]:
        return [
            self.conv.init_state(batch_size, device),
            self.rglru.init_state(batch_size, device),
        ]
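

# One Hawk step consumes and returns its per-layer recurrent state; the gate
# branch modulates the RG-LRU output multiplicatively. Usage sketch
# (illustrative only; shapes follow the asserts in forward()):
#
#     block = Hawk(hidden_size=16)
#     state = block.init_state(batch_size=4)
#     y, state = block(torch.randn(4, 16), state)   # y: (4, 16)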


class HawkPredictor(nn.Module):
    """Full model with input projection and output head"""

    def __init__(
        self,
        input_size: int,
        hidden_size: int,
        num_layers: int = 2,
        conv_kernel_size: int = 4,
        dropout: float = 0.1,
    ):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Input projection
        self.input_proj = nn.Linear(input_size, hidden_size)
        self.input_norm = nn.LayerNorm(hidden_size)
        # Hawk layers
        self.hawk_layers = nn.ModuleList(
            [Hawk(hidden_size, conv_kernel_size) for _ in range(num_layers)]
        )
        # Layer norms
        self.layer_norms = nn.ModuleList(
            [nn.LayerNorm(hidden_size) for _ in range(num_layers)]
        )
        # Dropout
        self.dropout = nn.Dropout(dropout)
        # Output head
        self.output_head = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 2, 1),
        )

    def forward(self, x: torch.Tensor, states=None):
        """
        Args:
            x: (batch_size, seq_len, input_size)
            states: list of states for each layer
        Returns:
            predictions: (batch_size, seq_len, 1)
            states: list of final states, one per layer
        """
        batch_size, seq_len, _ = x.shape
        device = x.device
        # Initialize states if needed
        if states is None:
            states = [
                layer.init_state(batch_size, device) for layer in self.hawk_layers
            ]
        # Input projection
        x = self.input_proj(x)  # (batch, seq, hidden)
        x = self.input_norm(x)
        outputs = []
        # Process the sequence one timestep at a time
        for t in range(seq_len):
            x_t = x[:, t, :]  # (batch, hidden)
            # Pass through Hawk layers with residual connections
            new_states = []
            for i, (hawk_layer, layer_norm) in enumerate(
                zip(self.hawk_layers, self.layer_norms)
            ):
                residual = x_t
                x_t, state = hawk_layer(x_t, states[i])
                x_t = layer_norm(x_t + residual)
                x_t = self.dropout(x_t)
                new_states.append(state)
            states = new_states
            outputs.append(x_t)
        # Stack outputs
        outputs = torch.stack(outputs, dim=1)  # (batch, seq, hidden)
        # Generate predictions
        predictions = self.output_head(outputs)  # (batch, seq, 1)
        return predictions, states
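

def stream_predict(model: "HawkPredictor", x: torch.Tensor, chunk_len: int = 32) -> torch.Tensor:
    """Hedged sketch, not part of the original script: run the model
    chunk-by-chunk over a long sequence, carrying recurrent states across
    calls. It only illustrates that forward() returns states that can be fed
    back in; `chunk_len` is an arbitrary choice."""
    model.eval()
    states = None
    preds = []
    with torch.no_grad():
        for chunk in x.split(chunk_len, dim=1):  # split along the time axis
            p, states = model(chunk, states)
            preds.append(p)
    return torch.cat(preds, dim=1)  # (batch, seq_len, 1)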


class TimeSeriesDataset(Dataset):
    def __init__(self, features, targets, seq_length=20):
        self.features = features
        self.targets = targets
        self.seq_length = seq_length

    def __len__(self):
        return len(self.features) - self.seq_length

    def __getitem__(self, idx):
        x = self.features[idx : idx + self.seq_length]
        y = self.targets[idx : idx + self.seq_length]
        return torch.FloatTensor(x), torch.FloatTensor(y).squeeze(-1)
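

# Each item is a sliding window: with seq_length = 20 and N rows, the dataset
# yields N - 20 samples of shape (20, n_features) with targets of shape (20,).
# Note that targets share the same indices as the features, so the model
# learns a per-step mapping rather than a one-step-ahead shift.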


class MetricsLogger:
    def __init__(self, save_dir):
        self.save_dir = save_dir
        self.metrics = {
            "train_loss": [],
            "val_loss": [],
            "train_mse": [],
            "val_mse": [],
            "train_mae": [],
            "val_mae": [],
            "learning_rates": [],
        }

    def update(self, epoch_metrics):
        for key, value in epoch_metrics.items():
            if key in self.metrics:
                self.metrics[key].append(value)

    def save(self):
        with open(os.path.join(self.save_dir, "metrics.json"), "w") as f:
            json.dump(self.metrics, f, indent=4)

    def plot_metrics(self):
        fig, axes = plt.subplots(2, 2, figsize=(15, 10))
        fig.suptitle("Training Metrics", fontsize=16)
        # Loss
        ax = axes[0, 0]
        ax.plot(self.metrics["train_loss"], label="Train Loss", marker="o")
        ax.plot(self.metrics["val_loss"], label="Val Loss", marker="s")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Loss")
        ax.set_title("Training and Validation Loss")
        ax.legend()
        ax.grid(True)
        # MSE
        ax = axes[0, 1]
        ax.plot(self.metrics["train_mse"], label="Train MSE", marker="o")
        ax.plot(self.metrics["val_mse"], label="Val MSE", marker="s")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("MSE")
        ax.set_title("Mean Squared Error")
        ax.legend()
        ax.grid(True)
        # MAE
        ax = axes[1, 0]
        ax.plot(self.metrics["train_mae"], label="Train MAE", marker="o")
        ax.plot(self.metrics["val_mae"], label="Val MAE", marker="s")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("MAE")
        ax.set_title("Mean Absolute Error")
        ax.legend()
        ax.grid(True)
        # Learning rate
        ax = axes[1, 1]
        ax.plot(self.metrics["learning_rates"], marker="o", color="purple")
        ax.set_xlabel("Epoch")
        ax.set_ylabel("Learning Rate")
        ax.set_title("Learning Rate Schedule")
        ax.grid(True)
        ax.set_yscale("log")
        plt.tight_layout()
        plt.savefig(os.path.join(self.save_dir, "training_metrics.png"), dpi=300)
        plt.close()


def calculate_metrics(predictions, targets):
    """Calculate MSE and MAE"""
    mse = F.mse_loss(predictions, targets).item()
    mae = F.l1_loss(predictions, targets).item()
    return mse, mae


def save_checkpoint(
    model, optimizer, scheduler, epoch, metrics, save_dir, is_best=False
):
    checkpoint = {
        "epoch": epoch,
        "model_state_dict": model.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
        "scheduler_state_dict": scheduler.state_dict() if scheduler else None,
        "metrics": metrics,
    }
    # Save regular checkpoint
    checkpoint_path = os.path.join(save_dir, f"checkpoint_epoch_{epoch}.pt")
    torch.save(checkpoint, checkpoint_path)
    # Save best model
    if is_best:
        best_path = os.path.join(save_dir, "best_model.pt")
        torch.save(checkpoint, best_path)
        print(f"✓ Saved best model at epoch {epoch}")
    # Keep only the last 5 checkpoints. Sort by epoch number rather than
    # lexicographically, so epoch 10 is not deleted before epoch 2.
    checkpoints = sorted(
        (f for f in os.listdir(save_dir) if f.startswith("checkpoint_epoch_")),
        key=lambda f: int(f.split("_")[-1].split(".")[0]),
    )
    if len(checkpoints) > 5:
        for old_ckpt in checkpoints[:-5]:
            os.remove(os.path.join(save_dir, old_ckpt))
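

def load_checkpoint(path, model, optimizer=None, scheduler=None):
    """Hedged counterpart to save_checkpoint (not in the original script):
    restores whichever state dicts the checkpoint contains and returns the
    stored epoch and metrics, e.g. to resume training from best_model.pt."""
    checkpoint = torch.load(path, map_location=get_model_device(model))
    model.load_state_dict(checkpoint["model_state_dict"])
    if optimizer is not None:
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    if scheduler is not None and checkpoint.get("scheduler_state_dict"):
        scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
    return checkpoint["epoch"], checkpoint["metrics"]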


def train_epoch(model, train_loader, optimizer, criterion, device):
    model.train()
    total_loss = 0.0
    all_predictions = []
    all_targets = []
    for x, y in train_loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        # Forward pass
        predictions, _ = model(x)
        predictions = predictions.squeeze(-1)
        # Calculate loss
        loss = criterion(predictions, y)
        # Backward pass with gradient clipping
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item()
        all_predictions.append(predictions.detach())
        all_targets.append(y.detach())
    avg_loss = total_loss / len(train_loader)
    all_predictions = torch.cat(all_predictions, dim=0)
    all_targets = torch.cat(all_targets, dim=0)
    mse, mae = calculate_metrics(all_predictions, all_targets)
    return avg_loss, mse, mae


def validate(model, val_loader, criterion, device):
    model.eval()
    total_loss = 0.0
    all_predictions = []
    all_targets = []
    with torch.no_grad():
        for x, y in val_loader:
            x, y = x.to(device), y.to(device)
            predictions, _ = model(x)
            predictions = predictions.squeeze(-1)
            loss = criterion(predictions, y)
            total_loss += loss.item()
            all_predictions.append(predictions)
            all_targets.append(y)
    avg_loss = total_loss / len(val_loader)
    all_predictions = torch.cat(all_predictions, dim=0)
    all_targets = torch.cat(all_targets, dim=0)
    mse, mae = calculate_metrics(all_predictions, all_targets)
    return avg_loss, mse, mae


def train_model(model, train_loader, val_loader, config):
    """Main training loop"""
    device = config["device"]
    model = model.to(device)
    # Setup
    criterion = nn.MSELoss()
    optimizer = torch.optim.AdamW(
        model.parameters(),
        lr=config["learning_rate"],
        weight_decay=config["weight_decay"],
    )
    # Halve the LR whenever validation loss plateaus for 5 epochs.
    # (`verbose=True` is deprecated in recent PyTorch; the current LR is
    # printed each epoch below instead.)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.5, patience=5
    )
    # Create save directory
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_dir = os.path.join(config["save_dir"], f"run_{timestamp}")
    os.makedirs(save_dir, exist_ok=True)
    # Save config
    with open(os.path.join(save_dir, "config.json"), "w") as f:
        json.dump(config, f, indent=4)
    # Initialize logger
    logger = MetricsLogger(save_dir)
    best_val_loss = float("inf")
    print("=" * 60)
    print(f"Training started at {timestamp}")
    print(f"Model: {config['model_name']}")
    print(f"Device: {device}")
    print(f"Save directory: {save_dir}")
    print("=" * 60 + "\n")
    # Training loop
    for epoch in range(1, config["num_epochs"] + 1):
        # Train
        train_loss, train_mse, train_mae = train_epoch(
            model, train_loader, optimizer, criterion, device
        )
        # Validate
        val_loss, val_mse, val_mae = validate(model, val_loader, criterion, device)
        # Update scheduler
        scheduler.step(val_loss)
        current_lr = optimizer.param_groups[0]["lr"]
        # Log metrics
        epoch_metrics = {
            "train_loss": train_loss,
            "val_loss": val_loss,
            "train_mse": train_mse,
            "val_mse": val_mse,
            "train_mae": train_mae,
            "val_mae": val_mae,
            "learning_rates": current_lr,
        }
        logger.update(epoch_metrics)
        # Print progress
        print(f"Epoch {epoch}/{config['num_epochs']}")
        print(
            f"  Train - Loss: {train_loss:.6f}, MSE: {train_mse:.6f}, MAE: {train_mae:.6f}"
        )
        print(f"  Val   - Loss: {val_loss:.6f}, MSE: {val_mse:.6f}, MAE: {val_mae:.6f}")
        print(f"  LR: {current_lr:.2e}")
        # Save checkpoint
        is_best = val_loss < best_val_loss
        if is_best:
            best_val_loss = val_loss
        if epoch % config["save_every"] == 0 or is_best:
            save_checkpoint(
                model, optimizer, scheduler, epoch, epoch_metrics, save_dir, is_best
            )
        # Plot metrics every 10 epochs
        if epoch % 10 == 0:
            logger.plot_metrics()
        print()
    # Final save
    logger.save()
    logger.plot_metrics()
    print("=" * 60)
    print("Training completed!")
    print(f"Best validation loss: {best_val_loss:.6f}")
    print(f"Results saved to: {save_dir}")
    print("=" * 60)
    return model, logger
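

def evaluate_predictions(model, loader, device):
    """Hedged convenience sketch (not in the original script): collect
    predictions and targets from a DataLoader, e.g. to plot the fit on the
    validation split after training."""
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for x, y in loader:
            p, _ = model(x.to(device))
            preds.append(p.squeeze(-1).cpu())
            targets.append(y)
    return torch.cat(preds), torch.cat(targets)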


if __name__ == "__main__":
    from data_prep.data_clean import clean_indicator
    from data_prep.data_load import prepare_data

    # Debugging aid: pinpoints the op that produced NaN/Inf gradients,
    # at a noticeable cost in training speed.
    torch.autograd.set_detect_anomaly(True)

    # Configuration
    config = {
        "model_name": "HawkPredictor",
        "seq_length": 20,
        "hidden_size": 128,
        "num_layers": 3,
        "conv_kernel_size": 4,
        "dropout": 0.2,
        "batch_size": 64,
        "num_epochs": 100,
        "learning_rate": 0.001,
        "weight_decay": 1e-5,
        "train_split": 0.8,
        "save_every": 5,
        "save_dir": "./checkpoints",
        "device": "cuda" if torch.cuda.is_available() else "cpu",
    }

    print("Loading data...")
    test_dir = "/home/aman/code/ml_fr/ml_stocks/data/NIFTY_5_years.csv"
    load_df = prepare_data(test_dir)
    df = clean_indicator(load_df)

    # Prepare features and target
    target_col = "Daily_Return"
    feature_cols = [col for col in df.columns if col != target_col]

    # Chronological split (no shuffling, to avoid look-ahead leakage)
    train_size = int(len(df) * config["train_split"])
    train_df = df[:train_size]
    val_df = df[train_size:]

    # Fit the scaler on the training split only
    scaler = StandardScaler()
    train_features = scaler.fit_transform(train_df[feature_cols].values)
    val_features = scaler.transform(val_df[feature_cols].values)
    train_targets = train_df[target_col].values.reshape(-1, 1)
    val_targets = val_df[target_col].values.reshape(-1, 1)

    # Create datasets and loaders
    train_dataset = TimeSeriesDataset(train_features, train_targets, config["seq_length"])
    val_dataset = TimeSeriesDataset(val_features, val_targets, config["seq_length"])
    train_loader = DataLoader(
        train_dataset, batch_size=config["batch_size"], shuffle=True, num_workers=0
    )
    val_loader = DataLoader(
        val_dataset, batch_size=config["batch_size"], shuffle=False, num_workers=0
    )

    print(f"Training samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    print(f"Input features: {len(feature_cols)}")

    # Initialize model
    model = HawkPredictor(
        input_size=len(feature_cols),
        hidden_size=config["hidden_size"],
        num_layers=config["num_layers"],
        conv_kernel_size=config["conv_kernel_size"],
        dropout=config["dropout"],
    )
    print(f"\nModel parameters: {sum(p.numel() for p in model.parameters()):,}")

    # Train model
    trained_model, metrics_logger = train_model(model, train_loader, val_loader, config)
    print("\nTraining complete! Check the checkpoints directory for saved models and metrics.")