import torch
import torch.nn as nn
import torch.nn.functional as F
import math


# --- ACTIVATIONS & BLOCKS ---
class Mish(nn.Module):
    """
    Mish activation: x * tanh(softplus(x)).
    Empirically outperformed ReLU for deep RNNs in low-signal regimes.
    """
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))


class SEBlock(nn.Module):
    """
    Squeeze-and-Excitation-style gating block for 1D sequences.
    Note: unlike the original SE block there is no temporal squeeze (global
    pooling); the sigmoid gate is computed per time step from the feature
    vector itself and acts as dynamic feature selection.
    """
    def __init__(self, channel, reduction=4):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        y = self.fc(x)  # per-feature gate in (0, 1)
        return x * y
# --- 1. THE WINNER: SE-Mish-DeepResGRU ---
class PreNormGRUCell(nn.Module):
    """Pre-norm residual GRU layer: LayerNorm -> single-layer nn.GRU -> Mish -> Dropout -> skip.
    Despite the name, this wraps a whole-sequence nn.GRU, not nn.GRUCell."""
    def __init__(self, dim, dropout):
        super().__init__()
        self.gru = nn.GRU(dim, dim, 1, batch_first=True)
        self.drop = nn.Dropout(dropout)
        self.ln = nn.LayerNorm(dim)
        self.act = Mish()

    def forward(self, x, h):
        x_norm = self.ln(x)
        o, h_new = self.gru(x_norm, h)
        o = self.drop(self.act(o))
        return x + o, h_new  # Residual connection


class SEMishGRU(nn.Module):
    def __init__(self, input_size=32, hidden_size=240, layers=6):
        super().__init__()
        self.embed = nn.Linear(input_size, hidden_size)
        self.se = SEBlock(hidden_size)
        self.layers = nn.ModuleList([PreNormGRUCell(hidden_size, 0.15) for _ in range(layers)])
        self.head = nn.Linear(hidden_size, 32)
        self.final_ln = nn.LayerNorm(hidden_size)
        self.layers_count = layers

    def forward(self, x, h_list=None):
        if h_list is None:
            h_list = [None] * self.layers_count  # None lets each nn.GRU start from zeros
        x = self.embed(x)
        x = self.se(x)
        new_h = []
        for i, layer in enumerate(self.layers):
            x, h = layer(x, h_list[i])
            new_h.append(h)  # collect per-layer states so callers can carry them across chunks
        x = self.final_ln(x)
        return self.head(x), new_h
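

# Illustrative only (not part of the original file): how the stateful API can be
# used for chunked/streaming inference. Batch size 8, chunk length 64 and the
# chunk count are assumptions; the 32 features match the defaults above.
def _streaming_demo():
    model = SEMishGRU().eval()
    h = None
    stream = torch.randn(10, 8, 64, 32)      # 10 chunks of (batch=8, steps=64, features=32)
    with torch.no_grad():
        for chunk in stream:
            preds, h = model(chunk, h)       # preds: (8, 64, 32); h: list of 6 (1, 8, 240) states
    return preds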
# --- 2. THE CHALLENGER: Transformer-Encoder (Failed due to overfitting) ---
class TransformerModel(nn.Module):
    def __init__(self, input_size=32, hidden_size=256, layers=4):
        super().__init__()
        self.embed = nn.Linear(input_size, hidden_size)
        enc_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=4,
                                               dim_feedforward=512, dropout=0.1,
                                               batch_first=True)
        self.transformer = nn.TransformerEncoder(enc_layer, num_layers=layers)
        self.head = nn.Linear(hidden_size, 32)

    def forward(self, x):
        x = self.embed(x)
        x = self.transformer(x)
        return self.head(x[:, -1, :])  # Predict on last step
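

# Not part of the original model: TransformerModel feeds embeddings straight into the
# encoder, so self-attention sees the sequence order-agnostically. If order information
# is needed, a standard sinusoidal positional encoding could be applied before the
# encoder; a minimal sketch follows (the module-level `math` import is otherwise unused).
class SinusoidalPositionalEncoding(nn.Module):
    """Fixed sin/cos positional encoding (Vaswani et al., 2017), batch-first."""
    def __init__(self, d_model, max_len=4096):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe.unsqueeze(0))  # (1, max_len, d_model)

    def forward(self, x):  # x: (batch, seq, d_model)
        return x + self.pe[:, :x.size(1)]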
# --- FACTORY ---
def get_model(name, **kwargs):
    if name == "winner":
        return SEMishGRU(**kwargs)
    elif name == "transformer":
        return TransformerModel(**kwargs)
    else:
        raise ValueError(f"Unknown model: {name}")
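

# Minimal smoke test (an addition, not in the original file); the (batch=4, steps=50,
# features=32) input shape is an assumption matching the default input_size.
if __name__ == "__main__":
    x = torch.randn(4, 50, 32)

    winner = get_model("winner")
    preds, hidden = winner(x)
    print("winner:", preds.shape, "| hidden states:", len(hidden))  # (4, 50, 32) | 6

    challenger = get_model("transformer")
    print("transformer:", challenger(x).shape)                      # (4, 32), last step only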