# hft-quant-lab/src/models.py
import torch
import torch.nn as nn
import torch.nn.functional as F

# --- ACTIVATIONS & BLOCKS ---
class Mish(nn.Module):
    """
    Mish activation: x * tanh(softplus(x)).
    A smooth, non-monotonic ReLU alternative that outperformed ReLU
    for deep RNNs in low-signal regimes in these experiments.
    """
    def forward(self, x):
        return x * torch.tanh(F.softplus(x))

class SEBlock(nn.Module):
    """
    Squeeze-and-Excitation-style gate for 1D sequences, acting as dynamic
    feature selection. The gate is computed per timestep (there is no
    global average-pool "squeeze"), which keeps the block causal.
    """
    def __init__(self, channel, reduction=4):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(channel, channel // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channel // reduction, channel, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        y = self.fc(x)  # per-channel gate in (0, 1), same shape as x
        return x * y

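# Illustrative sketch (not part of the original file): `_se_gate_demo` is a
# hypothetical helper showing that the gate preserves the input shape, so
# the block can sit anywhere in a (batch, seq_len, channel) pipeline.
def _se_gate_demo():
    se = SEBlock(channel=8)
    x = torch.randn(2, 5, 8)  # (batch, seq_len, channels)
    gate = se.fc(x)           # Sigmoid output: every entry lies in (0, 1)
    assert se(x).shape == x.shape
    return gate
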
# --- 1. THE WINNER: SE-Mish-DeepResGRU ---
class PreNormGRUCell(nn.Module):
    """Pre-norm residual block: LayerNorm -> GRU -> Mish -> Dropout -> skip."""
    def __init__(self, dim, dropout):
        super().__init__()
        self.gru = nn.GRU(dim, dim, 1, batch_first=True)
        self.drop = nn.Dropout(dropout)
        self.ln = nn.LayerNorm(dim)
        self.act = Mish()

    def forward(self, x, h):
        x_norm = self.ln(x)
        o, h_new = self.gru(x_norm, h)  # h=None -> PyTorch zero-initializes
        o = self.drop(self.act(o))
        return x + o, h_new  # residual connection

class SEMishGRU(nn.Module):
    def __init__(self, input_size=32, hidden_size=240, layers=6):
        super().__init__()
        self.embed = nn.Linear(input_size, hidden_size)
        self.se = SEBlock(hidden_size)
        self.layers = nn.ModuleList(
            [PreNormGRUCell(hidden_size, 0.15) for _ in range(layers)]
        )
        self.head = nn.Linear(hidden_size, 32)
        self.final_ln = nn.LayerNorm(hidden_size)
        self.layers_count = layers

    def forward(self, x, h_list=None):
        if h_list is None:
            h_list = [None] * self.layers_count
        x = self.embed(x)  # (batch, seq_len, hidden_size)
        x = self.se(x)
        new_h = []
        for i, layer in enumerate(self.layers):
            x, h = layer(x, h_list[i])
            new_h.append(h)  # collect per-layer states for stateful inference
        x = self.final_ln(x)
        return self.head(x), new_h

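# --- Usage sketch (illustrative, not from the original repo) ---
# SEMishGRU returns its per-layer hidden states, so it can run statefully
# over a stream of feature chunks. `stream_features` is a hypothetical
# iterable of (batch, chunk_len, input_size) tensors.
@torch.no_grad()
def run_streaming(model, stream_features):
    h = None  # first call zero-initializes all layer states
    outputs = []
    for chunk in stream_features:
        out, h = model(chunk, h)  # feed the returned h_list back in
        outputs.append(out)
    return torch.cat(outputs, dim=1)
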
# --- 2. THE CHALLENGER: Transformer-Encoder (Failed due to overfitting) ---
class TransformerModel(nn.Module):
    def __init__(self, input_size=32, hidden_size=256, layers=4):
        super().__init__()
        self.embed = nn.Linear(input_size, hidden_size)
        enc_layer = nn.TransformerEncoderLayer(
            d_model=hidden_size, nhead=4, dim_feedforward=512,
            dropout=0.1, batch_first=True
        )
        self.transformer = nn.TransformerEncoder(enc_layer, num_layers=layers)
        self.head = nn.Linear(hidden_size, 32)

    def forward(self, x):
        # Note: no positional encoding and no causal mask are applied here.
        x = self.embed(x)
        x = self.transformer(x)
        return self.head(x[:, -1, :])  # predict from the last timestep only

# --- FACTORY ---
def get_model(name, **kwargs):
    if name == "winner":
        return SEMishGRU(**kwargs)
    elif name == "transformer":
        return TransformerModel(**kwargs)
    else:
        raise ValueError(f"Unknown model: {name}")

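# --- Smoke test (illustrative sketch, not part of the deployed app) ---
# Shape check for both architectures using the default sizes above.
if __name__ == "__main__":
    x = torch.randn(2, 64, 32)  # (batch, seq_len, input_size)
    out, h_list = get_model("winner")(x)
    print(out.shape, len(h_list))             # torch.Size([2, 64, 32]) 6
    print(get_model("transformer")(x).shape)  # torch.Size([2, 32])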