Spaces:

aayushkrm
/

hft-quant-lab

Sleeping

App Files Files Community

aayushkrm committed on Jan 28

Commit

818e006

1 Parent(s): 924b4c2

Deploy app code

Browse files

Files changed (13) hide show

.DS_Store +0 -0
app.py +112 -0
artifacts/.DS_Store +0 -0
artifacts/best_model.pt +3 -0
requirements.txt +9 -0
src/.DS_Store +0 -0
src/__init__.py +0 -0
src/__pycache__/__init__.cpython-311.pyc +0 -0
src/__pycache__/engine.cpython-311.pyc +0 -0
src/__pycache__/models.cpython-311.pyc +0 -0
src/engine.py +104 -0
src/models.py +88 -0
src/utils.py +0 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import gradio as gr
+import torch
+import numpy as np
+import pandas as pd
+import plotly.graph_objects as go
+from datasets import load_dataset
+from src.models import get_model
+from src.engine import quantize_model
+# --- LOAD DATASET FROM HUGGING FACE ---
+# Hub dataset id in "<username>/<dataset>" form; replace with your own.
+DATASET_NAME = "aayushkrm/wunder-fund-hft-data"
+# Best-effort load at import time: any exception raised below (including a
+# frame that lacks the 'seq_ix' column) falls through to the dummy-data branch,
+# so the app still starts without the dataset.
+try:
+    print("Loading dataset from Hugging Face...")
+    # Only the first 1% of the train split is requested, to keep the demo fast.
+    dataset = load_dataset(DATASET_NAME, split="train[:1%]")
+    df = dataset.to_pandas()
+    # Distinct sequence ids offered in the UI; this lookup also fails early if
+    # the expected 'seq_ix' column is absent.
+    SEQ_IDS = df['seq_ix'].unique()
+except Exception as e:
+    print(f"Could not load HF dataset: {e}")
+    # Fallback: df = None makes inference() generate random input instead,
+    # and SEQ_IDS becomes a short placeholder list.
+    df = None
+    SEQ_IDS = [0, 1, 2]
+def load_cached_model():
+    # Strategy ED Configuration: 32 Input, 256 Hidden, 6 Layers
+    # Note: Check if Strategy ED used 256 or 240.
+    # Your logs said Strategy ED was "SE-MISH-SWARM (Best-of-Best Fusion)".
+    # Usually this was Hidden=256.
+    model = get_model("winner", input_size=32, hidden_size=256, layers=6)
+    model_path = "artifacts/best_model.pt"
+    if os.path.exists(model_path):
+        try:
+            # Load the FP16/FP32 weights
+            state = torch.load(model_path, map_location='cpu')
+            # Strategy ED saved state_dict directly.
+            # We need to cast to float32 before quantization if they were saved as Half
+            state = {k: v.float() for k, v in state.items()}
+            model.load_state_dict(state)
+            print("✅ Loaded best_model.pt")
+        except Exception as e:
+            print(f"⚠️ Error loading model: {e}")
+    else:
+        print("⚠️ Model file not found, using random weights.")
+    # Quantize for inference (just to show off the capability)
+    model = quantize_model(model)
+    return model
+# Built once at import time and reused by every inference() call.
+MODEL = load_cached_model()
+def inference(seq_id, steps_to_plot):
+    if df is not None:
+        # Extract sequence
+        seq_data = df[df['seq_ix'] == seq_id].sort_values('step_in_seq')
+        # Get raw features (0-31)
+        raw_values = seq_data[[str(i) for i in range(32)]].values.astype(np.float32)
+        # Normalize simple
+        mean = raw_values.mean(axis=0)
+        std = raw_values.std(axis=0) + 1e-6
+        norm_values = (raw_values - mean) / std
+    else:
+        # Dummy data
+        norm_values = np.random.randn(1000, 32).astype(np.float32)
+    # Run Inference
+    x = torch.tensor(norm_values).unsqueeze(0) # (1, 1000, 32)
+    with torch.no_grad():
+        # Get hidden states / predictions
+        # Note: The model returns (prediction, hidden), but prediction is next step
+        # We want to visualize the flow.
+        preds = []
+        h = None
+        # Slow loop for demo visualization
+        for t in range(min(len(x[0]), steps_to_plot)):
+            xt = x[:, t:t+1, :]
+            o, h = MODEL(xt, h)
+            preds.append(o.numpy()[0,0,0]) # Plot 1st feature dim
+    # Plotting
+    fig = go.Figure()
+    # Plot actual Feature 0
+    fig.add_trace(go.Scatter(y=norm_values[:steps_to_plot, 0], mode='lines', name='Actual Feature 0', line=dict(color='gray')))
+    # Plot predicted Feature 0
+    fig.add_trace(go.Scatter(y=preds, mode='lines', name='Predicted Feature 0', line=dict(color='green')))
+    fig.update_layout(title=f"Forecasting Sequence {seq_id}", xaxis_title="Time Step", yaxis_title="Normalized Value")
+    return fig
+# --- GRADIO UI ---
+with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
+    gr.Markdown("# ⚡ Quant-Lab: HFT Sequence Modeling")
+    gr.Markdown(f"**Strategy ED (Rank 28):** SE-Mish-DeepResGRU (INT8 Quantized)")
+    with gr.Row():
+        seq_selector = gr.Dropdown(choices=list(SEQ_IDS[:20]), label="Select Market Sequence", value=SEQ_IDS[0])
+        step_slider = gr.Slider(minimum=50, maximum=1000, value=200, label="Steps to Visualize")
+    plot = gr.Plot(label="Forecast Visualization")
+    btn = gr.Button("Run Inference")
+    btn.click(inference, inputs=[seq_selector, step_slider], outputs=plot)
+if __name__ == "__main__":
+    demo.launch()

artifacts/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

artifacts/best_model.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a5d0b30d0dd025f271aed3de1afaa7a968270428de27b0af288daa00e812e56
+size 2480994

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+torch>=2.0.0
+numpy
+pandas
+scikit-learn
+streamlit
+gradio
+plotly
+tqdm
+datasets

src/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

src/__init__.py ADDED Viewed

File without changes

src/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (156 Bytes). View file

src/__pycache__/engine.cpython-311.pyc ADDED Viewed

Binary file (6.99 kB). View file

src/__pycache__/models.cpython-311.pyc ADDED Viewed

Binary file (7.29 kB). View file

src/engine.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import torch
+import torch.nn as nn
+from torch.optim import Optimizer
+import math
+import os
+# --- 1. RANGER OPTIMIZER (Full Implementation) ---
+class Ranger(Optimizer):
+    """Adam-style optimizer with a rectified step size and slow-weight averaging.
+
+    Each step keeps exponential moving averages of the gradient and its
+    square, chooses between an adaptive and a plain-momentum update based on
+    the N_sma statistic, and every `k` steps pulls the parameters toward a
+    slowly moving copy ("slow buffer") by a factor `alpha`.
+    NOTE(review): this looks like the RAdam + Lookahead combination usually
+    called "Ranger" -- confirm against the reference implementation.
+    """
+    def __init__(self, params, lr=1e-3, alpha=0.5, k=6, N_sma_threshhold=5, betas=(.95, 0.999), eps=1e-5, weight_decay=0):
+        # `step_counter` is stored in the defaults but never read in this class.
+        defaults = dict(lr=lr, alpha=alpha, k=k, step_counter=0, betas=betas, N_sma_threshhold=N_sma_threshhold, eps=eps, weight_decay=weight_decay)
+        super().__init__(params, defaults)
+        # step() reads the threshold and alpha from these instance attributes,
+        # not from the per-group values; `k` is read from the group instead.
+        self.N_sma_threshhold = N_sma_threshhold
+        self.alpha = alpha
+        self.k = k
+        # 10-slot cache of [step, N_sma, step_size], indexed by step % 10 and
+        # shared by all parameters and param groups.
+        self.radam_buffer = [[None,None,None] for ind in range(10)]
+    def __setstate__(self, state):
+        # Pure pass-through to the base class.
+        super().__setstate__(state)
+    def step(self, closure=None):
+        """Perform one optimization step.
+
+        Args:
+            closure: Optional callable that re-evaluates the model and returns
+                the loss; called once, before any parameter is updated.
+
+        Returns:
+            The value returned by `closure`, or None when no closure is given.
+
+        Raises:
+            RuntimeError: If any parameter has a sparse gradient.
+        """
+        loss = None
+        if closure is not None: loss = closure()
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None: continue
+                # All arithmetic is done on float32 views/copies of grad and param.
+                grad = p.grad.data.float()
+                if p.grad.is_sparse: raise RuntimeError('Ranger does not support sparse gradients')
+                p_data_fp32 = p.data.float()
+                state = self.state[p]
+                if len(state) == 0:
+                    # First time this parameter is seen: zeroed moments and a
+                    # slow buffer initialised to the current weights.
+                    state['step'] = 0
+                    state['exp_avg'] = torch.zeros_like(p_data_fp32)
+                    state['exp_avg_sq'] = torch.zeros_like(p_data_fp32)
+                    state['slow_buffer'] = torch.empty_like(p.data)
+                    state['slow_buffer'].copy_(p.data)
+                else:
+                    # Keep the moment buffers in the same type as the fp32 param.
+                    state['exp_avg'] = state['exp_avg'].type_as(p_data_fp32)
+                    state['exp_avg_sq'] = state['exp_avg_sq'].type_as(p_data_fp32)
+                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
+                beta1, beta2 = group['betas']
+                # In-place EMA updates of the first and second gradient moments.
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
+                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)
+                state['step'] += 1
+                # Reuse N_sma / step_size if already computed for this step
+                # number. NOTE(review): the cache is not keyed by betas, so
+                # groups with different betas would share one cached value.
+                buffered = self.radam_buffer[int(state['step'] % 10)]
+                if state['step'] == buffered[0]:
+                    N_sma, step_size = buffered[1], buffered[2]
+                else:
+                    buffered[0] = state['step']
+                    beta2_t = beta2 ** state['step']
+                    N_sma_max = 2 / (1 - beta2) - 1
+                    N_sma = N_sma_max - 2 * state['step'] * beta2_t / (1 - beta2_t)
+                    buffered[1] = N_sma
+                    if N_sma >= self.N_sma_threshhold:
+                        # Rectified step size, including first-moment bias correction.
+                        step_size = math.sqrt((1 - beta2_t) * (N_sma - 4) / (N_sma_max - 4) * (N_sma - 2) / N_sma * N_sma_max / (N_sma_max - 2)) / (1 - beta1 ** state['step'])
+                    else:
+                        # Below the threshold only the bias correction is applied.
+                        step_size = 1.0 / (1 - beta1 ** state['step'])
+                    buffered[2] = step_size
+                if group['weight_decay'] != 0:
+                    # Scales the weights by (1 - weight_decay * lr), applied
+                    # to the weights directly rather than through the gradient.
+                    p_data_fp32.add_(p_data_fp32, alpha=-group['weight_decay'] * group['lr'])
+                if N_sma >= self.N_sma_threshhold:
+                    # Adaptive update: first moment divided by sqrt(second moment) + eps.
+                    denom = exp_avg_sq.sqrt().add_(group['eps'])
+                    p_data_fp32.addcdiv_(exp_avg, denom, value=-step_size * group['lr'])
+                else:
+                    # Plain momentum update without the adaptive denominator.
+                    p_data_fp32.add_(exp_avg, alpha=-step_size * group['lr'])
+                # Write the fp32 result back into the parameter's own storage.
+                p.data.copy_(p_data_fp32)
+                if state['step'] % group['k'] == 0:
+                    # Every k steps: slow += alpha * (fast - slow), then the
+                    # parameter is reset to the slow weights.
+                    slow_p = state['slow_buffer']
+                    slow_p.add_(p.data - slow_p, alpha=self.alpha)
+                    p.data.copy_(slow_p)
+        return loss
+# --- 2. QUANTIZATION PIPELINE ---
+def quantize_model(model):
+    """
+    Applies PyTorch Dynamic INT8 Quantization.
+    """
+    model.cpu().eval()
+    q_model = torch.quantization.quantize_dynamic(
+        model,
+        {torch.nn.Linear, torch.nn.GRU, torch.nn.LSTM},
+        dtype=torch.qint8
+    )
+    return q_model
+def save_model(model, path):
+    torch.save(model.state_dict(), path)
+def load_model(model_class, path, quantized=False):
+    model = model_class()
+    if quantized:
+        model = quantize_model(model)
+        # Weights_only=False is needed for quantized state dicts
+        state = torch.load(path, map_location='cpu', weights_only=False)
+    else:
+        state = torch.load(path, map_location='cpu')
+    model.load_state_dict(state)
+    return model

src/models.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+# --- ACTIVATIONS & BLOCKS ---
+class Mish(nn.Module):
+    """
+    Mish Activation: x * tanh(softplus(x)).
+    Proved superior to ReLU for deep RNNs in low-signal regimes.
+    """
+    def forward(self, x):
+        return x * torch.tanh(F.softplus(x))
+class SEBlock(nn.Module):
+    """
+    Squeeze-and-Excitation Block for 1D sequences.
+    Acts as dynamic feature selection/gating.
+    """
+    def __init__(self, channel, reduction=4):
+        super().__init__()
+        self.fc = nn.Sequential(
+            nn.Linear(channel, channel // reduction, bias=False),
+            nn.ReLU(inplace=True),
+            nn.Linear(channel // reduction, channel, bias=False),
+            nn.Sigmoid()
+        )
+    def forward(self, x):
+        y = self.fc(x)
+        return x * y
+# --- 1. THE WINNER: SE-Mish-DeepResGRU ---
+class PreNormGRUCell(nn.Module):
+    def __init__(self, dim, dropout):
+        super().__init__()
+        self.gru = nn.GRU(dim, dim, 1, batch_first=True)
+        self.drop = nn.Dropout(dropout)
+        self.ln = nn.LayerNorm(dim)
+        self.act = Mish()
+    def forward(self, x, h):
+        x_norm = self.ln(x)
+        o, h_new = self.gru(x_norm, h)
+        o = self.drop(self.act(o))
+        return x + o, h_new # Residual Connection
+class SEMishGRU(nn.Module):
+    def __init__(self, input_size=32, hidden_size=240, layers=6):
+        super().__init__()
+        self.embed = nn.Linear(input_size, hidden_size)
+        self.se = SEBlock(hidden_size)
+        self.layers = nn.ModuleList([PreNormGRUCell(hidden_size, 0.15) for _ in range(layers)])
+        self.head = nn.Linear(hidden_size, 32)
+        self.final_ln = nn.LayerNorm(hidden_size)
+        self.layers_count = layers
+    def forward(self, x, h_list=None):
+        if h_list is None: h_list = [None] * self.layers_count
+        x = self.embed(x)
+        x = self.se(x)
+        new_h = []
+        for i, layer in enumerate(self.layers):
+            x, h = layer(x, h_list[i])
+            new_h.append(h)
+        x = self.final_ln(x)
+        return self.head(x), new_h
+# --- 2. THE CHALLENGER: Transformer-Encoder (Failed due to overfitting) ---
+class TransformerModel(nn.Module):
+    def __init__(self, input_size=32, hidden_size=256, layers=4):
+        super().__init__()
+        self.embed = nn.Linear(input_size, hidden_size)
+        enc_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=4, dim_feedforward=512, dropout=0.1, batch_first=True)
+        self.transformer = nn.TransformerEncoder(enc_layer, num_layers=layers)
+        self.head = nn.Linear(hidden_size, 32)
+    def forward(self, x):
+        x = self.embed(x)
+        x = self.transformer(x)
+        return self.head(x[:, -1, :]) # Predict on last step
+# --- FACTORY ---
+def get_model(name, **kwargs):
+    if name == "winner":
+        return SEMishGRU(**kwargs)
+    elif name == "transformer":
+        return TransformerModel(**kwargs)
+    else:
+        raise ValueError(f"Unknown model: {name}")

src/utils.py ADDED Viewed

File without changes