# NautilusTrainer / train_alpha.py
# Nautilus AI
# Deploy: Trainer to Root (Retry)
# commit c5c085b
import torch
import torch.nn as nn
import torch.optim as optim
from huggingface_hub import snapshot_download, HfApi
import os
from data_processor import AlphaDataProcessor
from models.lstm import AlphaLSTM
from torch.utils.tensorboard import SummaryWriter
import shutil
# Configuration
REPO_ID = "gionuibk/hyperliquidL2Book-v2" # Correct dataset repo
DATA_DIR = "./data"    # local cache for the downloaded parquet files
MODEL_DIR = "./models" # output directory for the exported ONNX model
EPOCHS = 50        # number of full passes over the training split
BATCH_SIZE = 32    # mini-batch size for the DataLoader
LR = 0.001         # Adam learning rate
def download_data():
    """Fetch the raw trade parquet files for REPO_ID into DATA_DIR.

    Failures (e.g. a missing HF token) are reported but not fatal, so the
    rest of the pipeline can still run against locally cached data.
    """
    print(f"Downloading data from {REPO_ID}...")
    try:
        snapshot_download(
            repo_type="dataset",
            repo_id=REPO_ID,
            allow_patterns=["raw_trade/*.parquet"],
            local_dir=DATA_DIR,
        )
    except Exception as exc:
        print(f"Warning: Could not download data (Token missing?): {exc}")
    else:
        print("Download Complete.")
def train():
    """End-to-end training pipeline: download data, train the LSTM, export ONNX.

    Side effects: writes TensorBoard event files under
    ./ray_results/alpha_experiment and an ONNX model under MODEL_DIR.
    Returns early (without training) when the dataset is missing.
    """
    writer = SummaryWriter(log_dir="./ray_results/alpha_experiment")
    os.makedirs(MODEL_DIR, exist_ok=True)
    try:
        # 1. Prepare Data
        download_data()
        processor = AlphaDataProcessor(data_dir=DATA_DIR)

        # Guard clause: bail out if the dataset is absent. Previously this
        # only printed a warning and fell through, so get_tensors() would
        # fail with an opaque error.
        if not os.path.exists(f"{DATA_DIR}/raw_trade"):
            print("No data found. Ensure 'raw_trade' folder exists in dataset.")
            return

        print("Processing Features...")
        X, y = processor.get_tensors(coin="ETH", seq_len=60)

        # Chronological train/test split — no shuffling across the boundary,
        # to avoid look-ahead leakage in time-series data.
        train_size = int(len(X) * 0.8)
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]  # test split reserved for later evaluation
        train_loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(X_train, y_train),
            batch_size=BATCH_SIZE,
            shuffle=True,
        )

        # 2. Init Model
        model = AlphaLSTM(input_size=4)  # features: log_ret, vol, rsi, volume
        criterion = nn.MSELoss()
        optimizer = optim.Adam(model.parameters(), lr=LR)

        # 3. Training Loop
        print("Starting Training...")
        for epoch in range(EPOCHS):
            model.train()
            total_loss = 0.0
            for batch_X, batch_y in train_loader:
                optimizer.zero_grad()
                outputs = model(batch_X)
                loss = criterion(outputs, batch_y)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            avg_loss = total_loss / len(train_loader)
            writer.add_scalar("Loss/Train", avg_loss, epoch)
            if epoch % 5 == 0:
                print(f"Epoch {epoch}/{EPOCHS} | Loss: {avg_loss:.6f}")

        # 4. Save ONNX
        print("Exporting to ONNX...")
        # Switch to inference mode before tracing so any dropout/batch-norm
        # inside AlphaLSTM is exported with eval-time behavior (the model was
        # left in train mode by the loop above).
        model.eval()
        dummy_input = torch.randn(1, 60, 4)
        onnx_path = f"{MODEL_DIR}/alpha_lstm_v1.onnx"
        torch.onnx.export(
            model,
            dummy_input,
            onnx_path,
            input_names=['input'],
            output_names=['output'],
            dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
        )
        print(f"Model Saved: {onnx_path}")

        # 5. Push Model to HF (Optional - can be done manually or separate script)
        # api = HfApi()
        # api.upload_file(...)
    finally:
        # Ensure TensorBoard events are flushed even if training fails.
        writer.close()
# Script entry point: run the full training pipeline when executed directly.
if __name__ == "__main__":
    train()