import torch
import torch.nn as nn
import torch.optim as optim
from huggingface_hub import snapshot_download, HfApi
import os
from data_processor import AlphaDataProcessor
from models.lstm import AlphaLSTM
from torch.utils.tensorboard import SummaryWriter
import shutil

# Configuration
REPO_ID = "gionuibk/hyperliquidL2Book-v2"  # Correct dataset repo
DATA_DIR = "./data"
MODEL_DIR = "./models"
EPOCHS = 50
BATCH_SIZE = 32
LR = 0.001


def download_data():
    """Downloads the dataset from the HuggingFace Hub."""
    print(f"Downloading data from {REPO_ID}...")
    try:
        snapshot_download(
            repo_id=REPO_ID,
            repo_type="dataset",
            local_dir=DATA_DIR,
            allow_patterns=["raw_trade/*.parquet"]
        )
        print("Download Complete.")
    except Exception as e:
        print(f"Warning: Could not download data (Token missing?): {e}")


def train():
    writer = SummaryWriter(log_dir="./ray_results/alpha_experiment")
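    # Training curves land in ./ray_results and can be inspected with, e.g.:
    #   tensorboard --logdir ./ray_results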
    os.makedirs(MODEL_DIR, exist_ok=True)

    # 1. Prepare Data
    download_data()
    processor = AlphaDataProcessor(data_dir=DATA_DIR)

    # Check if data exists; fall back to synthetic tensors for a dry run.
    if not os.path.exists(f"{DATA_DIR}/raw_trade"):
        print("No data found. Ensure 'raw_trade' folder exists in dataset.")
        # Create dummy data for dry-run (shapes assumed from seq_len=60 and the
        # 4 features listed below; the real target shape may differ).
        X, y = torch.randn(512, 60, 4), torch.randn(512, 1)
    else:
        print("Processing Features...")
        X, y = processor.get_tensors(coin="ETH", seq_len=60)

    # Train/Test Split
    train_size = int(len(X) * 0.8)
    X_train, X_test = X[:train_size], X[train_size:]
    y_train, y_test = y[:train_size], y[train_size:]

    train_loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True
    )

    # 2. Init Model
    model = AlphaLSTM(input_size=4)  # features: log_ret, vol, rsi, volume
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)

    # 3. Training Loop
    print("Starting Training...")
    for epoch in range(EPOCHS):
        model.train()
        total_loss = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_loss = total_loss / len(train_loader)
        writer.add_scalar("Loss/Train", avg_loss, epoch)
        if epoch % 5 == 0:
            print(f"Epoch {epoch}/{EPOCHS} | Loss: {avg_loss:.6f}")

    # 4. Save ONNX
    print("Exporting to ONNX...")
    dummy_input = torch.randn(1, 60, 4)  # (batch, seq_len, features)
    onnx_path = f"{MODEL_DIR}/alpha_lstm_v1.onnx"
    torch.onnx.export(
        model,
        dummy_input,
        onnx_path,
        input_names=['input'],
        output_names=['output'],
        dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
    )
    print(f"Model Saved: {onnx_path}")

    # 5. Push Model to HF (Optional - can be done manually or in a separate script)
    # api = HfApi()
    # api.upload_file(...)
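    # A sketch of that upload using huggingface_hub's upload_file; the repo_id
    # below is hypothetical and a write token would need to be configured:
    # api.upload_file(
    #     path_or_fileobj=onnx_path,
    #     path_in_repo="alpha_lstm_v1.onnx",
    #     repo_id="your-username/alpha-lstm",
    #     repo_type="model",
    # )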

    writer.close()


if __name__ == "__main__":
    train()