#!/usr/bin/env python3
"""
Cloud training script for rl_btc_v4 IQL.

Downloads the dataset and the ``rl_btc_v4`` code from HF Hub, trains an IQL
agent, and uploads the saved model, normalization stats and a JSON training
report back to the model repo. Metrics are logged to Trackio per epoch and
once more at the end of the run.

The Hub token is read from the ``HF_TOKEN`` environment variable (may be
unset, in which case ``None`` is passed to the Hub client).
"""
import json
import os
import sys
import tempfile
import time
from pathlib import Path

import numpy as np
import torch

print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    # The device-properties field is `total_memory` (bytes); `total_mem`
    # does not exist and raised AttributeError on every CUDA machine.
    print(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# Download dataset from HF Hub
from huggingface_hub import HfApi, hf_hub_download, snapshot_download

DATASET_REPO_ID = "fbzu/btc_updown_5m_augmented_v1"
MODEL_REPO_ID = "fbzu/rl_btc_v4_iql"
HF_TOKEN = os.environ.get("HF_TOKEN")

# Dataset hyperparameters that are both passed to the dataset builder and
# recorded in the training report; defined once so the two cannot drift.
DATASET_PARAMS = {
    "history_length": 30,
    "episode_span_days": 30,
    "episode_stride_days": 15,
    "risk_lambda": 1.0,
    "soft_dd_penalty": 0.50,
}

print("\n[v4 IQL] Downloading dataset from HF Hub...")
data_path = hf_hub_download(
    repo_id=DATASET_REPO_ID,
    filename="btc_updown_5m_augmented_v1.parquet",
    repo_type="dataset",
    token=HF_TOKEN,
)
print(f"[v4 IQL] Dataset downloaded to {data_path}")

print("[v4 IQL] Downloading code from HF Hub...")
code_dir = snapshot_download(
    repo_id=MODEL_REPO_ID,
    repo_type="model",
    token=HF_TOKEN,
)
# The rl_btc_v4 package lives in the downloaded snapshot, so it can only be
# imported after the snapshot directory is on sys.path.
sys.path.insert(0, code_dir)

from rl_btc_v4.dataset import build_offline_rl_dataset
from rl_btc_v4.iql_trainer import IQLTrainer, IQLConfig
from rl_btc_v4.constants import N_ACTIONS

# Trackio monitoring
import trackio


def _json_default(obj):
    """Convert NumPy / torch values that ``json`` cannot encode natively.

    Used as ``json.dumps(..., default=_json_default)`` for the training
    report. Anything that is not a NumPy scalar, NumPy array or torch tensor
    raises ``TypeError`` so unexpected objects are not silently stringified.
    """
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, (np.ndarray, torch.Tensor)):
        return obj.tolist()
    raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")


def progress_fn(epoch, metrics):
    """Per-epoch callback handed to ``trainer.train``.

    Logs the four training metrics to Trackio on every call and prints a
    summary line every ``config.eval_freq`` epochs and on the last epoch.
    Reads the module-level ``config`` and ``t_start`` set before training.
    """
    trackio.log({
        "epoch": epoch,
        "q_loss": metrics["q_loss"],
        "v_loss": metrics["v_loss"],
        "policy_loss": metrics["policy_loss"],
        "advantage": metrics["advantage"],
    })
    # NOTE(review): eval_freq is not set in this script; presumably it is an
    # IQLConfig default -- confirm against rl_btc_v4.iql_trainer.
    if epoch % config.eval_freq == 0 or epoch == config.num_epochs - 1:
        elapsed = time.time() - t_start
        print(f" [{elapsed:.0f}s] Epoch {epoch}: "
              f"Q={metrics['q_loss']:.4f} V={metrics['v_loss']:.4f} "
              f"π={metrics['policy_loss']:.4f} Adv={metrics['advantage']:.4f}")


run = trackio.init(
    project="rl_btc_v4_iql",
    name="iql_training_v4",
    space_id="fbzu/trackio-rl-btc-v4",
)

# Everything after trackio.init() runs under try/finally so the Trackio run
# is closed even when dataset building, training or the upload fails.
try:
    # ── Training config ──────────────────────────────────────────────────────
    device = "cuda" if torch.cuda.is_available() else "cpu"

    train_dataset, test_dataset = build_offline_rl_dataset(
        data_path=data_path,
        **DATASET_PARAMS,
        test_fraction=0.2,
        seed=42,
    )

    print(f"[v4 IQL] Train transitions: {train_dataset.n_transitions}")
    print(f"[v4 IQL] Test transitions: {test_dataset.n_transitions}")
    print(f"[v4 IQL] State dim: {train_dataset.states.shape[1]}")

    state_dim = train_dataset.states.shape[1]

    config = IQLConfig(
        hidden_dim=256,
        num_layers=2,
        dropout=0.1,
        expectile=0.7,
        temperature=3.0,
        gamma=0.99,
        tau=0.005,
        learning_rate=3e-4,
        batch_size=512,
        num_epochs=100,
        weight_decay=1e-4,
        device=device,
        seed=42,
    )

    trainer = IQLTrainer(state_dim=state_dim, action_dim=N_ACTIONS, config=config)

    t_start = time.time()

    result = trainer.train(
        states=train_dataset.states,
        actions=train_dataset.actions,
        rewards=train_dataset.rewards,
        next_states=train_dataset.next_states,
        dones=train_dataset.dones,
        eval_states=test_dataset.states,
        eval_rewards=test_dataset.rewards,
        progress_fn=progress_fn,
    )

    t_elapsed = time.time() - t_start
    print(f"\n[v4 IQL] Training complete in {t_elapsed:.1f}s")
    print(f"[v4 IQL] Final metrics: {result['final_metrics']}")

    # ── Save and upload to HF Hub ────────────────────────────────────────────
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)

        # Save model
        trainer.save(tmp_path)

        # Save normalization stats
        np.savez(
            tmp_path / "scaler.npz",
            mean=train_dataset.mean,
            std=train_dataset.std,
            reward_mean=result["reward_mean"],
            reward_std=result["reward_std"],
        )

        # Save report
        report = {
            "algorithm": "IQL",
            "config": vars(config),
            "dataset": {"path": DATASET_REPO_ID, **DATASET_PARAMS},
            "results": result,
            "training_time_seconds": t_elapsed,
            "device": device,
        }
        # `result` comes from the trainer and may hold NumPy/torch values
        # (TODO confirm); convert them instead of failing after training
        # has finished but before anything was uploaded.
        report_text = json.dumps(report, indent=2, default=_json_default)
        (tmp_path / "train_report.json").write_text(report_text)

        print("\n[v4 IQL] Uploading trained model to HF Hub...")
        api = HfApi(token=HF_TOKEN)

        # Upload every entry written to the temp dir, one upload per file.
        for artifact in tmp_path.iterdir():
            api.upload_file(
                path_or_fileobj=str(artifact),
                path_in_repo=artifact.name,
                repo_id=MODEL_REPO_ID,
                repo_type="model",
            )
            print(f" Uploaded {artifact.name}")

    print(f"\n[v4 IQL] Model uploaded to https://huggingface.co/{MODEL_REPO_ID}")

    # Final trackio metrics
    # NOTE(review): the per-epoch metrics use the key "advantage" while the
    # final metrics are read with "mean_advantage" -- confirm the trainer
    # really returns that key in result["final_metrics"].
    final_metrics = result["final_metrics"]
    trackio.log({
        "training_time_seconds": t_elapsed,
        "final_q_loss": final_metrics["q_loss"],
        "final_v_loss": final_metrics["v_loss"],
        "final_policy_loss": final_metrics["policy_loss"],
        "final_mean_advantage": final_metrics["mean_advantage"],
    })
finally:
    trackio.finish()