| |
| """ |
| Cloud training script for rl_btc_v4 IQL. |
| Downloads dataset from HF Hub, trains, and uploads model back. |
| """ |
| import json |
| import os |
| import sys |
| import time |
| from pathlib import Path |
|
|
| import numpy as np |
| import torch |
|
|
# Report the runtime environment first so a misconfigured instance (for
# example a CPU-only image) is visible in the very first log lines.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only the first device (index 0) is described.
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    # The device-properties attribute is `total_memory` (in bytes).  The
    # previous spelling `total_mem` does not exist and raised AttributeError
    # on every machine that actually had a GPU.
    print(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
|
|
| |
from huggingface_hub import hf_hub_download, snapshot_download

# Read the (optional) access token once; both downloads below use it.
hub_token = os.environ.get("HF_TOKEN")

# Step 1: fetch the single parquet file from the dataset repo.
print("\n[v4 IQL] Downloading dataset from HF Hub...")
data_path = hf_hub_download(
    repo_type="dataset",
    repo_id="fbzu/btc_updown_5m_augmented_v1",
    filename="btc_updown_5m_augmented_v1.parquet",
    token=hub_token,
)
print(f"[v4 IQL] Dataset downloaded to {data_path}")

# Step 2: fetch a snapshot of the model repo and put it first on sys.path so
# the `rl_btc_v4` imports below resolve against it (presumably the package
# ships inside that repo -- confirm).
print("[v4 IQL] Downloading code from HF Hub...")
code_dir = snapshot_download(
    repo_type="model",
    repo_id="fbzu/rl_btc_v4_iql",
    token=hub_token,
)
sys.path.insert(0, code_dir)

# These imports must come after the sys.path change above.
from rl_btc_v4.dataset import build_offline_rl_dataset
from rl_btc_v4.iql_trainer import IQLConfig, IQLTrainer
from rl_btc_v4.constants import N_ACTIONS
|
|
| |
# Open the trackio run before any `trackio.log` call is made.
import trackio

run = trackio.init(
    project="rl_btc_v4_iql",
    name="iql_training_v4",
    space_id="fbzu/trackio-rl-btc-v4",
)
|
|
| |
|
|
# Use the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the train/test splits from the downloaded parquet file.  The literal
# values here are repeated in the training report written at the end of the
# script, so keep the two in sync when changing them.
train_dataset, test_dataset = build_offline_rl_dataset(
    data_path=data_path,
    seed=42,
    test_fraction=0.2,
    history_length=30,
    episode_span_days=30,
    episode_stride_days=15,
    risk_lambda=1.0,
    soft_dd_penalty=0.50,
)

# Second axis of the state array; the trainer needs it as its input size.
state_dim = train_dataset.states.shape[1]

print(f"[v4 IQL] Train transitions: {train_dataset.n_transitions}")
print(f"[v4 IQL] Test transitions: {test_dataset.n_transitions}")
print(f"[v4 IQL] State dim: {state_dim}")
|
|
# Hyperparameters for the IQL run, grouped loosely by what they configure.
config = IQLConfig(
    # Network shape
    hidden_dim=256,
    num_layers=2,
    dropout=0.1,
    # Algorithm coefficients
    expectile=0.7,
    temperature=3.0,
    gamma=0.99,
    tau=0.005,
    # Optimisation
    learning_rate=3e-4,
    weight_decay=1e-4,
    batch_size=512,
    num_epochs=100,
    # Runtime
    device=device,
    seed=42,
)

trainer = IQLTrainer(
    state_dim=state_dim,
    action_dim=N_ACTIONS,
    config=config,
)

# Wall-clock reference for the elapsed-time figures printed by `progress_fn`
# and for the total training time reported after `trainer.train` returns.
t_start = time.time()
|
|
def progress_fn(epoch, metrics):
    """Log one epoch's metrics to trackio and periodically to stdout.

    Passed to ``trainer.train`` as its ``progress_fn`` callback.

    Args:
        epoch: Epoch index supplied by the trainer.  It is compared against
            ``config.num_epochs - 1``, so it is presumably zero-based.
        metrics: Mapping that must provide the keys ``q_loss``, ``v_loss``,
            ``policy_loss`` and ``advantage``.

    Raises:
        KeyError: If ``metrics`` lacks any of the keys listed above.
    """
    logged_keys = ("q_loss", "v_loss", "policy_loss", "advantage")
    trackio.log({"epoch": epoch, **{key: metrics[key] for key in logged_keys}})

    # NOTE(review): `eval_freq` is not passed to IQLConfig in this script, so
    # this relies on the config class providing a default -- confirm.
    is_report_epoch = epoch % config.eval_freq == 0
    is_last_epoch = epoch == config.num_epochs - 1
    if not (is_report_epoch or is_last_epoch):
        return

    elapsed = time.time() - t_start
    # The policy-loss label is "π"; it had been mis-encoded as a capital
    # omicron ("Ο") in the console output.
    print(f" [{elapsed:.0f}s] Epoch {epoch}: "
          f"Q={metrics['q_loss']:.4f} V={metrics['v_loss']:.4f} "
          f"π={metrics['policy_loss']:.4f} Adv={metrics['advantage']:.4f}")
|
|
# Collect the training arrays by attribute name, then run training with the
# test split's states and rewards supplied for evaluation.
train_arrays = {
    field: getattr(train_dataset, field)
    for field in ("states", "actions", "rewards", "next_states", "dones")
}
result = trainer.train(
    **train_arrays,
    eval_states=test_dataset.states,
    eval_rewards=test_dataset.rewards,
    progress_fn=progress_fn,
)

# Total wall-clock time since `t_start`.
t_elapsed = time.time() - t_start
print(f"\n[v4 IQL] Training complete in {t_elapsed:.1f}s")
print(f"[v4 IQL] Final metrics: {result['final_metrics']}")
|
|
| |
|
|
import tempfile


def _to_jsonable(value):
    """Convert NumPy / torch / path objects to JSON-serialisable values.

    Used as the ``default=`` hook of ``json.dumps``, so it is only invoked for
    objects the encoder cannot handle natively.

    Args:
        value: The object ``json`` could not serialise.

    Returns:
        A plain Python scalar, list or string standing in for ``value``.

    Raises:
        TypeError: For any type not handled here, which preserves ``json``'s
            normal failure mode instead of silently stringifying objects.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, torch.Tensor):
        return value.detach().cpu().tolist()
    if isinstance(value, (Path, torch.device)):
        return str(value)
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


# Stage every artifact in a throwaway directory, then upload its contents.
with tempfile.TemporaryDirectory() as tmpdir:
    tmp_path = Path(tmpdir)

    # Files written by the trainer itself.
    trainer.save(tmp_path)

    # Normalisation statistics: feature mean/std from the training split and
    # reward mean/std as returned by the trainer.
    np.savez(
        tmp_path / "scaler.npz",
        mean=train_dataset.mean,
        std=train_dataset.std,
        reward_mean=result["reward_mean"],
        reward_std=result["reward_std"],
    )

    # Training report.  The dataset values repeat the literals passed to
    # `build_offline_rl_dataset` earlier in the script.
    report = {
        "algorithm": "IQL",
        "config": config.__dict__,
        "dataset": {
            "path": "fbzu/btc_updown_5m_augmented_v1",
            "history_length": 30,
            "episode_span_days": 30,
            "episode_stride_days": 15,
            "risk_lambda": 1.0,
            "soft_dd_penalty": 0.50,
        },
        "results": result,
        "training_time_seconds": t_elapsed,
        "device": device,
    }
    # `result` and `config` come from project code and may hold NumPy scalars
    # or arrays (reward_mean / reward_std are handed to np.savez above).
    # Without a `default=` hook json.dumps raises TypeError on those, which
    # would abort the script after training but before anything is uploaded.
    (tmp_path / "train_report.json").write_text(
        json.dumps(report, indent=2, default=_to_jsonable)
    )

    print("\n[v4 IQL] Uploading trained model to HF Hub...")
    from huggingface_hub import HfApi

    api = HfApi(token=os.environ.get("HF_TOKEN"))

    # One upload call per entry at the top level of the staging directory.
    for f in tmp_path.iterdir():
        api.upload_file(
            path_or_fileobj=str(f),
            path_in_repo=f.name,
            repo_id="fbzu/rl_btc_v4_iql",
            repo_type="model",
        )
        print(f" Uploaded {f.name}")

print("\n[v4 IQL] Model uploaded to https://huggingface.co/fbzu/rl_btc_v4_iql")
|
|
| |
# Record the end-of-run summary, then close the trackio run.
# NOTE(review): this reads final_metrics["mean_advantage"], whereas the
# per-epoch callback reads metrics["advantage"] -- confirm both keys exist.
final_metrics = result["final_metrics"]
summary = {
    "training_time_seconds": t_elapsed,
    "final_q_loss": final_metrics["q_loss"],
    "final_v_loss": final_metrics["v_loss"],
    "final_policy_loss": final_metrics["policy_loss"],
    "final_mean_advantage": final_metrics["mean_advantage"],
}
trackio.log(summary)
trackio.finish()
|
|