"""
Training configuration for the multi-agent trading environment.
"""
|
|
| from dataclasses import dataclass, field |
| from typing import Dict, List, Optional |
|
|
|
|
@dataclass
class TrainingConfig:
    """Hyperparameters and configuration for training.

    Every field has a default, so ``TrainingConfig()`` is a complete
    configuration and any value can be overridden by keyword (or by
    position, in declaration order).

    The mutable defaults (``tickers`` and ``reward_weights``) are built by
    ``default_factory``, so each instance owns its own list/dict and
    editing one instance never leaks into another.

    NOTE(review): the code that consumes these values is not part of this
    module; the section labels below are grouping aids inferred from the
    field names -- confirm exact semantics and units against the consumers.
    """

    # --- Market data -----------------------------------------------------
    data_source: str = "ccxt"
    tickers: List[str] = field(default_factory=lambda: ["BTC/USDT", "ETH/USDT"])
    start_date: str = "2024-01-01"
    end_date: str = "2024-12-31"
    train_split: float = 0.8

    # --- Environment -----------------------------------------------------
    initial_cash: float = 100_000.0
    commission: float = 0.0005
    # Defaults to None; presumably "no explicit step cap" -- TODO confirm.
    max_steps: Optional[int] = None

    # --- Reward component weights -----------------------------------------
    # Maps a reward-component name to its weight.
    reward_weights: Dict[str, float] = field(default_factory=lambda: {
        "profit": 1.0,
        "drawdown": 0.8,
        "volatility": 0.2,
        "sharpe": 0.5,
        "overtrading": 0.05,
        "hold_penalty": 0.01,
        "directional_bonus": 0.3,
    })

    # --- Training loop ----------------------------------------------------
    num_episodes: int = 200
    learning_rate: float = 1e-4
    gamma: float = 0.99
    seed: int = 42

    # --- Agent / risk parameters -------------------------------------------
    trader_aggression: float = 0.6
    risk_max_drawdown: float = 0.30
    risk_max_exposure: float = 0.90
    risk_vol_threshold: float = 0.8

    # --- Logging and output ------------------------------------------------
    log_every: int = 10
    save_dir: str = "checkpoints"
    metrics_file: str = "training_metrics.csv"
    trajectories_file: str = "sft_trajectories.jsonl"
    save_trajectories: bool = True
    fast_mode: bool = False

    # --- Reward strategy ---------------------------------------------------
    # NOTE(review): the set of accepted strategy names is not visible here.
    reward_strategy: str = "shared"
|
|
|
|
| |
# Module-level configuration built entirely from the field defaults.
# TrainingConfig is a plain (non-frozen) dataclass, so this object is mutable
# and shared by everyone who imports it; construct a fresh TrainingConfig()
# when per-run changes are needed.
DEFAULT_CONFIG = TrainingConfig()
|
|