"""
Training configuration for the multi-agent trading environment.
"""
from dataclasses import dataclass, field
from typing import Dict, List, Optional
@dataclass
class TrainingConfig:
    """Hyperparameters and configuration for training.

    Every field has a default, so ``TrainingConfig()`` is a valid,
    fully-populated configuration. The list and dict defaults are built
    through ``default_factory``, so each instance gets its own ``tickers``
    list and ``reward_weights`` dict rather than sharing one object.

    Field order is part of the interface (positional construction), so new
    fields should be appended rather than inserted.
    """

    # ─── Data ───
    data_source: str = "ccxt"  # Use CCXT by default for Crypto
    tickers: List[str] = field(default_factory=lambda: ["BTC/USDT", "ETH/USDT"])
    start_date: str = "2024-01-01"
    end_date: str = "2024-12-31"
    # NOTE(review): presumably the fraction of the date range used for
    # training — confirm against the data loader.
    train_split: float = 0.8

    # ─── Environment ───
    initial_cash: float = 100_000.0
    commission: float = 0.0005  # Lower commissions for high-volume crypto
    # NOTE(review): None presumably means "no step cap" — confirm against
    # the environment implementation.
    max_steps: Optional[int] = None

    # ─── Reward Weights ───
    # Per-component weights, keyed by component name. How the components are
    # combined is defined by the reward code, not here.
    reward_weights: Dict[str, float] = field(default_factory=lambda: {
        "profit": 1.0,
        "drawdown": 0.8,            # Heavier penalty for crypto drawdowns
        "volatility": 0.2,
        "sharpe": 0.5,
        "overtrading": 0.05,
        "hold_penalty": 0.01,       # Small cost for inaction
        "directional_bonus": 0.3,   # Reward matching market trend
    })

    # ─── Training Loop ───
    num_episodes: int = 200
    learning_rate: float = 1e-4
    gamma: float = 0.99
    seed: int = 42

    # ─── Agent Settings ───
    trader_aggression: float = 0.6
    risk_max_drawdown: float = 0.30  # Higher threshold for crypto
    risk_max_exposure: float = 0.90
    risk_vol_threshold: float = 0.8  # Crypto-specific volatility threshold

    # ─── Logging ───
    log_every: int = 10
    save_dir: str = "checkpoints"
    metrics_file: str = "training_metrics.csv"
    trajectories_file: str = "sft_trajectories.jsonl"
    save_trajectories: bool = True
    fast_mode: bool = False

    # ─── Reward Strategy ───
    # NOTE(review): the set of accepted strategy names is defined by the
    # consumer of this config and is not visible in this file.
    reward_strategy: str = "shared"
# Default config instance: a module-level TrainingConfig built with every
# field left at its declared default.
# NOTE(review): TrainingConfig is not a frozen dataclass, so in-place edits to
# this object are visible to every module that imports it. Construct a fresh
# TrainingConfig(...) when a customized configuration is needed.
DEFAULT_CONFIG = TrainingConfig()
|