{
  "beta": 0.05,
  "gamma": 0.99,
  "clip_range": 0.2,
  "learning_rate": 5e-06,
  "batch_size": 1,
  "gradient_accumulation_steps": 4,
  "max_epochs": 2,
  "max_length": 256,
  "max_new_tokens": 100,
  "temperature": 0.8,
  "top_p": 0.9,
  "do_sample": true,
  "weight_decay": 0.01,
  "warmup_ratio": 0.1,
  "max_grad_norm": 1.0,
  "force_gpu": true,
  "mixed_precision": false,
  "gradient_checkpointing": true,
  "max_memory_per_gpu": "14GB",
  "logging_steps": 5,
  "eval_steps": 50,
  "save_steps": 50,
  "save_total_limit": 3,
  "reward_method": "adaptive",
  "baseline_type": "batch_mean",
  "ema_alpha": 0.1,
  "kl_target": 0.01,
  "kl_horizon": 10000,
  "adaptive_kl": true
}