{
  "compile": true,
  "enable_ddp": false,
  "dtype": "bfloat16",
  "eval_only": false,
  "max_iters": 250000,
  "max_epochs": null,
  "batch_size": 1024,
  "learning_rate": 0.0006,
  "weight_decay": 0.1,
  "beta1": 0.9,
  "beta2": 0.95,
  "gradient_accumulation_steps": 1,
  "grad_clip": 1.0,
  "decay_lr": true,
  "warmup_iters": 5000,
  "lr_decay_iters": 250000,
  "min_lr": 6e-05,
  "eval_iters": 48,
  "eval_interval": 25000,
  "log_interval": 5000,
  "always_save_checkpoint": false,
  "save_checkpoint_every": 50000,
  "save_checkpoint": true,
  "save_snapshot": false,
  "eval_generation": true,
  "block_size": 128,
  "tasks": {
    "generation": 0.8,
    "reconstruction": 0.1,
    "physchem": 0.1
  }
}