{
  "batch_size": 64,
  "learning_rate": 0.0001,
  "weight_decay": 0.01,
  "max_epochs": 60,
  "tasks": {
    "prediction": 0.6,
    "lm": 0.4
  },
  "compile": true,
  "enable_ddp": false,
  "dtype": "float32",
  "num_workers": 16,
  "beta1": 0.9,
  "beta2": 0.95,
  "gradient_accumulation_steps": 1,
  "grad_clip": 1.0,
  "decay_lr": true,
  "log_interval": 10,
  "save_interval": 5,
  "min_lr": 1e-06,
  "warmup_iters": 54
}