| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "global_step": 1620, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.9176954732510288e-05, | |
| "loss": 1.5253, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.835390946502058e-05, | |
| "loss": 0.5112, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.7530864197530865e-05, | |
| "loss": 0.3811, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.670781893004115e-05, | |
| "loss": 0.2638, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.588477366255144e-05, | |
| "loss": 0.2136, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.506172839506173e-05, | |
| "loss": 0.1868, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.4238683127572017e-05, | |
| "loss": 0.17, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.3415637860082307e-05, | |
| "loss": 0.1193, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.2592592592592593e-05, | |
| "loss": 0.108, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "learning_rate": 1.1769547325102882e-05, | |
| "loss": 0.0724, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.0946502057613168e-05, | |
| "loss": 0.072, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 1.0123456790123458e-05, | |
| "loss": 0.0732, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 9.300411522633745e-06, | |
| "loss": 0.0483, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 8.477366255144033e-06, | |
| "loss": 0.0586, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 7.654320987654322e-06, | |
| "loss": 0.0432, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 6.83127572016461e-06, | |
| "loss": 0.0275, | |
| "step": 1600 | |
| } | |
| ], | |
| "max_steps": 2430, | |
| "num_train_epochs": 3, | |
| "total_flos": 5.698796483985408e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |