{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 3452,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "learning_rate": 9.884393063583816e-05,
      "loss": 0.9083,
      "step": 172
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.00019826589595375724,
      "loss": 0.5639,
      "step": 344
    },
    {
      "epoch": 0.6,
      "learning_rate": 0.00018911783644558918,
      "loss": 0.507,
      "step": 516
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0001780424983902125,
      "loss": 0.4756,
      "step": 688
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0001669671603348358,
      "loss": 0.4768,
      "step": 860
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.0001558918222794591,
      "loss": 0.4347,
      "step": 1032
    },
    {
      "epoch": 1.4,
      "learning_rate": 0.00014481648422408244,
      "loss": 0.4233,
      "step": 1204
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.00013374114616870574,
      "loss": 0.4164,
      "step": 1376
    },
    {
      "epoch": 1.79,
      "learning_rate": 0.00012266580811332904,
      "loss": 0.393,
      "step": 1548
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.00011159047005795235,
      "loss": 0.403,
      "step": 1720
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.00010051513200257567,
      "loss": 0.3503,
      "step": 1892
    },
    {
      "epoch": 2.39,
      "learning_rate": 8.943979394719897e-05,
      "loss": 0.3312,
      "step": 2064
    },
    {
      "epoch": 2.59,
      "learning_rate": 7.836445589182228e-05,
      "loss": 0.3642,
      "step": 2236
    },
    {
      "epoch": 2.79,
      "learning_rate": 6.72891178364456e-05,
      "loss": 0.3422,
      "step": 2408
    },
    {
      "epoch": 2.99,
      "learning_rate": 5.62137797810689e-05,
      "loss": 0.3486,
      "step": 2580
    },
    {
      "epoch": 3.19,
      "learning_rate": 4.513844172569221e-05,
      "loss": 0.2995,
      "step": 2752
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.406310367031552e-05,
      "loss": 0.2809,
      "step": 2924
    },
    {
      "epoch": 3.59,
      "learning_rate": 2.298776561493883e-05,
      "loss": 0.2962,
      "step": 3096
    },
    {
      "epoch": 3.79,
      "learning_rate": 1.1912427559562139e-05,
      "loss": 0.303,
      "step": 3268
    },
    {
      "epoch": 3.99,
      "learning_rate": 9.01481004507405e-07,
      "loss": 0.3121,
      "step": 3440
    }
  ],
  "logging_steps": 172,
  "max_steps": 3452,
  "num_train_epochs": 4,
  "save_steps": 500,
  "total_flos": 2.8062729384650342e+17,
  "trial_name": null,
  "trial_params": null
}