{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 500, "global_step": 3452, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 9.884393063583816e-05, "loss": 0.9083, "step": 172 }, { "epoch": 0.4, "learning_rate": 0.00019826589595375724, "loss": 0.5639, "step": 344 }, { "epoch": 0.6, "learning_rate": 0.00018911783644558918, "loss": 0.507, "step": 516 }, { "epoch": 0.8, "learning_rate": 0.0001780424983902125, "loss": 0.4756, "step": 688 }, { "epoch": 1.0, "learning_rate": 0.0001669671603348358, "loss": 0.4768, "step": 860 }, { "epoch": 1.2, "learning_rate": 0.0001558918222794591, "loss": 0.4347, "step": 1032 }, { "epoch": 1.4, "learning_rate": 0.00014481648422408244, "loss": 0.4233, "step": 1204 }, { "epoch": 1.59, "learning_rate": 0.00013374114616870574, "loss": 0.4164, "step": 1376 }, { "epoch": 1.79, "learning_rate": 0.00012266580811332904, "loss": 0.393, "step": 1548 }, { "epoch": 1.99, "learning_rate": 0.00011159047005795235, "loss": 0.403, "step": 1720 }, { "epoch": 2.19, "learning_rate": 0.00010051513200257567, "loss": 0.3503, "step": 1892 }, { "epoch": 2.39, "learning_rate": 8.943979394719897e-05, "loss": 0.3312, "step": 2064 }, { "epoch": 2.59, "learning_rate": 7.836445589182228e-05, "loss": 0.3642, "step": 2236 }, { "epoch": 2.79, "learning_rate": 6.72891178364456e-05, "loss": 0.3422, "step": 2408 }, { "epoch": 2.99, "learning_rate": 5.62137797810689e-05, "loss": 0.3486, "step": 2580 }, { "epoch": 3.19, "learning_rate": 4.513844172569221e-05, "loss": 0.2995, "step": 2752 }, { "epoch": 3.39, "learning_rate": 3.406310367031552e-05, "loss": 0.2809, "step": 2924 }, { "epoch": 3.59, "learning_rate": 2.298776561493883e-05, "loss": 0.2962, "step": 3096 }, { "epoch": 3.79, "learning_rate": 1.1912427559562139e-05, "loss": 0.303, "step": 3268 }, { "epoch": 3.99, "learning_rate": 9.01481004507405e-07, "loss": 0.3121, "step": 3440 } ], "logging_steps": 172, "max_steps": 3452, "num_train_epochs": 4, "save_steps": 500, "total_flos": 2.8062729384650342e+17, "trial_name": null, "trial_params": null }