{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.4, "eval_steps": 500, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 0.0001, "loss": 1.638, "step": 7 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 1.7376, "step": 14 }, { "epoch": 1.2, "learning_rate": 0.0003, "loss": 1.3745, "step": 21 }, { "epoch": 1.4, "learning_rate": 0.0004, "loss": 1.0169, "step": 28 }, { "epoch": 2.2, "learning_rate": 0.0005, "loss": 0.7142, "step": 35 }, { "epoch": 2.4, "learning_rate": 0.0006, "loss": 0.591, "step": 42 }, { "epoch": 3.2, "learning_rate": 0.0007, "loss": 0.3283, "step": 49 }, { "epoch": 3.4, "learning_rate": 0.0008, "loss": 0.4281, "step": 56 }, { "epoch": 4.2, "learning_rate": 0.0009000000000000001, "loss": 0.2162, "step": 63 }, { "epoch": 4.4, "learning_rate": 0.001, "loss": 0.1895, "step": 70 }, { "epoch": 5.2, "learning_rate": 0.000988888888888889, "loss": 0.1243, "step": 77 }, { "epoch": 5.4, "learning_rate": 0.0009777777777777777, "loss": 0.1904, "step": 84 }, { "epoch": 6.2, "learning_rate": 0.0009666666666666667, "loss": 0.1011, "step": 91 }, { "epoch": 6.4, "learning_rate": 0.0009555555555555556, "loss": 0.116, "step": 98 }, { "epoch": 7.2, "learning_rate": 0.0009444444444444445, "loss": 0.1028, "step": 105 }, { "epoch": 7.4, "learning_rate": 0.0009333333333333333, "loss": 0.0733, "step": 112 }, { "epoch": 8.2, "learning_rate": 0.0009238095238095239, "loss": 0.1564, "step": 119 }, { "epoch": 8.4, "learning_rate": 0.0009126984126984126, "loss": 0.0727, "step": 126 }, { "epoch": 9.2, "learning_rate": 0.0009015873015873016, "loss": 0.1515, "step": 133 }, { "epoch": 9.4, "learning_rate": 0.0008904761904761904, "loss": 0.0682, "step": 140 }, { "epoch": 10.2, "learning_rate": 0.0008793650793650794, "loss": 0.0594, "step": 147 }, { "epoch": 10.4, "learning_rate": 0.0008682539682539683, "loss": 0.137, "step": 154 }, { "epoch": 11.2, "learning_rate": 0.0008571428571428571, "loss": 0.0514, "step": 161 }, { "epoch": 11.4, "learning_rate": 0.0008460317460317461, "loss": 0.0494, "step": 168 }, { "epoch": 12.2, "learning_rate": 0.000834920634920635, "loss": 0.0306, "step": 175 }, { "epoch": 12.4, "learning_rate": 0.0008238095238095238, "loss": 0.041, "step": 182 }, { "epoch": 13.2, "learning_rate": 0.0008126984126984127, "loss": 0.0286, "step": 189 }, { "epoch": 13.4, "learning_rate": 0.0008015873015873017, "loss": 0.0283, "step": 196 }, { "epoch": 14.2, "learning_rate": 0.0007904761904761905, "loss": 0.0153, "step": 203 }, { "epoch": 14.4, "learning_rate": 0.0007793650793650794, "loss": 0.0194, "step": 210 }, { "epoch": 15.2, "learning_rate": 0.0007682539682539683, "loss": 0.0115, "step": 217 }, { "epoch": 15.4, "learning_rate": 0.0007571428571428572, "loss": 0.011, "step": 224 }, { "epoch": 16.2, "learning_rate": 0.000746031746031746, "loss": 0.0072, "step": 231 }, { "epoch": 16.4, "learning_rate": 0.000734920634920635, "loss": 0.0132, "step": 238 }, { "epoch": 17.2, "learning_rate": 0.0007238095238095238, "loss": 0.0065, "step": 245 }, { "epoch": 17.4, "learning_rate": 0.0007126984126984128, "loss": 0.0109, "step": 252 }, { "epoch": 18.2, "learning_rate": 0.0007015873015873015, "loss": 0.0064, "step": 259 }, { "epoch": 18.4, "learning_rate": 0.0006904761904761905, "loss": 0.0076, "step": 266 }, { "epoch": 19.2, "learning_rate": 0.0006793650793650794, "loss": 0.0028, "step": 273 }, { "epoch": 19.4, "learning_rate": 0.0006682539682539683, "loss": 0.0065, "step": 280 } ], "logging_steps": 7, "max_steps": 700, "num_train_epochs": 20, "save_steps": 500, "total_flos": 1.13811835060224e+16, "trial_name": null, "trial_params": null }