{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.393939393939394, "eval_steps": 500, "global_step": 160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.73, "learning_rate": 7.500000000000001e-05, "loss": 2.5979, "step": 6 }, { "epoch": 1.45, "learning_rate": 0.00015000000000000001, "loss": 2.5438, "step": 12 }, { "epoch": 2.18, "learning_rate": 0.00019722222222222225, "loss": 2.4147, "step": 18 }, { "epoch": 2.91, "learning_rate": 0.00018888888888888888, "loss": 2.1395, "step": 24 }, { "epoch": 3.64, "learning_rate": 0.00018055555555555557, "loss": 2.0771, "step": 30 }, { "epoch": 4.36, "learning_rate": 0.00017222222222222224, "loss": 1.9096, "step": 36 }, { "epoch": 5.09, "learning_rate": 0.0001638888888888889, "loss": 1.9748, "step": 42 }, { "epoch": 5.82, "learning_rate": 0.00015555555555555556, "loss": 1.8121, "step": 48 }, { "epoch": 6.55, "learning_rate": 0.00014722222222222223, "loss": 1.7856, "step": 54 }, { "epoch": 7.27, "learning_rate": 0.0001388888888888889, "loss": 1.7752, "step": 60 }, { "epoch": 8.0, "learning_rate": 0.00013055555555555555, "loss": 1.6423, "step": 66 }, { "epoch": 8.73, "learning_rate": 0.00012222222222222224, "loss": 1.6135, "step": 72 }, { "epoch": 9.45, "learning_rate": 0.00011388888888888889, "loss": 1.574, "step": 78 }, { "epoch": 10.18, "learning_rate": 0.00010555555555555557, "loss": 1.4749, "step": 84 }, { "epoch": 10.91, "learning_rate": 9.722222222222223e-05, "loss": 1.4809, "step": 90 }, { "epoch": 11.64, "learning_rate": 8.888888888888889e-05, "loss": 1.4328, "step": 96 }, { "epoch": 12.36, "learning_rate": 8.055555555555556e-05, "loss": 1.299, "step": 102 }, { "epoch": 13.09, "learning_rate": 7.222222222222222e-05, "loss": 1.2881, "step": 108 }, { "epoch": 13.82, "learning_rate": 6.388888888888888e-05, "loss": 1.2392, "step": 114 }, { "epoch": 14.55, "learning_rate": 5.555555555555556e-05, "loss": 1.1873, "step": 120 }, { "epoch": 15.27, "learning_rate": 4.722222222222222e-05, "loss": 1.1339, "step": 126 }, { "epoch": 16.0, "learning_rate": 3.888888888888889e-05, "loss": 1.0971, "step": 132 }, { "epoch": 16.73, "learning_rate": 3.055555555555556e-05, "loss": 1.0632, "step": 138 }, { "epoch": 17.45, "learning_rate": 2.2222222222222223e-05, "loss": 0.9817, "step": 144 }, { "epoch": 18.18, "learning_rate": 1.388888888888889e-05, "loss": 1.0649, "step": 150 }, { "epoch": 18.91, "learning_rate": 5.555555555555556e-06, "loss": 0.9804, "step": 156 } ], "logging_steps": 6, "max_steps": 160, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.601411762192384e+16, "trial_name": null, "trial_params": null }