{ "best_metric": 0.508567750453949, "best_model_checkpoint": "bert_final_model/checkpoint-430", "epoch": 20.0, "global_step": 8600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.7917374080362196, "eval_loss": 0.508567750453949, "eval_runtime": 14.6046, "eval_samples_per_second": 241.978, "eval_steps_per_second": 3.834, "step": 430 }, { "epoch": 1.16, "learning_rate": 9.418604651162791e-06, "loss": 0.539, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.7959818902093945, "eval_loss": 0.5117859840393066, "eval_runtime": 14.7107, "eval_samples_per_second": 240.233, "eval_steps_per_second": 3.807, "step": 860 }, { "epoch": 2.33, "learning_rate": 8.837209302325582e-06, "loss": 0.4463, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.7891907187323146, "eval_loss": 0.5270956158638, "eval_runtime": 14.86, "eval_samples_per_second": 237.819, "eval_steps_per_second": 3.769, "step": 1290 }, { "epoch": 3.49, "learning_rate": 8.255813953488374e-06, "loss": 0.3718, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.7897566496887379, "eval_loss": 0.5500622987747192, "eval_runtime": 14.8926, "eval_samples_per_second": 237.3, "eval_steps_per_second": 3.76, "step": 1720 }, { "epoch": 4.65, "learning_rate": 7.674418604651164e-06, "loss": 0.3066, "step": 2000 }, { "epoch": 5.0, "eval_accuracy": 0.7900396151669496, "eval_loss": 0.5998191237449646, "eval_runtime": 14.9204, "eval_samples_per_second": 236.857, "eval_steps_per_second": 3.753, "step": 2150 }, { "epoch": 5.81, "learning_rate": 7.0930232558139545e-06, "loss": 0.2395, "step": 2500 }, { "epoch": 6.0, "eval_accuracy": 0.7843803056027164, "eval_loss": 0.6628095507621765, "eval_runtime": 14.9429, "eval_samples_per_second": 236.5, "eval_steps_per_second": 3.748, "step": 2580 }, { "epoch": 6.98, "learning_rate": 6.511627906976745e-06, "loss": 0.1955, "step": 3000 }, { "epoch": 7.0, "eval_accuracy": 0.7826825127334465, "eval_loss": 0.723823070526123, "eval_runtime": 14.9266, "eval_samples_per_second": 236.758, "eval_steps_per_second": 3.752, "step": 3010 }, { "epoch": 8.0, "eval_accuracy": 0.7821165817770233, "eval_loss": 0.7725191116333008, "eval_runtime": 14.9366, "eval_samples_per_second": 236.601, "eval_steps_per_second": 3.749, "step": 3440 }, { "epoch": 8.14, "learning_rate": 5.930232558139536e-06, "loss": 0.1485, "step": 3500 }, { "epoch": 9.0, "eval_accuracy": 0.7852292020373515, "eval_loss": 0.8427122831344604, "eval_runtime": 14.994, "eval_samples_per_second": 235.695, "eval_steps_per_second": 3.735, "step": 3870 }, { "epoch": 9.3, "learning_rate": 5.348837209302326e-06, "loss": 0.1196, "step": 4000 }, { "epoch": 10.0, "eval_accuracy": 0.7818336162988115, "eval_loss": 0.9093418121337891, "eval_runtime": 15.0048, "eval_samples_per_second": 235.524, "eval_steps_per_second": 3.732, "step": 4300 }, { "epoch": 10.47, "learning_rate": 4.767441860465117e-06, "loss": 0.0998, "step": 4500 }, { "epoch": 11.0, "eval_accuracy": 0.77985285795133, "eval_loss": 0.9718943238258362, "eval_runtime": 14.9478, "eval_samples_per_second": 236.422, "eval_steps_per_second": 3.746, "step": 4730 }, { "epoch": 11.63, "learning_rate": 4.186046511627907e-06, "loss": 0.0838, "step": 5000 }, { "epoch": 12.0, "eval_accuracy": 0.7809847198641766, "eval_loss": 0.9967551827430725, "eval_runtime": 15.0053, "eval_samples_per_second": 235.517, "eval_steps_per_second": 3.732, "step": 5160 }, { "epoch": 12.79, "learning_rate": 3.6046511627906977e-06, "loss": 0.0717, "step": 5500 }, { "epoch": 13.0, "eval_accuracy": 0.7829654782116582, "eval_loss": 1.0477949380874634, "eval_runtime": 14.9708, "eval_samples_per_second": 236.059, "eval_steps_per_second": 3.741, "step": 5590 }, { "epoch": 13.95, "learning_rate": 3.0232558139534885e-06, "loss": 0.0615, "step": 6000 }, { "epoch": 14.0, "eval_accuracy": 0.7818336162988115, "eval_loss": 1.085091471672058, "eval_runtime": 15.0224, "eval_samples_per_second": 235.249, "eval_steps_per_second": 3.728, "step": 6020 }, { "epoch": 15.0, "eval_accuracy": 0.7784380305602716, "eval_loss": 1.1496540307998657, "eval_runtime": 14.9724, "eval_samples_per_second": 236.034, "eval_steps_per_second": 3.74, "step": 6450 }, { "epoch": 15.12, "learning_rate": 2.4418604651162793e-06, "loss": 0.0554, "step": 6500 }, { "epoch": 16.0, "eval_accuracy": 0.7869269949066214, "eval_loss": 1.1740212440490723, "eval_runtime": 14.9588, "eval_samples_per_second": 236.25, "eval_steps_per_second": 3.744, "step": 6880 }, { "epoch": 16.28, "learning_rate": 1.86046511627907e-06, "loss": 0.0459, "step": 7000 }, { "epoch": 17.0, "eval_accuracy": 0.7846632710809281, "eval_loss": 1.2093095779418945, "eval_runtime": 14.779, "eval_samples_per_second": 239.124, "eval_steps_per_second": 3.789, "step": 7310 }, { "epoch": 17.44, "learning_rate": 1.2790697674418605e-06, "loss": 0.0421, "step": 7500 }, { "epoch": 18.0, "eval_accuracy": 0.787209960384833, "eval_loss": 1.2152066230773926, "eval_runtime": 14.8521, "eval_samples_per_second": 237.946, "eval_steps_per_second": 3.771, "step": 7740 }, { "epoch": 18.6, "learning_rate": 6.976744186046513e-07, "loss": 0.042, "step": 8000 }, { "epoch": 19.0, "eval_accuracy": 0.7849462365591398, "eval_loss": 1.2264506816864014, "eval_runtime": 14.8089, "eval_samples_per_second": 238.64, "eval_steps_per_second": 3.781, "step": 8170 }, { "epoch": 19.77, "learning_rate": 1.1627906976744187e-07, "loss": 0.0359, "step": 8500 }, { "epoch": 20.0, "eval_accuracy": 0.7855121675155631, "eval_loss": 1.2309114933013916, "eval_runtime": 14.8778, "eval_samples_per_second": 237.536, "eval_steps_per_second": 3.764, "step": 8600 } ], "max_steps": 8600, "num_train_epochs": 20, "total_flos": 3.10690695931308e+16, "trial_name": null, "trial_params": null }