| { | |
| "best_metric": 0.508567750453949, | |
| "best_model_checkpoint": "bert_final_model/checkpoint-430", | |
| "epoch": 20.0, | |
| "global_step": 8600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7917374080362196, | |
| "eval_loss": 0.508567750453949, | |
| "eval_runtime": 14.6046, | |
| "eval_samples_per_second": 241.978, | |
| "eval_steps_per_second": 3.834, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "learning_rate": 9.418604651162791e-06, | |
| "loss": 0.539, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7959818902093945, | |
| "eval_loss": 0.5117859840393066, | |
| "eval_runtime": 14.7107, | |
| "eval_samples_per_second": 240.233, | |
| "eval_steps_per_second": 3.807, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 8.837209302325582e-06, | |
| "loss": 0.4463, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7891907187323146, | |
| "eval_loss": 0.5270956158638, | |
| "eval_runtime": 14.86, | |
| "eval_samples_per_second": 237.819, | |
| "eval_steps_per_second": 3.769, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 3.49, | |
| "learning_rate": 8.255813953488374e-06, | |
| "loss": 0.3718, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7897566496887379, | |
| "eval_loss": 0.5500622987747192, | |
| "eval_runtime": 14.8926, | |
| "eval_samples_per_second": 237.3, | |
| "eval_steps_per_second": 3.76, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 7.674418604651164e-06, | |
| "loss": 0.3066, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7900396151669496, | |
| "eval_loss": 0.5998191237449646, | |
| "eval_runtime": 14.9204, | |
| "eval_samples_per_second": 236.857, | |
| "eval_steps_per_second": 3.753, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 5.81, | |
| "learning_rate": 7.0930232558139545e-06, | |
| "loss": 0.2395, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7843803056027164, | |
| "eval_loss": 0.6628095507621765, | |
| "eval_runtime": 14.9429, | |
| "eval_samples_per_second": 236.5, | |
| "eval_steps_per_second": 3.748, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 6.98, | |
| "learning_rate": 6.511627906976745e-06, | |
| "loss": 0.1955, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7826825127334465, | |
| "eval_loss": 0.723823070526123, | |
| "eval_runtime": 14.9266, | |
| "eval_samples_per_second": 236.758, | |
| "eval_steps_per_second": 3.752, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7821165817770233, | |
| "eval_loss": 0.7725191116333008, | |
| "eval_runtime": 14.9366, | |
| "eval_samples_per_second": 236.601, | |
| "eval_steps_per_second": 3.749, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 8.14, | |
| "learning_rate": 5.930232558139536e-06, | |
| "loss": 0.1485, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7852292020373515, | |
| "eval_loss": 0.8427122831344604, | |
| "eval_runtime": 14.994, | |
| "eval_samples_per_second": 235.695, | |
| "eval_steps_per_second": 3.735, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 5.348837209302326e-06, | |
| "loss": 0.1196, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7818336162988115, | |
| "eval_loss": 0.9093418121337891, | |
| "eval_runtime": 15.0048, | |
| "eval_samples_per_second": 235.524, | |
| "eval_steps_per_second": 3.732, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "learning_rate": 4.767441860465117e-06, | |
| "loss": 0.0998, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.77985285795133, | |
| "eval_loss": 0.9718943238258362, | |
| "eval_runtime": 14.9478, | |
| "eval_samples_per_second": 236.422, | |
| "eval_steps_per_second": 3.746, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "learning_rate": 4.186046511627907e-06, | |
| "loss": 0.0838, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7809847198641766, | |
| "eval_loss": 0.9967551827430725, | |
| "eval_runtime": 15.0053, | |
| "eval_samples_per_second": 235.517, | |
| "eval_steps_per_second": 3.732, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 12.79, | |
| "learning_rate": 3.6046511627906977e-06, | |
| "loss": 0.0717, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7829654782116582, | |
| "eval_loss": 1.0477949380874634, | |
| "eval_runtime": 14.9708, | |
| "eval_samples_per_second": 236.059, | |
| "eval_steps_per_second": 3.741, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 13.95, | |
| "learning_rate": 3.0232558139534885e-06, | |
| "loss": 0.0615, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7818336162988115, | |
| "eval_loss": 1.085091471672058, | |
| "eval_runtime": 15.0224, | |
| "eval_samples_per_second": 235.249, | |
| "eval_steps_per_second": 3.728, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7784380305602716, | |
| "eval_loss": 1.1496540307998657, | |
| "eval_runtime": 14.9724, | |
| "eval_samples_per_second": 236.034, | |
| "eval_steps_per_second": 3.74, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 15.12, | |
| "learning_rate": 2.4418604651162793e-06, | |
| "loss": 0.0554, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7869269949066214, | |
| "eval_loss": 1.1740212440490723, | |
| "eval_runtime": 14.9588, | |
| "eval_samples_per_second": 236.25, | |
| "eval_steps_per_second": 3.744, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 16.28, | |
| "learning_rate": 1.86046511627907e-06, | |
| "loss": 0.0459, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7846632710809281, | |
| "eval_loss": 1.2093095779418945, | |
| "eval_runtime": 14.779, | |
| "eval_samples_per_second": 239.124, | |
| "eval_steps_per_second": 3.789, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 17.44, | |
| "learning_rate": 1.2790697674418605e-06, | |
| "loss": 0.0421, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.787209960384833, | |
| "eval_loss": 1.2152066230773926, | |
| "eval_runtime": 14.8521, | |
| "eval_samples_per_second": 237.946, | |
| "eval_steps_per_second": 3.771, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 18.6, | |
| "learning_rate": 6.976744186046513e-07, | |
| "loss": 0.042, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.7849462365591398, | |
| "eval_loss": 1.2264506816864014, | |
| "eval_runtime": 14.8089, | |
| "eval_samples_per_second": 238.64, | |
| "eval_steps_per_second": 3.781, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 19.77, | |
| "learning_rate": 1.1627906976744187e-07, | |
| "loss": 0.0359, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7855121675155631, | |
| "eval_loss": 1.2309114933013916, | |
| "eval_runtime": 14.8778, | |
| "eval_samples_per_second": 237.536, | |
| "eval_steps_per_second": 3.764, | |
| "step": 8600 | |
| } | |
| ], | |
| "max_steps": 8600, | |
| "num_train_epochs": 20, | |
| "total_flos": 3.10690695931308e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |