| { |
| "best_metric": 0.6102265119552612, |
| "best_model_checkpoint": "mobilebert_add_GLUE_Experiment_cola_256/checkpoint-737", |
| "epoch": 16.0, |
| "global_step": 1072, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.9e-05, |
| "loss": 0.6129, |
| "step": 67 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.6179760098457336, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.9044, |
| "eval_samples_per_second": 547.668, |
| "eval_steps_per_second": 4.726, |
| "step": 67 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 4.8e-05, |
| "loss": 0.6078, |
| "step": 134 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_loss": 0.6178193688392639, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.9385, |
| "eval_samples_per_second": 538.041, |
| "eval_steps_per_second": 4.643, |
| "step": 134 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 4.7e-05, |
| "loss": 0.6073, |
| "step": 201 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_loss": 0.6178669333457947, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.9618, |
| "eval_samples_per_second": 531.659, |
| "eval_steps_per_second": 4.588, |
| "step": 201 |
| }, |
| { |
| "epoch": 4.0, |
| "learning_rate": 4.600000000000001e-05, |
| "loss": 0.6067, |
| "step": 268 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_loss": 0.6166986227035522, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.9261, |
| "eval_samples_per_second": 541.5, |
| "eval_steps_per_second": 4.673, |
| "step": 268 |
| }, |
| { |
| "epoch": 5.0, |
| "learning_rate": 4.5e-05, |
| "loss": 0.6059, |
| "step": 335 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_loss": 0.6167794466018677, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.882, |
| "eval_samples_per_second": 554.203, |
| "eval_steps_per_second": 4.782, |
| "step": 335 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.5998, |
| "step": 402 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_loss": 0.6115455627441406, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.8963, |
| "eval_samples_per_second": 550.027, |
| "eval_steps_per_second": 4.746, |
| "step": 402 |
| }, |
| { |
| "epoch": 7.0, |
| "learning_rate": 4.3e-05, |
| "loss": 0.5917, |
| "step": 469 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_loss": 0.6122425198554993, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.9219, |
| "eval_samples_per_second": 542.687, |
| "eval_steps_per_second": 4.683, |
| "step": 469 |
| }, |
| { |
| "epoch": 8.0, |
| "learning_rate": 4.2e-05, |
| "loss": 0.5849, |
| "step": 536 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_loss": 0.6126018166542053, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.9382, |
| "eval_samples_per_second": 538.131, |
| "eval_steps_per_second": 4.644, |
| "step": 536 |
| }, |
| { |
| "epoch": 9.0, |
| "learning_rate": 4.1e-05, |
| "loss": 0.5796, |
| "step": 603 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_loss": 0.6276524066925049, |
| "eval_matthews_correlation": 0.0, |
| "eval_runtime": 1.8651, |
| "eval_samples_per_second": 559.208, |
| "eval_steps_per_second": 4.825, |
| "step": 603 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 4e-05, |
| "loss": 0.5759, |
| "step": 670 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_loss": 0.6138085722923279, |
| "eval_matthews_correlation": 0.00286100001416597, |
| "eval_runtime": 1.8774, |
| "eval_samples_per_second": 555.557, |
| "eval_steps_per_second": 4.794, |
| "step": 670 |
| }, |
| { |
| "epoch": 11.0, |
| "learning_rate": 3.9000000000000006e-05, |
| "loss": 0.5733, |
| "step": 737 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_loss": 0.6102265119552612, |
| "eval_matthews_correlation": 0.01845565733408863, |
| "eval_runtime": 1.925, |
| "eval_samples_per_second": 541.806, |
| "eval_steps_per_second": 4.675, |
| "step": 737 |
| }, |
| { |
| "epoch": 12.0, |
| "learning_rate": 3.8e-05, |
| "loss": 0.5716, |
| "step": 804 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_loss": 0.6143413782119751, |
| "eval_matthews_correlation": 0.025208083291660098, |
| "eval_runtime": 1.8542, |
| "eval_samples_per_second": 562.494, |
| "eval_steps_per_second": 4.854, |
| "step": 804 |
| }, |
| { |
| "epoch": 13.0, |
| "learning_rate": 3.7e-05, |
| "loss": 0.5667, |
| "step": 871 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_loss": 0.6347153782844543, |
| "eval_matthews_correlation": 0.03482284441916008, |
| "eval_runtime": 1.8966, |
| "eval_samples_per_second": 549.934, |
| "eval_steps_per_second": 4.745, |
| "step": 871 |
| }, |
| { |
| "epoch": 14.0, |
| "learning_rate": 3.6e-05, |
| "loss": 0.5662, |
| "step": 938 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_loss": 0.6314128637313843, |
| "eval_matthews_correlation": 0.03846275142815186, |
| "eval_runtime": 1.858, |
| "eval_samples_per_second": 561.368, |
| "eval_steps_per_second": 4.844, |
| "step": 938 |
| }, |
| { |
| "epoch": 15.0, |
| "learning_rate": 3.5e-05, |
| "loss": 0.5631, |
| "step": 1005 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_loss": 0.6130307912826538, |
| "eval_matthews_correlation": 0.017448205413933698, |
| "eval_runtime": 1.8802, |
| "eval_samples_per_second": 554.731, |
| "eval_steps_per_second": 4.787, |
| "step": 1005 |
| }, |
| { |
| "epoch": 16.0, |
| "learning_rate": 3.4000000000000007e-05, |
| "loss": 0.5628, |
| "step": 1072 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_loss": 0.6218040585517883, |
| "eval_matthews_correlation": 0.03482284441916008, |
| "eval_runtime": 1.8684, |
| "eval_samples_per_second": 558.219, |
| "eval_steps_per_second": 4.817, |
| "step": 1072 |
| }, |
| { |
| "epoch": 16.0, |
| "step": 1072, |
| "total_flos": 3526461549969408.0, |
| "train_loss": 0.5860174342767516, |
| "train_runtime": 908.8925, |
| "train_samples_per_second": 470.408, |
| "train_steps_per_second": 3.686 |
| } |
| ], |
| "max_steps": 3350, |
| "num_train_epochs": 50, |
| "total_flos": 3526461549969408.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|