{ "best_global_step": 4266, "best_metric": 0.41695910692214966, "best_model_checkpoint": "tiny_bert_rand_20_v1_qqp/checkpoint-4266", "epoch": 8.0, "eval_steps": 500, "global_step": 11376, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.032697916030884, "learning_rate": 4.9e-05, "loss": 0.4821, "step": 1422 }, { "epoch": 1.0, "eval_accuracy": 0.7931981202077665, "eval_combined_score": 0.7460317991638508, "eval_f1": 0.6988654781199352, "eval_loss": 0.43809574842453003, "eval_runtime": 12.0601, "eval_samples_per_second": 3352.379, "eval_steps_per_second": 13.101, "step": 1422 }, { "epoch": 2.0, "grad_norm": 3.111968517303467, "learning_rate": 4.8e-05, "loss": 0.3837, "step": 2844 }, { "epoch": 2.0, "eval_accuracy": 0.8095473658174622, "eval_combined_score": 0.7618316765227506, "eval_f1": 0.7141159872280389, "eval_loss": 0.4237190783023834, "eval_runtime": 11.8112, "eval_samples_per_second": 3423.025, "eval_steps_per_second": 13.377, "step": 2844 }, { "epoch": 3.0, "grad_norm": 3.4837989807128906, "learning_rate": 4.7e-05, "loss": 0.3187, "step": 4266 }, { "epoch": 3.0, "eval_accuracy": 0.8180311649765026, "eval_combined_score": 0.7808225174943499, "eval_f1": 0.7436138700121973, "eval_loss": 0.41695910692214966, "eval_runtime": 11.7905, "eval_samples_per_second": 3429.039, "eval_steps_per_second": 13.401, "step": 4266 }, { "epoch": 4.0, "grad_norm": 3.610124349594116, "learning_rate": 4.600000000000001e-05, "loss": 0.2638, "step": 5688 }, { "epoch": 4.0, "eval_accuracy": 0.8243630967103636, "eval_combined_score": 0.7880077995355406, "eval_f1": 0.7516525023607177, "eval_loss": 0.43834444880485535, "eval_runtime": 11.7911, "eval_samples_per_second": 3428.855, "eval_steps_per_second": 13.4, "step": 5688 }, { "epoch": 5.0, "grad_norm": 3.3936569690704346, "learning_rate": 4.5e-05, "loss": 0.2202, "step": 7110 }, { "epoch": 5.0, "eval_accuracy": 0.8277764036606481, "eval_combined_score": 0.7926280576347196, "eval_f1": 0.757479711608791, "eval_loss": 0.46568045020103455, "eval_runtime": 11.5465, "eval_samples_per_second": 3501.494, "eval_steps_per_second": 13.684, "step": 7110 }, { "epoch": 6.0, "grad_norm": 5.742647647857666, "learning_rate": 4.4000000000000006e-05, "loss": 0.1852, "step": 8532 }, { "epoch": 6.0, "eval_accuracy": 0.8259213455354935, "eval_combined_score": 0.7949921846685177, "eval_f1": 0.7640630238015421, "eval_loss": 0.5005447864532471, "eval_runtime": 11.3692, "eval_samples_per_second": 3556.099, "eval_steps_per_second": 13.897, "step": 8532 }, { "epoch": 7.0, "grad_norm": 4.802578449249268, "learning_rate": 4.3e-05, "loss": 0.1567, "step": 9954 }, { "epoch": 7.0, "eval_accuracy": 0.8237447440019787, "eval_combined_score": 0.7951369025035777, "eval_f1": 0.7665290610051766, "eval_loss": 0.5466973185539246, "eval_runtime": 11.7132, "eval_samples_per_second": 3451.654, "eval_steps_per_second": 13.489, "step": 9954 }, { "epoch": 8.0, "grad_norm": 3.240032434463501, "learning_rate": 4.2e-05, "loss": 0.1357, "step": 11376 }, { "epoch": 8.0, "eval_accuracy": 0.8316843927776404, "eval_combined_score": 0.7958118580814155, "eval_f1": 0.7599393233851907, "eval_loss": 0.5970303416252136, "eval_runtime": 11.8276, "eval_samples_per_second": 3418.281, "eval_steps_per_second": 13.359, "step": 11376 }, { "epoch": 8.0, "step": 11376, "total_flos": 7.633075201391002e+16, "train_loss": 0.2682548877056138, "train_runtime": 1625.0507, "train_samples_per_second": 11194.912, "train_steps_per_second": 43.752 } ], "logging_steps": 1, "max_steps": 71100, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.633075201391002e+16, "train_batch_size": 256, "trial_name": null, "trial_params": null }