| { | |
| "best_global_step": 4266, | |
| "best_metric": 0.41695910692214966, | |
| "best_model_checkpoint": "tiny_bert_rand_20_v1_qqp/checkpoint-4266", | |
| "epoch": 8.0, | |
| "eval_steps": 500, | |
| "global_step": 11376, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 2.032697916030884, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.4821, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7931981202077665, | |
| "eval_combined_score": 0.7460317991638508, | |
| "eval_f1": 0.6988654781199352, | |
| "eval_loss": 0.43809574842453003, | |
| "eval_runtime": 12.0601, | |
| "eval_samples_per_second": 3352.379, | |
| "eval_steps_per_second": 13.101, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.111968517303467, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.3837, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8095473658174622, | |
| "eval_combined_score": 0.7618316765227506, | |
| "eval_f1": 0.7141159872280389, | |
| "eval_loss": 0.4237190783023834, | |
| "eval_runtime": 11.8112, | |
| "eval_samples_per_second": 3423.025, | |
| "eval_steps_per_second": 13.377, | |
| "step": 2844 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 3.4837989807128906, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.3187, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8180311649765026, | |
| "eval_combined_score": 0.7808225174943499, | |
| "eval_f1": 0.7436138700121973, | |
| "eval_loss": 0.41695910692214966, | |
| "eval_runtime": 11.7905, | |
| "eval_samples_per_second": 3429.039, | |
| "eval_steps_per_second": 13.401, | |
| "step": 4266 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 3.610124349594116, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.2638, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8243630967103636, | |
| "eval_combined_score": 0.7880077995355406, | |
| "eval_f1": 0.7516525023607177, | |
| "eval_loss": 0.43834444880485535, | |
| "eval_runtime": 11.7911, | |
| "eval_samples_per_second": 3428.855, | |
| "eval_steps_per_second": 13.4, | |
| "step": 5688 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 3.3936569690704346, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.2202, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8277764036606481, | |
| "eval_combined_score": 0.7926280576347196, | |
| "eval_f1": 0.757479711608791, | |
| "eval_loss": 0.46568045020103455, | |
| "eval_runtime": 11.5465, | |
| "eval_samples_per_second": 3501.494, | |
| "eval_steps_per_second": 13.684, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 5.742647647857666, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.1852, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8259213455354935, | |
| "eval_combined_score": 0.7949921846685177, | |
| "eval_f1": 0.7640630238015421, | |
| "eval_loss": 0.5005447864532471, | |
| "eval_runtime": 11.3692, | |
| "eval_samples_per_second": 3556.099, | |
| "eval_steps_per_second": 13.897, | |
| "step": 8532 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 4.802578449249268, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.1567, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8237447440019787, | |
| "eval_combined_score": 0.7951369025035777, | |
| "eval_f1": 0.7665290610051766, | |
| "eval_loss": 0.5466973185539246, | |
| "eval_runtime": 11.7132, | |
| "eval_samples_per_second": 3451.654, | |
| "eval_steps_per_second": 13.489, | |
| "step": 9954 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 3.240032434463501, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.1357, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.8316843927776404, | |
| "eval_combined_score": 0.7958118580814155, | |
| "eval_f1": 0.7599393233851907, | |
| "eval_loss": 0.5970303416252136, | |
| "eval_runtime": 11.8276, | |
| "eval_samples_per_second": 3418.281, | |
| "eval_steps_per_second": 13.359, | |
| "step": 11376 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "step": 11376, | |
| "total_flos": 7.633075201391002e+16, | |
| "train_loss": 0.2682548877056138, | |
| "train_runtime": 1625.0507, | |
| "train_samples_per_second": 11194.912, | |
| "train_steps_per_second": 43.752 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 71100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.633075201391002e+16, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |