{ "best_global_step": 60, "best_metric": 0.5067150592803955, "best_model_checkpoint": "tinybert_base_train_kd_mrpc/checkpoint-60", "epoch": 9.0, "eval_steps": 500, "global_step": 135, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.0588346719741821, "learning_rate": 4.906666666666667e-05, "loss": 0.6588, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.6666666666666666, "eval_combined_score": 0.7160919540229884, "eval_f1": 0.7655172413793103, "eval_loss": 0.6283461451530457, "eval_runtime": 0.2187, "eval_samples_per_second": 1865.558, "eval_steps_per_second": 9.145, "step": 15 }, { "epoch": 2.0, "grad_norm": 1.6302603483200073, "learning_rate": 4.806666666666667e-05, "loss": 0.5998, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.696078431372549, "eval_combined_score": 0.7508605323006945, "eval_f1": 0.8056426332288401, "eval_loss": 0.5731440782546997, "eval_runtime": 0.2338, "eval_samples_per_second": 1744.854, "eval_steps_per_second": 8.553, "step": 30 }, { "epoch": 3.0, "grad_norm": 5.0050811767578125, "learning_rate": 4.706666666666667e-05, "loss": 0.5433, "step": 45 }, { "epoch": 3.0, "eval_accuracy": 0.7401960784313726, "eval_combined_score": 0.786764705882353, "eval_f1": 0.8333333333333334, "eval_loss": 0.5613774657249451, "eval_runtime": 0.2224, "eval_samples_per_second": 1834.58, "eval_steps_per_second": 8.993, "step": 45 }, { "epoch": 4.0, "grad_norm": 10.076091766357422, "learning_rate": 4.606666666666667e-05, "loss": 0.4574, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.7916666666666666, "eval_combined_score": 0.8239213197969544, "eval_f1": 0.856175972927242, "eval_loss": 0.5067150592803955, "eval_runtime": 0.2267, "eval_samples_per_second": 1799.599, "eval_steps_per_second": 8.822, "step": 60 }, { "epoch": 5.0, "grad_norm": 12.540026664733887, "learning_rate": 4.5066666666666667e-05, "loss": 0.3525, "step": 75 }, { "epoch": 5.0, "eval_accuracy": 0.7573529411764706, "eval_combined_score": 0.7888398099711753, "eval_f1": 0.8203266787658802, "eval_loss": 0.5203803777694702, "eval_runtime": 0.2188, "eval_samples_per_second": 1864.761, "eval_steps_per_second": 9.141, "step": 75 }, { "epoch": 6.0, "grad_norm": 10.685439109802246, "learning_rate": 4.406666666666667e-05, "loss": 0.3015, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.7426470588235294, "eval_combined_score": 0.7904298468231669, "eval_f1": 0.8382126348228043, "eval_loss": 0.829555332660675, "eval_runtime": 0.2251, "eval_samples_per_second": 1812.326, "eval_steps_per_second": 8.884, "step": 90 }, { "epoch": 7.0, "grad_norm": 5.721017360687256, "learning_rate": 4.3066666666666665e-05, "loss": 0.2978, "step": 105 }, { "epoch": 7.0, "eval_accuracy": 0.75, "eval_combined_score": 0.7950626959247649, "eval_f1": 0.8401253918495298, "eval_loss": 0.7863379120826721, "eval_runtime": 0.2298, "eval_samples_per_second": 1775.245, "eval_steps_per_second": 8.702, "step": 105 }, { "epoch": 8.0, "grad_norm": 11.30908203125, "learning_rate": 4.206666666666667e-05, "loss": 0.2163, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.7573529411764706, "eval_combined_score": 0.7940610859728507, "eval_f1": 0.8307692307692308, "eval_loss": 0.7096235752105713, "eval_runtime": 0.2225, "eval_samples_per_second": 1833.884, "eval_steps_per_second": 8.99, "step": 120 }, { "epoch": 9.0, "grad_norm": 4.002461910247803, "learning_rate": 4.106666666666667e-05, "loss": 0.1329, "step": 135 }, { "epoch": 9.0, "eval_accuracy": 0.7549019607843137, "eval_combined_score": 0.7881652661064426, "eval_f1": 0.8214285714285714, "eval_loss": 0.7977427840232849, "eval_runtime": 0.2192, "eval_samples_per_second": 1861.282, "eval_steps_per_second": 9.124, "step": 135 }, { "epoch": 9.0, "step": 135, "total_flos": 2186506882215936.0, "train_loss": 0.3955998191127071, "train_runtime": 48.5344, "train_samples_per_second": 3778.761, "train_steps_per_second": 15.453 } ], "logging_steps": 1, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2186506882215936.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }