{ "best_global_step": 60, "best_metric": 0.5812366008758545, "best_model_checkpoint": "tiny_bert_km_50_v2_mrpc/checkpoint-60", "epoch": 9.0, "eval_steps": 500, "global_step": 135, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.7363313436508179, "learning_rate": 4.9e-05, "loss": 0.632, "step": 15 }, { "epoch": 1.0, "eval_accuracy": 0.7034313725490197, "eval_combined_score": 0.7601876983773843, "eval_f1": 0.8169440242057489, "eval_loss": 0.5975198149681091, "eval_runtime": 0.1327, "eval_samples_per_second": 3075.467, "eval_steps_per_second": 15.076, "step": 15 }, { "epoch": 2.0, "grad_norm": 1.268504023551941, "learning_rate": 4.8e-05, "loss": 0.5917, "step": 30 }, { "epoch": 2.0, "eval_accuracy": 0.7132352941176471, "eval_combined_score": 0.7691737008704828, "eval_f1": 0.8251121076233184, "eval_loss": 0.5832136273384094, "eval_runtime": 0.1486, "eval_samples_per_second": 2746.391, "eval_steps_per_second": 13.463, "step": 30 }, { "epoch": 3.0, "grad_norm": 1.0295687913894653, "learning_rate": 4.7e-05, "loss": 0.5638, "step": 45 }, { "epoch": 3.0, "eval_accuracy": 0.7034313725490197, "eval_combined_score": 0.7607382426654873, "eval_f1": 0.8180451127819549, "eval_loss": 0.5895337462425232, "eval_runtime": 0.1302, "eval_samples_per_second": 3134.825, "eval_steps_per_second": 15.367, "step": 45 }, { "epoch": 4.0, "grad_norm": 1.3483902215957642, "learning_rate": 4.600000000000001e-05, "loss": 0.5417, "step": 60 }, { "epoch": 4.0, "eval_accuracy": 0.7181372549019608, "eval_combined_score": 0.7696440551337176, "eval_f1": 0.8211508553654744, "eval_loss": 0.5812366008758545, "eval_runtime": 0.1387, "eval_samples_per_second": 2941.894, "eval_steps_per_second": 14.421, "step": 60 }, { "epoch": 5.0, "grad_norm": 1.3638129234313965, "learning_rate": 4.5e-05, "loss": 0.5019, "step": 75 }, { "epoch": 5.0, "eval_accuracy": 0.7009803921568627, "eval_combined_score": 0.7524194565285921, "eval_f1": 0.8038585209003215, "eval_loss": 0.5993048548698425, "eval_runtime": 0.1529, "eval_samples_per_second": 2667.691, "eval_steps_per_second": 13.077, "step": 75 }, { "epoch": 6.0, "grad_norm": 1.534933090209961, "learning_rate": 4.4000000000000006e-05, "loss": 0.4343, "step": 90 }, { "epoch": 6.0, "eval_accuracy": 0.678921568627451, "eval_combined_score": 0.7301118694556287, "eval_f1": 0.7813021702838063, "eval_loss": 0.620622456073761, "eval_runtime": 0.1423, "eval_samples_per_second": 2867.218, "eval_steps_per_second": 14.055, "step": 90 }, { "epoch": 7.0, "grad_norm": 2.426776885986328, "learning_rate": 4.3e-05, "loss": 0.3592, "step": 105 }, { "epoch": 7.0, "eval_accuracy": 0.7083333333333334, "eval_combined_score": 0.754501675041876, "eval_f1": 0.8006700167504187, "eval_loss": 0.6731041669845581, "eval_runtime": 0.1344, "eval_samples_per_second": 3035.9, "eval_steps_per_second": 14.882, "step": 105 }, { "epoch": 8.0, "grad_norm": 2.3238067626953125, "learning_rate": 4.2e-05, "loss": 0.2533, "step": 120 }, { "epoch": 8.0, "eval_accuracy": 0.7009803921568627, "eval_combined_score": 0.7508169934640523, "eval_f1": 0.8006535947712419, "eval_loss": 0.7867075800895691, "eval_runtime": 0.1325, "eval_samples_per_second": 3079.546, "eval_steps_per_second": 15.096, "step": 120 }, { "epoch": 9.0, "grad_norm": 2.622627019882202, "learning_rate": 4.1e-05, "loss": 0.1831, "step": 135 }, { "epoch": 9.0, "eval_accuracy": 0.7034313725490197, "eval_combined_score": 0.7517156862745098, "eval_f1": 0.8, "eval_loss": 0.8554521203041077, "eval_runtime": 0.1556, "eval_samples_per_second": 2622.352, "eval_steps_per_second": 12.855, "step": 135 }, { "epoch": 9.0, "step": 135, "total_flos": 865692760633344.0, "train_loss": 0.45122435004622846, "train_runtime": 25.3703, "train_samples_per_second": 7228.916, "train_steps_per_second": 29.562 } ], "logging_steps": 1, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 865692760633344.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }