| { | |
| "best_global_step": 60, | |
| "best_metric": 0.5812366008758545, | |
| "best_model_checkpoint": "tiny_bert_km_50_v2_mrpc/checkpoint-60", | |
| "epoch": 9.0, | |
| "eval_steps": 500, | |
| "global_step": 135, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.7363313436508179, | |
| "learning_rate": 4.9e-05, | |
| "loss": 0.632, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.7034313725490197, | |
| "eval_combined_score": 0.7601876983773843, | |
| "eval_f1": 0.8169440242057489, | |
| "eval_loss": 0.5975198149681091, | |
| "eval_runtime": 0.1327, | |
| "eval_samples_per_second": 3075.467, | |
| "eval_steps_per_second": 15.076, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.268504023551941, | |
| "learning_rate": 4.8e-05, | |
| "loss": 0.5917, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7132352941176471, | |
| "eval_combined_score": 0.7691737008704828, | |
| "eval_f1": 0.8251121076233184, | |
| "eval_loss": 0.5832136273384094, | |
| "eval_runtime": 0.1486, | |
| "eval_samples_per_second": 2746.391, | |
| "eval_steps_per_second": 13.463, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.0295687913894653, | |
| "learning_rate": 4.7e-05, | |
| "loss": 0.5638, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7034313725490197, | |
| "eval_combined_score": 0.7607382426654873, | |
| "eval_f1": 0.8180451127819549, | |
| "eval_loss": 0.5895337462425232, | |
| "eval_runtime": 0.1302, | |
| "eval_samples_per_second": 3134.825, | |
| "eval_steps_per_second": 15.367, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 1.3483902215957642, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 0.5417, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7181372549019608, | |
| "eval_combined_score": 0.7696440551337176, | |
| "eval_f1": 0.8211508553654744, | |
| "eval_loss": 0.5812366008758545, | |
| "eval_runtime": 0.1387, | |
| "eval_samples_per_second": 2941.894, | |
| "eval_steps_per_second": 14.421, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 1.3638129234313965, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.5019, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7009803921568627, | |
| "eval_combined_score": 0.7524194565285921, | |
| "eval_f1": 0.8038585209003215, | |
| "eval_loss": 0.5993048548698425, | |
| "eval_runtime": 0.1529, | |
| "eval_samples_per_second": 2667.691, | |
| "eval_steps_per_second": 13.077, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 1.534933090209961, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 0.4343, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.678921568627451, | |
| "eval_combined_score": 0.7301118694556287, | |
| "eval_f1": 0.7813021702838063, | |
| "eval_loss": 0.620622456073761, | |
| "eval_runtime": 0.1423, | |
| "eval_samples_per_second": 2867.218, | |
| "eval_steps_per_second": 14.055, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "grad_norm": 2.426776885986328, | |
| "learning_rate": 4.3e-05, | |
| "loss": 0.3592, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7083333333333334, | |
| "eval_combined_score": 0.754501675041876, | |
| "eval_f1": 0.8006700167504187, | |
| "eval_loss": 0.6731041669845581, | |
| "eval_runtime": 0.1344, | |
| "eval_samples_per_second": 3035.9, | |
| "eval_steps_per_second": 14.882, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 2.3238067626953125, | |
| "learning_rate": 4.2e-05, | |
| "loss": 0.2533, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7009803921568627, | |
| "eval_combined_score": 0.7508169934640523, | |
| "eval_f1": 0.8006535947712419, | |
| "eval_loss": 0.7867075800895691, | |
| "eval_runtime": 0.1325, | |
| "eval_samples_per_second": 3079.546, | |
| "eval_steps_per_second": 15.096, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "grad_norm": 2.622627019882202, | |
| "learning_rate": 4.1e-05, | |
| "loss": 0.1831, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7034313725490197, | |
| "eval_combined_score": 0.7517156862745098, | |
| "eval_f1": 0.8, | |
| "eval_loss": 0.8554521203041077, | |
| "eval_runtime": 0.1556, | |
| "eval_samples_per_second": 2622.352, | |
| "eval_steps_per_second": 12.855, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "step": 135, | |
| "total_flos": 865692760633344.0, | |
| "train_loss": 0.45122435004622846, | |
| "train_runtime": 25.3703, | |
| "train_samples_per_second": 7228.916, | |
| "train_steps_per_second": 29.562 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 750, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.0 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 5 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 865692760633344.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |