{ "best_global_step": 253, "best_metric": 0.7126866579055786, "best_model_checkpoint": "tinybert_base_train_kd_stsb/checkpoint-253", "epoch": 16.0, "eval_steps": 500, "global_step": 368, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 11.535841941833496, "learning_rate": 4.904347826086957e-05, "loss": 2.613, "step": 23 }, { "epoch": 1.0, "eval_combined_score": 0.121590196234291, "eval_loss": 2.5623927116394043, "eval_pearson": 0.11448873579502106, "eval_runtime": 0.7966, "eval_samples_per_second": 1883.001, "eval_spearmanr": 0.12869165667356094, "eval_steps_per_second": 7.532, "step": 23 }, { "epoch": 2.0, "grad_norm": 43.645957946777344, "learning_rate": 4.804347826086957e-05, "loss": 1.8186, "step": 46 }, { "epoch": 2.0, "eval_combined_score": 0.715544374833673, "eval_loss": 1.1170806884765625, "eval_pearson": 0.716871976852417, "eval_runtime": 0.7553, "eval_samples_per_second": 1985.885, "eval_spearmanr": 0.714216772814929, "eval_steps_per_second": 7.944, "step": 46 }, { "epoch": 3.0, "grad_norm": 15.847740173339844, "learning_rate": 4.7043478260869564e-05, "loss": 1.1222, "step": 69 }, { "epoch": 3.0, "eval_combined_score": 0.7812649103361896, "eval_loss": 1.0924603939056396, "eval_pearson": 0.7803035974502563, "eval_runtime": 0.7452, "eval_samples_per_second": 2012.821, "eval_spearmanr": 0.7822262232221229, "eval_steps_per_second": 8.051, "step": 69 }, { "epoch": 4.0, "grad_norm": 11.357965469360352, "learning_rate": 4.6043478260869567e-05, "loss": 0.8374, "step": 92 }, { "epoch": 4.0, "eval_combined_score": 0.8146078175768736, "eval_loss": 0.7485730648040771, "eval_pearson": 0.8169819712638855, "eval_runtime": 0.7433, "eval_samples_per_second": 2018.016, "eval_spearmanr": 0.8122336638898616, "eval_steps_per_second": 8.072, "step": 92 }, { "epoch": 5.0, "grad_norm": 33.945899963378906, "learning_rate": 4.504347826086956e-05, "loss": 0.7145, "step": 115 }, { "epoch": 5.0, "eval_combined_score": 0.8218050530535539, "eval_loss": 0.7349043488502502, "eval_pearson": 0.8232354521751404, "eval_runtime": 0.7412, "eval_samples_per_second": 2023.657, "eval_spearmanr": 0.8203746539319674, "eval_steps_per_second": 8.095, "step": 115 }, { "epoch": 6.0, "grad_norm": 8.291410446166992, "learning_rate": 4.404347826086957e-05, "loss": 0.5299, "step": 138 }, { "epoch": 6.0, "eval_combined_score": 0.8291541581362327, "eval_loss": 0.715798020362854, "eval_pearson": 0.8317586183547974, "eval_runtime": 0.7634, "eval_samples_per_second": 1964.835, "eval_spearmanr": 0.826549697917668, "eval_steps_per_second": 7.859, "step": 138 }, { "epoch": 7.0, "grad_norm": 5.146746635437012, "learning_rate": 4.304347826086957e-05, "loss": 0.4359, "step": 161 }, { "epoch": 7.0, "eval_combined_score": 0.8277698563909587, "eval_loss": 0.7249352335929871, "eval_pearson": 0.8288410902023315, "eval_runtime": 0.7508, "eval_samples_per_second": 1997.991, "eval_spearmanr": 0.8266986225795858, "eval_steps_per_second": 7.992, "step": 161 }, { "epoch": 8.0, "grad_norm": 5.357452392578125, "learning_rate": 4.204347826086957e-05, "loss": 0.3798, "step": 184 }, { "epoch": 8.0, "eval_combined_score": 0.825821478114321, "eval_loss": 0.712942898273468, "eval_pearson": 0.8281153440475464, "eval_runtime": 0.7608, "eval_samples_per_second": 1971.625, "eval_spearmanr": 0.8235276121810957, "eval_steps_per_second": 7.886, "step": 184 }, { "epoch": 9.0, "grad_norm": 5.614117622375488, "learning_rate": 4.104347826086957e-05, "loss": 0.3253, "step": 207 }, { "epoch": 9.0, "eval_combined_score": 0.816080359482636, "eval_loss": 0.7901992201805115, "eval_pearson": 0.817132830619812, "eval_runtime": 0.7697, "eval_samples_per_second": 1948.773, "eval_spearmanr": 0.81502788834546, "eval_steps_per_second": 7.795, "step": 207 }, { "epoch": 10.0, "grad_norm": 6.880125045776367, "learning_rate": 4.004347826086956e-05, "loss": 0.277, "step": 230 }, { "epoch": 10.0, "eval_combined_score": 0.8212131200017707, "eval_loss": 0.7335973381996155, "eval_pearson": 0.8229020833969116, "eval_runtime": 0.7631, "eval_samples_per_second": 1965.749, "eval_spearmanr": 0.8195241566066298, "eval_steps_per_second": 7.863, "step": 230 }, { "epoch": 11.0, "grad_norm": 5.835127830505371, "learning_rate": 3.9043478260869566e-05, "loss": 0.255, "step": 253 }, { "epoch": 11.0, "eval_combined_score": 0.8252136073634052, "eval_loss": 0.7126866579055786, "eval_pearson": 0.8275212645530701, "eval_runtime": 0.758, "eval_samples_per_second": 1978.98, "eval_spearmanr": 0.8229059501737404, "eval_steps_per_second": 7.916, "step": 253 }, { "epoch": 12.0, "grad_norm": 4.608198642730713, "learning_rate": 3.804347826086957e-05, "loss": 0.2257, "step": 276 }, { "epoch": 12.0, "eval_combined_score": 0.822482378354423, "eval_loss": 0.7646387815475464, "eval_pearson": 0.8233366012573242, "eval_runtime": 0.7643, "eval_samples_per_second": 1962.691, "eval_spearmanr": 0.8216281554515217, "eval_steps_per_second": 7.851, "step": 276 }, { "epoch": 13.0, "grad_norm": 4.352097034454346, "learning_rate": 3.704347826086957e-05, "loss": 0.204, "step": 299 }, { "epoch": 13.0, "eval_combined_score": 0.8239910243979531, "eval_loss": 0.8713997602462769, "eval_pearson": 0.8245130181312561, "eval_runtime": 0.7412, "eval_samples_per_second": 2023.683, "eval_spearmanr": 0.8234690306646502, "eval_steps_per_second": 8.095, "step": 299 }, { "epoch": 14.0, "grad_norm": 5.187889575958252, "learning_rate": 3.604347826086957e-05, "loss": 0.1957, "step": 322 }, { "epoch": 14.0, "eval_combined_score": 0.8195869793447403, "eval_loss": 0.789119303226471, "eval_pearson": 0.8212951421737671, "eval_runtime": 0.7661, "eval_samples_per_second": 1957.969, "eval_spearmanr": 0.8178788165157136, "eval_steps_per_second": 7.832, "step": 322 }, { "epoch": 15.0, "grad_norm": 6.29766321182251, "learning_rate": 3.5043478260869564e-05, "loss": 0.1725, "step": 345 }, { "epoch": 15.0, "eval_combined_score": 0.8211789182251823, "eval_loss": 0.7348401546478271, "eval_pearson": 0.8230345249176025, "eval_runtime": 0.7537, "eval_samples_per_second": 1990.18, "eval_spearmanr": 0.8193233115327619, "eval_steps_per_second": 7.961, "step": 345 }, { "epoch": 16.0, "grad_norm": 5.222928524017334, "learning_rate": 3.4043478260869566e-05, "loss": 0.1621, "step": 368 }, { "epoch": 16.0, "eval_combined_score": 0.8163744250997313, "eval_loss": 0.7909350395202637, "eval_pearson": 0.817865252494812, "eval_runtime": 0.7917, "eval_samples_per_second": 1894.615, "eval_spearmanr": 0.8148835977046506, "eval_steps_per_second": 7.578, "step": 368 }, { "epoch": 16.0, "step": 368, "total_flos": 6092331948957696.0, "train_loss": 0.6417872426302537, "train_runtime": 125.3154, "train_samples_per_second": 2293.812, "train_steps_per_second": 9.177 } ], "logging_steps": 1, "max_steps": 1150, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 5 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6092331948957696.0, "train_batch_size": 256, "trial_name": null, "trial_params": null }