{
  "best_global_step": 253,
  "best_metric": 0.7126866579055786,
  "best_model_checkpoint": "tinybert_base_train_kd_stsb/checkpoint-253",
  "epoch": 16.0,
  "eval_steps": 500,
  "global_step": 368,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 11.535841941833496,
      "learning_rate": 4.904347826086957e-05,
      "loss": 2.613,
      "step": 23
    },
    {
      "epoch": 1.0,
      "eval_combined_score": 0.121590196234291,
      "eval_loss": 2.5623927116394043,
      "eval_pearson": 0.11448873579502106,
      "eval_runtime": 0.7966,
      "eval_samples_per_second": 1883.001,
      "eval_spearmanr": 0.12869165667356094,
      "eval_steps_per_second": 7.532,
      "step": 23
    },
    {
      "epoch": 2.0,
      "grad_norm": 43.645957946777344,
      "learning_rate": 4.804347826086957e-05,
      "loss": 1.8186,
      "step": 46
    },
    {
      "epoch": 2.0,
      "eval_combined_score": 0.715544374833673,
      "eval_loss": 1.1170806884765625,
      "eval_pearson": 0.716871976852417,
      "eval_runtime": 0.7553,
      "eval_samples_per_second": 1985.885,
      "eval_spearmanr": 0.714216772814929,
      "eval_steps_per_second": 7.944,
      "step": 46
    },
    {
      "epoch": 3.0,
      "grad_norm": 15.847740173339844,
      "learning_rate": 4.7043478260869564e-05,
      "loss": 1.1222,
      "step": 69
    },
    {
      "epoch": 3.0,
      "eval_combined_score": 0.7812649103361896,
      "eval_loss": 1.0924603939056396,
      "eval_pearson": 0.7803035974502563,
      "eval_runtime": 0.7452,
      "eval_samples_per_second": 2012.821,
      "eval_spearmanr": 0.7822262232221229,
      "eval_steps_per_second": 8.051,
      "step": 69
    },
    {
      "epoch": 4.0,
      "grad_norm": 11.357965469360352,
      "learning_rate": 4.6043478260869567e-05,
      "loss": 0.8374,
      "step": 92
    },
    {
      "epoch": 4.0,
      "eval_combined_score": 0.8146078175768736,
      "eval_loss": 0.7485730648040771,
      "eval_pearson": 0.8169819712638855,
      "eval_runtime": 0.7433,
      "eval_samples_per_second": 2018.016,
      "eval_spearmanr": 0.8122336638898616,
      "eval_steps_per_second": 8.072,
      "step": 92
    },
    {
      "epoch": 5.0,
      "grad_norm": 33.945899963378906,
      "learning_rate": 4.504347826086956e-05,
      "loss": 0.7145,
      "step": 115
    },
    {
      "epoch": 5.0,
      "eval_combined_score": 0.8218050530535539,
      "eval_loss": 0.7349043488502502,
      "eval_pearson": 0.8232354521751404,
      "eval_runtime": 0.7412,
      "eval_samples_per_second": 2023.657,
      "eval_spearmanr": 0.8203746539319674,
      "eval_steps_per_second": 8.095,
      "step": 115
    },
    {
      "epoch": 6.0,
      "grad_norm": 8.291410446166992,
      "learning_rate": 4.404347826086957e-05,
      "loss": 0.5299,
      "step": 138
    },
    {
      "epoch": 6.0,
      "eval_combined_score": 0.8291541581362327,
      "eval_loss": 0.715798020362854,
      "eval_pearson": 0.8317586183547974,
      "eval_runtime": 0.7634,
      "eval_samples_per_second": 1964.835,
      "eval_spearmanr": 0.826549697917668,
      "eval_steps_per_second": 7.859,
      "step": 138
    },
    {
      "epoch": 7.0,
      "grad_norm": 5.146746635437012,
      "learning_rate": 4.304347826086957e-05,
      "loss": 0.4359,
      "step": 161
    },
    {
      "epoch": 7.0,
      "eval_combined_score": 0.8277698563909587,
      "eval_loss": 0.7249352335929871,
      "eval_pearson": 0.8288410902023315,
      "eval_runtime": 0.7508,
      "eval_samples_per_second": 1997.991,
      "eval_spearmanr": 0.8266986225795858,
      "eval_steps_per_second": 7.992,
      "step": 161
    },
    {
      "epoch": 8.0,
      "grad_norm": 5.357452392578125,
      "learning_rate": 4.204347826086957e-05,
      "loss": 0.3798,
      "step": 184
    },
    {
      "epoch": 8.0,
      "eval_combined_score": 0.825821478114321,
      "eval_loss": 0.712942898273468,
      "eval_pearson": 0.8281153440475464,
      "eval_runtime": 0.7608,
      "eval_samples_per_second": 1971.625,
      "eval_spearmanr": 0.8235276121810957,
      "eval_steps_per_second": 7.886,
      "step": 184
    },
    {
      "epoch": 9.0,
      "grad_norm": 5.614117622375488,
      "learning_rate": 4.104347826086957e-05,
      "loss": 0.3253,
      "step": 207
    },
    {
      "epoch": 9.0,
      "eval_combined_score": 0.816080359482636,
      "eval_loss": 0.7901992201805115,
      "eval_pearson": 0.817132830619812,
      "eval_runtime": 0.7697,
      "eval_samples_per_second": 1948.773,
      "eval_spearmanr": 0.81502788834546,
      "eval_steps_per_second": 7.795,
      "step": 207
    },
    {
      "epoch": 10.0,
      "grad_norm": 6.880125045776367,
      "learning_rate": 4.004347826086956e-05,
      "loss": 0.277,
      "step": 230
    },
    {
      "epoch": 10.0,
      "eval_combined_score": 0.8212131200017707,
      "eval_loss": 0.7335973381996155,
      "eval_pearson": 0.8229020833969116,
      "eval_runtime": 0.7631,
      "eval_samples_per_second": 1965.749,
      "eval_spearmanr": 0.8195241566066298,
      "eval_steps_per_second": 7.863,
      "step": 230
    },
    {
      "epoch": 11.0,
      "grad_norm": 5.835127830505371,
      "learning_rate": 3.9043478260869566e-05,
      "loss": 0.255,
      "step": 253
    },
    {
      "epoch": 11.0,
      "eval_combined_score": 0.8252136073634052,
      "eval_loss": 0.7126866579055786,
      "eval_pearson": 0.8275212645530701,
      "eval_runtime": 0.758,
      "eval_samples_per_second": 1978.98,
      "eval_spearmanr": 0.8229059501737404,
      "eval_steps_per_second": 7.916,
      "step": 253
    },
    {
      "epoch": 12.0,
      "grad_norm": 4.608198642730713,
      "learning_rate": 3.804347826086957e-05,
      "loss": 0.2257,
      "step": 276
    },
    {
      "epoch": 12.0,
      "eval_combined_score": 0.822482378354423,
      "eval_loss": 0.7646387815475464,
      "eval_pearson": 0.8233366012573242,
      "eval_runtime": 0.7643,
      "eval_samples_per_second": 1962.691,
      "eval_spearmanr": 0.8216281554515217,
      "eval_steps_per_second": 7.851,
      "step": 276
    },
    {
      "epoch": 13.0,
      "grad_norm": 4.352097034454346,
      "learning_rate": 3.704347826086957e-05,
      "loss": 0.204,
      "step": 299
    },
    {
      "epoch": 13.0,
      "eval_combined_score": 0.8239910243979531,
      "eval_loss": 0.8713997602462769,
      "eval_pearson": 0.8245130181312561,
      "eval_runtime": 0.7412,
      "eval_samples_per_second": 2023.683,
      "eval_spearmanr": 0.8234690306646502,
      "eval_steps_per_second": 8.095,
      "step": 299
    },
    {
      "epoch": 14.0,
      "grad_norm": 5.187889575958252,
      "learning_rate": 3.604347826086957e-05,
      "loss": 0.1957,
      "step": 322
    },
    {
      "epoch": 14.0,
      "eval_combined_score": 0.8195869793447403,
      "eval_loss": 0.789119303226471,
      "eval_pearson": 0.8212951421737671,
      "eval_runtime": 0.7661,
      "eval_samples_per_second": 1957.969,
      "eval_spearmanr": 0.8178788165157136,
      "eval_steps_per_second": 7.832,
      "step": 322
    },
    {
      "epoch": 15.0,
      "grad_norm": 6.29766321182251,
      "learning_rate": 3.5043478260869564e-05,
      "loss": 0.1725,
      "step": 345
    },
    {
      "epoch": 15.0,
      "eval_combined_score": 0.8211789182251823,
      "eval_loss": 0.7348401546478271,
      "eval_pearson": 0.8230345249176025,
      "eval_runtime": 0.7537,
      "eval_samples_per_second": 1990.18,
      "eval_spearmanr": 0.8193233115327619,
      "eval_steps_per_second": 7.961,
      "step": 345
    },
    {
      "epoch": 16.0,
      "grad_norm": 5.222928524017334,
      "learning_rate": 3.4043478260869566e-05,
      "loss": 0.1621,
      "step": 368
    },
    {
      "epoch": 16.0,
      "eval_combined_score": 0.8163744250997313,
      "eval_loss": 0.7909350395202637,
      "eval_pearson": 0.817865252494812,
      "eval_runtime": 0.7917,
      "eval_samples_per_second": 1894.615,
      "eval_spearmanr": 0.8148835977046506,
      "eval_steps_per_second": 7.578,
      "step": 368
    },
    {
      "epoch": 16.0,
      "step": 368,
      "total_flos": 6092331948957696.0,
      "train_loss": 0.6417872426302537,
      "train_runtime": 125.3154,
      "train_samples_per_second": 2293.812,
      "train_steps_per_second": 9.177
    }
  ],
  "logging_steps": 1,
  "max_steps": 1150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 5
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6092331948957696.0,
  "train_batch_size": 256,
  "trial_name": null,
  "trial_params": null
}