tinybert_base_train_kd_stsb / trainer_state.json
gokulsrinivasagan's picture
End of training
be9af84 verified
{
"best_global_step": 253,
"best_metric": 0.7126866579055786,
"best_model_checkpoint": "tinybert_base_train_kd_stsb/checkpoint-253",
"epoch": 16.0,
"eval_steps": 500,
"global_step": 368,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 11.535841941833496,
"learning_rate": 4.904347826086957e-05,
"loss": 2.613,
"step": 23
},
{
"epoch": 1.0,
"eval_combined_score": 0.121590196234291,
"eval_loss": 2.5623927116394043,
"eval_pearson": 0.11448873579502106,
"eval_runtime": 0.7966,
"eval_samples_per_second": 1883.001,
"eval_spearmanr": 0.12869165667356094,
"eval_steps_per_second": 7.532,
"step": 23
},
{
"epoch": 2.0,
"grad_norm": 43.645957946777344,
"learning_rate": 4.804347826086957e-05,
"loss": 1.8186,
"step": 46
},
{
"epoch": 2.0,
"eval_combined_score": 0.715544374833673,
"eval_loss": 1.1170806884765625,
"eval_pearson": 0.716871976852417,
"eval_runtime": 0.7553,
"eval_samples_per_second": 1985.885,
"eval_spearmanr": 0.714216772814929,
"eval_steps_per_second": 7.944,
"step": 46
},
{
"epoch": 3.0,
"grad_norm": 15.847740173339844,
"learning_rate": 4.7043478260869564e-05,
"loss": 1.1222,
"step": 69
},
{
"epoch": 3.0,
"eval_combined_score": 0.7812649103361896,
"eval_loss": 1.0924603939056396,
"eval_pearson": 0.7803035974502563,
"eval_runtime": 0.7452,
"eval_samples_per_second": 2012.821,
"eval_spearmanr": 0.7822262232221229,
"eval_steps_per_second": 8.051,
"step": 69
},
{
"epoch": 4.0,
"grad_norm": 11.357965469360352,
"learning_rate": 4.6043478260869567e-05,
"loss": 0.8374,
"step": 92
},
{
"epoch": 4.0,
"eval_combined_score": 0.8146078175768736,
"eval_loss": 0.7485730648040771,
"eval_pearson": 0.8169819712638855,
"eval_runtime": 0.7433,
"eval_samples_per_second": 2018.016,
"eval_spearmanr": 0.8122336638898616,
"eval_steps_per_second": 8.072,
"step": 92
},
{
"epoch": 5.0,
"grad_norm": 33.945899963378906,
"learning_rate": 4.504347826086956e-05,
"loss": 0.7145,
"step": 115
},
{
"epoch": 5.0,
"eval_combined_score": 0.8218050530535539,
"eval_loss": 0.7349043488502502,
"eval_pearson": 0.8232354521751404,
"eval_runtime": 0.7412,
"eval_samples_per_second": 2023.657,
"eval_spearmanr": 0.8203746539319674,
"eval_steps_per_second": 8.095,
"step": 115
},
{
"epoch": 6.0,
"grad_norm": 8.291410446166992,
"learning_rate": 4.404347826086957e-05,
"loss": 0.5299,
"step": 138
},
{
"epoch": 6.0,
"eval_combined_score": 0.8291541581362327,
"eval_loss": 0.715798020362854,
"eval_pearson": 0.8317586183547974,
"eval_runtime": 0.7634,
"eval_samples_per_second": 1964.835,
"eval_spearmanr": 0.826549697917668,
"eval_steps_per_second": 7.859,
"step": 138
},
{
"epoch": 7.0,
"grad_norm": 5.146746635437012,
"learning_rate": 4.304347826086957e-05,
"loss": 0.4359,
"step": 161
},
{
"epoch": 7.0,
"eval_combined_score": 0.8277698563909587,
"eval_loss": 0.7249352335929871,
"eval_pearson": 0.8288410902023315,
"eval_runtime": 0.7508,
"eval_samples_per_second": 1997.991,
"eval_spearmanr": 0.8266986225795858,
"eval_steps_per_second": 7.992,
"step": 161
},
{
"epoch": 8.0,
"grad_norm": 5.357452392578125,
"learning_rate": 4.204347826086957e-05,
"loss": 0.3798,
"step": 184
},
{
"epoch": 8.0,
"eval_combined_score": 0.825821478114321,
"eval_loss": 0.712942898273468,
"eval_pearson": 0.8281153440475464,
"eval_runtime": 0.7608,
"eval_samples_per_second": 1971.625,
"eval_spearmanr": 0.8235276121810957,
"eval_steps_per_second": 7.886,
"step": 184
},
{
"epoch": 9.0,
"grad_norm": 5.614117622375488,
"learning_rate": 4.104347826086957e-05,
"loss": 0.3253,
"step": 207
},
{
"epoch": 9.0,
"eval_combined_score": 0.816080359482636,
"eval_loss": 0.7901992201805115,
"eval_pearson": 0.817132830619812,
"eval_runtime": 0.7697,
"eval_samples_per_second": 1948.773,
"eval_spearmanr": 0.81502788834546,
"eval_steps_per_second": 7.795,
"step": 207
},
{
"epoch": 10.0,
"grad_norm": 6.880125045776367,
"learning_rate": 4.004347826086956e-05,
"loss": 0.277,
"step": 230
},
{
"epoch": 10.0,
"eval_combined_score": 0.8212131200017707,
"eval_loss": 0.7335973381996155,
"eval_pearson": 0.8229020833969116,
"eval_runtime": 0.7631,
"eval_samples_per_second": 1965.749,
"eval_spearmanr": 0.8195241566066298,
"eval_steps_per_second": 7.863,
"step": 230
},
{
"epoch": 11.0,
"grad_norm": 5.835127830505371,
"learning_rate": 3.9043478260869566e-05,
"loss": 0.255,
"step": 253
},
{
"epoch": 11.0,
"eval_combined_score": 0.8252136073634052,
"eval_loss": 0.7126866579055786,
"eval_pearson": 0.8275212645530701,
"eval_runtime": 0.758,
"eval_samples_per_second": 1978.98,
"eval_spearmanr": 0.8229059501737404,
"eval_steps_per_second": 7.916,
"step": 253
},
{
"epoch": 12.0,
"grad_norm": 4.608198642730713,
"learning_rate": 3.804347826086957e-05,
"loss": 0.2257,
"step": 276
},
{
"epoch": 12.0,
"eval_combined_score": 0.822482378354423,
"eval_loss": 0.7646387815475464,
"eval_pearson": 0.8233366012573242,
"eval_runtime": 0.7643,
"eval_samples_per_second": 1962.691,
"eval_spearmanr": 0.8216281554515217,
"eval_steps_per_second": 7.851,
"step": 276
},
{
"epoch": 13.0,
"grad_norm": 4.352097034454346,
"learning_rate": 3.704347826086957e-05,
"loss": 0.204,
"step": 299
},
{
"epoch": 13.0,
"eval_combined_score": 0.8239910243979531,
"eval_loss": 0.8713997602462769,
"eval_pearson": 0.8245130181312561,
"eval_runtime": 0.7412,
"eval_samples_per_second": 2023.683,
"eval_spearmanr": 0.8234690306646502,
"eval_steps_per_second": 8.095,
"step": 299
},
{
"epoch": 14.0,
"grad_norm": 5.187889575958252,
"learning_rate": 3.604347826086957e-05,
"loss": 0.1957,
"step": 322
},
{
"epoch": 14.0,
"eval_combined_score": 0.8195869793447403,
"eval_loss": 0.789119303226471,
"eval_pearson": 0.8212951421737671,
"eval_runtime": 0.7661,
"eval_samples_per_second": 1957.969,
"eval_spearmanr": 0.8178788165157136,
"eval_steps_per_second": 7.832,
"step": 322
},
{
"epoch": 15.0,
"grad_norm": 6.29766321182251,
"learning_rate": 3.5043478260869564e-05,
"loss": 0.1725,
"step": 345
},
{
"epoch": 15.0,
"eval_combined_score": 0.8211789182251823,
"eval_loss": 0.7348401546478271,
"eval_pearson": 0.8230345249176025,
"eval_runtime": 0.7537,
"eval_samples_per_second": 1990.18,
"eval_spearmanr": 0.8193233115327619,
"eval_steps_per_second": 7.961,
"step": 345
},
{
"epoch": 16.0,
"grad_norm": 5.222928524017334,
"learning_rate": 3.4043478260869566e-05,
"loss": 0.1621,
"step": 368
},
{
"epoch": 16.0,
"eval_combined_score": 0.8163744250997313,
"eval_loss": 0.7909350395202637,
"eval_pearson": 0.817865252494812,
"eval_runtime": 0.7917,
"eval_samples_per_second": 1894.615,
"eval_spearmanr": 0.8148835977046506,
"eval_steps_per_second": 7.578,
"step": 368
},
{
"epoch": 16.0,
"step": 368,
"total_flos": 6092331948957696.0,
"train_loss": 0.6417872426302537,
"train_runtime": 125.3154,
"train_samples_per_second": 2293.812,
"train_steps_per_second": 9.177
}
],
"logging_steps": 1,
"max_steps": 1150,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 5
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6092331948957696.0,
"train_batch_size": 256,
"trial_name": null,
"trial_params": null
}