variant_tapt_freeze_llrd_LR_5e / trainer_state.json
Mardiyyah's picture
End of training
be796a2 verified
{
"best_metric": 0.9155424324186734,
"best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/hf/PDBE_V1/base/variant_tapt_freeze_llrd_LR_5e/checkpoint-5984",
"epoch": 20.0,
"eval_steps": 500,
"global_step": 7040,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 5.500258922576904,
"learning_rate": 9.886363636363637e-06,
"loss": 1.9616,
"step": 352
},
{
"epoch": 1.0,
"eval_accuracy": 0.7942997075028909,
"eval_f1": 0.4435347694826792,
"eval_loss": 0.8090189695358276,
"eval_precision": 0.44396640032780166,
"eval_recall": 0.44310397709845617,
"eval_runtime": 4.7106,
"eval_samples_per_second": 256.019,
"eval_steps_per_second": 16.134,
"step": 352
},
{
"epoch": 2.0,
"grad_norm": 3.6966164112091064,
"learning_rate": 1.9886363636363638e-05,
"loss": 0.5637,
"step": 704
},
{
"epoch": 2.0,
"eval_accuracy": 0.907670679999093,
"eval_f1": 0.7394873848618342,
"eval_loss": 0.3523786664009094,
"eval_precision": 0.7244727807748896,
"eval_recall": 0.7551375115018915,
"eval_runtime": 4.7198,
"eval_samples_per_second": 255.517,
"eval_steps_per_second": 16.102,
"step": 704
},
{
"epoch": 3.0,
"grad_norm": 3.4363410472869873,
"learning_rate": 1.8901515151515153e-05,
"loss": 0.2766,
"step": 1056
},
{
"epoch": 3.0,
"eval_accuracy": 0.9382581683785683,
"eval_f1": 0.8332320162107396,
"eval_loss": 0.2291891872882843,
"eval_precision": 0.8257857214579777,
"eval_recall": 0.8408138227175136,
"eval_runtime": 4.7342,
"eval_samples_per_second": 254.741,
"eval_steps_per_second": 16.053,
"step": 1056
},
{
"epoch": 4.0,
"grad_norm": 7.272336006164551,
"learning_rate": 1.779040404040404e-05,
"loss": 0.1736,
"step": 1408
},
{
"epoch": 4.0,
"eval_accuracy": 0.9463301816202979,
"eval_f1": 0.8636762504516595,
"eval_loss": 0.21017228066921234,
"eval_precision": 0.872185154295246,
"eval_recall": 0.8553317656681321,
"eval_runtime": 4.6969,
"eval_samples_per_second": 256.763,
"eval_steps_per_second": 16.181,
"step": 1408
},
{
"epoch": 5.0,
"grad_norm": 4.899765491485596,
"learning_rate": 1.667929292929293e-05,
"loss": 0.1209,
"step": 1760
},
{
"epoch": 5.0,
"eval_accuracy": 0.9505929301861551,
"eval_f1": 0.8776264492937066,
"eval_loss": 0.19507275521755219,
"eval_precision": 0.8693079237713139,
"eval_recall": 0.8861057151620488,
"eval_runtime": 4.7971,
"eval_samples_per_second": 251.404,
"eval_steps_per_second": 15.843,
"step": 1760
},
{
"epoch": 6.0,
"grad_norm": 6.236667156219482,
"learning_rate": 1.5568181818181822e-05,
"loss": 0.0846,
"step": 2112
},
{
"epoch": 6.0,
"eval_accuracy": 0.9544248690565268,
"eval_f1": 0.8910820208631622,
"eval_loss": 0.1981223076581955,
"eval_precision": 0.8913554987212277,
"eval_recall": 0.8908087107657704,
"eval_runtime": 4.7584,
"eval_samples_per_second": 253.448,
"eval_steps_per_second": 15.972,
"step": 2112
},
{
"epoch": 7.0,
"grad_norm": 2.797636032104492,
"learning_rate": 1.4457070707070708e-05,
"loss": 0.0641,
"step": 2464
},
{
"epoch": 7.0,
"eval_accuracy": 0.9560120626714737,
"eval_f1": 0.8992974238875878,
"eval_loss": 0.1916956752538681,
"eval_precision": 0.8956495284453909,
"eval_recall": 0.9029751559145281,
"eval_runtime": 4.6977,
"eval_samples_per_second": 256.723,
"eval_steps_per_second": 16.178,
"step": 2464
},
{
"epoch": 8.0,
"grad_norm": 0.630720317363739,
"learning_rate": 1.3349116161616163e-05,
"loss": 0.0485,
"step": 2816
},
{
"epoch": 8.0,
"eval_accuracy": 0.9574858853139242,
"eval_f1": 0.9026530715771318,
"eval_loss": 0.20014619827270508,
"eval_precision": 0.8957913813934756,
"eval_recall": 0.9096206931806564,
"eval_runtime": 4.7071,
"eval_samples_per_second": 256.208,
"eval_steps_per_second": 16.146,
"step": 2816
},
{
"epoch": 9.0,
"grad_norm": 1.6925524473190308,
"learning_rate": 1.223800505050505e-05,
"loss": 0.0374,
"step": 3168
},
{
"epoch": 9.0,
"eval_accuracy": 0.9577353014534159,
"eval_f1": 0.9029548674357417,
"eval_loss": 0.19619333744049072,
"eval_precision": 0.8938194931383352,
"eval_recall": 0.9122789080871077,
"eval_runtime": 4.7068,
"eval_samples_per_second": 256.224,
"eval_steps_per_second": 16.147,
"step": 3168
},
{
"epoch": 10.0,
"grad_norm": 2.707470178604126,
"learning_rate": 1.1126893939393941e-05,
"loss": 0.0299,
"step": 3520
},
{
"epoch": 10.0,
"eval_accuracy": 0.9590050563453734,
"eval_f1": 0.9061852020431902,
"eval_loss": 0.20695683360099792,
"eval_precision": 0.896617293835068,
"eval_recall": 0.915959513342194,
"eval_runtime": 4.8969,
"eval_samples_per_second": 246.279,
"eval_steps_per_second": 15.52,
"step": 3520
},
{
"epoch": 11.0,
"grad_norm": 1.546488881111145,
"learning_rate": 1.001578282828283e-05,
"loss": 0.0238,
"step": 3872
},
{
"epoch": 11.0,
"eval_accuracy": 0.9591411015123688,
"eval_f1": 0.909967030180066,
"eval_loss": 0.2103826105594635,
"eval_precision": 0.9029595329172538,
"eval_recall": 0.9170841427256927,
"eval_runtime": 4.7212,
"eval_samples_per_second": 255.442,
"eval_steps_per_second": 16.098,
"step": 3872
},
{
"epoch": 12.0,
"grad_norm": 3.5837290287017822,
"learning_rate": 8.904671717171718e-06,
"loss": 0.0195,
"step": 4224
},
{
"epoch": 12.0,
"eval_accuracy": 0.9592998208738635,
"eval_f1": 0.9081518987341772,
"eval_loss": 0.21696116030216217,
"eval_precision": 0.8995887250476478,
"eval_recall": 0.9168796646559656,
"eval_runtime": 4.7316,
"eval_samples_per_second": 254.88,
"eval_steps_per_second": 16.062,
"step": 4224
},
{
"epoch": 13.0,
"grad_norm": 1.4384242296218872,
"learning_rate": 7.793560606060607e-06,
"loss": 0.0158,
"step": 4576
},
{
"epoch": 13.0,
"eval_accuracy": 0.9607509693218148,
"eval_f1": 0.911646790154783,
"eval_loss": 0.21840226650238037,
"eval_precision": 0.9050785973397824,
"eval_recall": 0.9183110111440548,
"eval_runtime": 4.7576,
"eval_samples_per_second": 253.492,
"eval_steps_per_second": 15.975,
"step": 4576
},
{
"epoch": 14.0,
"grad_norm": 0.07870839536190033,
"learning_rate": 6.685606060606061e-06,
"loss": 0.013,
"step": 4928
},
{
"epoch": 14.0,
"eval_accuracy": 0.9601614402648346,
"eval_f1": 0.9112886910782925,
"eval_loss": 0.225505068898201,
"eval_precision": 0.9017115403863477,
"eval_recall": 0.9210714650853696,
"eval_runtime": 4.7173,
"eval_samples_per_second": 255.653,
"eval_steps_per_second": 16.111,
"step": 4928
},
{
"epoch": 15.0,
"grad_norm": 0.22939985990524292,
"learning_rate": 5.574494949494949e-06,
"loss": 0.011,
"step": 5280
},
{
"epoch": 15.0,
"eval_accuracy": 0.9600027209033399,
"eval_f1": 0.911293592862936,
"eval_loss": 0.23664474487304688,
"eval_precision": 0.9036895546395898,
"eval_recall": 0.9190266843880994,
"eval_runtime": 4.6625,
"eval_samples_per_second": 258.659,
"eval_steps_per_second": 16.3,
"step": 5280
},
{
"epoch": 16.0,
"grad_norm": 1.5021331310272217,
"learning_rate": 4.463383838383838e-06,
"loss": 0.0101,
"step": 5632
},
{
"epoch": 16.0,
"eval_accuracy": 0.9613404983787951,
"eval_f1": 0.9153041535493043,
"eval_loss": 0.23169253766536713,
"eval_precision": 0.909210128114597,
"eval_recall": 0.9214804212248237,
"eval_runtime": 4.7345,
"eval_samples_per_second": 254.724,
"eval_steps_per_second": 16.052,
"step": 5632
},
{
"epoch": 17.0,
"grad_norm": 2.7792775630950928,
"learning_rate": 3.352272727272727e-06,
"loss": 0.0083,
"step": 5984
},
{
"epoch": 17.0,
"eval_accuracy": 0.9616579371017845,
"eval_f1": 0.9155424324186734,
"eval_loss": 0.2343681901693344,
"eval_precision": 0.9117825998783208,
"eval_recall": 0.9193334014926899,
"eval_runtime": 4.7707,
"eval_samples_per_second": 252.794,
"eval_steps_per_second": 15.931,
"step": 5984
},
{
"epoch": 18.0,
"grad_norm": 0.13295000791549683,
"learning_rate": 2.2411616161616163e-06,
"loss": 0.007,
"step": 6336
},
{
"epoch": 18.0,
"eval_accuracy": 0.9614538693512913,
"eval_f1": 0.9152025997765817,
"eval_loss": 0.23998132348060608,
"eval_precision": 0.9091092504791688,
"eval_recall": 0.9213781821899601,
"eval_runtime": 4.7197,
"eval_samples_per_second": 255.526,
"eval_steps_per_second": 16.103,
"step": 6336
},
{
"epoch": 19.0,
"grad_norm": 0.09591899812221527,
"learning_rate": 1.1300505050505053e-06,
"loss": 0.0067,
"step": 6688
},
{
"epoch": 19.0,
"eval_accuracy": 0.9611364306283019,
"eval_f1": 0.9143581938102486,
"eval_loss": 0.2395462840795517,
"eval_precision": 0.9075435592708229,
"eval_recall": 0.9212759431550966,
"eval_runtime": 4.7907,
"eval_samples_per_second": 251.737,
"eval_steps_per_second": 15.864,
"step": 6688
},
{
"epoch": 20.0,
"grad_norm": 0.8892824649810791,
"learning_rate": 1.893939393939394e-08,
"loss": 0.0063,
"step": 7040
},
{
"epoch": 20.0,
"eval_accuracy": 0.9611364306283019,
"eval_f1": 0.9145468392993145,
"eval_loss": 0.24203987419605255,
"eval_precision": 0.9084123461771233,
"eval_recall": 0.920764747980779,
"eval_runtime": 4.8931,
"eval_samples_per_second": 246.467,
"eval_steps_per_second": 15.532,
"step": 7040
},
{
"epoch": 20.0,
"step": 7040,
"total_flos": 2304092004793320.0,
"train_loss": 0.1741130460731008,
"train_runtime": 917.4844,
"train_samples_per_second": 122.705,
"train_steps_per_second": 7.673
}
],
"logging_steps": 500,
"max_steps": 7040,
"num_input_tokens_seen": 0,
"num_train_epochs": 20,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2304092004793320.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}