variant-tapt_base-LR_2e-05 / trainer_state.json
Mardiyyah's picture
End of training
f829109 verified
{
"best_metric": 1.4045588970184326,
"best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/Continued_pretraining/TAPT/microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext/variant-tapt_base-LR_2e-05/checkpoint-798",
"epoch": 50.0,
"eval_steps": 500,
"global_step": 950,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 3.445349931716919,
"learning_rate": 6.31578947368421e-06,
"loss": 1.7258,
"step": 19
},
{
"epoch": 1.0,
"eval_accuracy": 0.691991341991342,
"eval_loss": 1.7872645854949951,
"eval_runtime": 1.2711,
"eval_samples_per_second": 94.406,
"eval_steps_per_second": 6.294,
"step": 19
},
{
"epoch": 2.0,
"grad_norm": 3.7695751190185547,
"learning_rate": 1.263157894736842e-05,
"loss": 1.708,
"step": 38
},
{
"epoch": 2.0,
"eval_accuracy": 0.6962121212121212,
"eval_loss": 1.73982572555542,
"eval_runtime": 0.9424,
"eval_samples_per_second": 127.337,
"eval_steps_per_second": 8.489,
"step": 38
},
{
"epoch": 3.0,
"grad_norm": 3.6540446281433105,
"learning_rate": 1.929824561403509e-05,
"loss": 1.6506,
"step": 57
},
{
"epoch": 3.0,
"eval_accuracy": 0.6972943722943723,
"eval_loss": 1.680162787437439,
"eval_runtime": 0.9581,
"eval_samples_per_second": 125.245,
"eval_steps_per_second": 8.35,
"step": 57
},
{
"epoch": 4.0,
"grad_norm": 3.684107780456543,
"learning_rate": 1.961926091825308e-05,
"loss": 1.5883,
"step": 76
},
{
"epoch": 4.0,
"eval_accuracy": 0.7041125541125541,
"eval_loss": 1.6647979021072388,
"eval_runtime": 1.1313,
"eval_samples_per_second": 106.076,
"eval_steps_per_second": 7.072,
"step": 76
},
{
"epoch": 5.0,
"grad_norm": 3.216653347015381,
"learning_rate": 1.921612541993281e-05,
"loss": 1.567,
"step": 95
},
{
"epoch": 5.0,
"eval_accuracy": 0.703030303030303,
"eval_loss": 1.6342447996139526,
"eval_runtime": 1.1214,
"eval_samples_per_second": 107.012,
"eval_steps_per_second": 7.134,
"step": 95
},
{
"epoch": 6.0,
"grad_norm": 3.901475191116333,
"learning_rate": 1.8790593505039197e-05,
"loss": 1.5485,
"step": 114
},
{
"epoch": 6.0,
"eval_accuracy": 0.7146103896103896,
"eval_loss": 1.542972207069397,
"eval_runtime": 0.9254,
"eval_samples_per_second": 129.672,
"eval_steps_per_second": 8.645,
"step": 114
},
{
"epoch": 7.0,
"grad_norm": 3.615143299102783,
"learning_rate": 1.836506159014558e-05,
"loss": 1.5105,
"step": 133
},
{
"epoch": 7.0,
"eval_accuracy": 0.7112554112554113,
"eval_loss": 1.529628872871399,
"eval_runtime": 0.9685,
"eval_samples_per_second": 123.909,
"eval_steps_per_second": 8.261,
"step": 133
},
{
"epoch": 8.0,
"grad_norm": 3.3348517417907715,
"learning_rate": 1.793952967525196e-05,
"loss": 1.4635,
"step": 152
},
{
"epoch": 8.0,
"eval_accuracy": 0.7077922077922078,
"eval_loss": 1.6213933229446411,
"eval_runtime": 0.964,
"eval_samples_per_second": 124.478,
"eval_steps_per_second": 8.299,
"step": 152
},
{
"epoch": 9.0,
"grad_norm": 3.2985076904296875,
"learning_rate": 1.7513997760358343e-05,
"loss": 1.4841,
"step": 171
},
{
"epoch": 9.0,
"eval_accuracy": 0.712012987012987,
"eval_loss": 1.521169662475586,
"eval_runtime": 0.9525,
"eval_samples_per_second": 125.98,
"eval_steps_per_second": 8.399,
"step": 171
},
{
"epoch": 10.0,
"grad_norm": 3.7003631591796875,
"learning_rate": 1.708846584546473e-05,
"loss": 1.4663,
"step": 190
},
{
"epoch": 10.0,
"eval_accuracy": 0.7034632034632035,
"eval_loss": 1.5628341436386108,
"eval_runtime": 0.9596,
"eval_samples_per_second": 125.047,
"eval_steps_per_second": 8.336,
"step": 190
},
{
"epoch": 11.0,
"grad_norm": 3.4336016178131104,
"learning_rate": 1.666293393057111e-05,
"loss": 1.4282,
"step": 209
},
{
"epoch": 11.0,
"eval_accuracy": 0.7164502164502164,
"eval_loss": 1.5350743532180786,
"eval_runtime": 0.9381,
"eval_samples_per_second": 127.92,
"eval_steps_per_second": 8.528,
"step": 209
},
{
"epoch": 12.0,
"grad_norm": 3.400257110595703,
"learning_rate": 1.6237402015677492e-05,
"loss": 1.4511,
"step": 228
},
{
"epoch": 12.0,
"eval_accuracy": 0.7095238095238096,
"eval_loss": 1.5299878120422363,
"eval_runtime": 0.9815,
"eval_samples_per_second": 122.261,
"eval_steps_per_second": 8.151,
"step": 228
},
{
"epoch": 13.0,
"grad_norm": 3.7612063884735107,
"learning_rate": 1.5811870100783874e-05,
"loss": 1.4318,
"step": 247
},
{
"epoch": 13.0,
"eval_accuracy": 0.7148268398268398,
"eval_loss": 1.5255870819091797,
"eval_runtime": 1.0085,
"eval_samples_per_second": 118.989,
"eval_steps_per_second": 7.933,
"step": 247
},
{
"epoch": 14.0,
"grad_norm": 4.146888732910156,
"learning_rate": 1.5408734602463605e-05,
"loss": 1.4241,
"step": 266
},
{
"epoch": 14.0,
"eval_accuracy": 0.7146103896103896,
"eval_loss": 1.4872480630874634,
"eval_runtime": 0.9584,
"eval_samples_per_second": 125.214,
"eval_steps_per_second": 8.348,
"step": 266
},
{
"epoch": 15.0,
"grad_norm": 3.4961729049682617,
"learning_rate": 1.498320268756999e-05,
"loss": 1.4235,
"step": 285
},
{
"epoch": 15.0,
"eval_accuracy": 0.7087662337662337,
"eval_loss": 1.543083667755127,
"eval_runtime": 0.9678,
"eval_samples_per_second": 123.991,
"eval_steps_per_second": 8.266,
"step": 285
},
{
"epoch": 16.0,
"grad_norm": 3.633533477783203,
"learning_rate": 1.4557670772676373e-05,
"loss": 1.3905,
"step": 304
},
{
"epoch": 16.0,
"eval_accuracy": 0.7096320346320346,
"eval_loss": 1.5830901861190796,
"eval_runtime": 0.9543,
"eval_samples_per_second": 125.744,
"eval_steps_per_second": 8.383,
"step": 304
},
{
"epoch": 17.0,
"grad_norm": 3.480095863342285,
"learning_rate": 1.4132138857782756e-05,
"loss": 1.3526,
"step": 323
},
{
"epoch": 17.0,
"eval_accuracy": 0.7175324675324676,
"eval_loss": 1.4920153617858887,
"eval_runtime": 0.9587,
"eval_samples_per_second": 125.168,
"eval_steps_per_second": 8.345,
"step": 323
},
{
"epoch": 18.0,
"grad_norm": 3.672355890274048,
"learning_rate": 1.3706606942889138e-05,
"loss": 1.3733,
"step": 342
},
{
"epoch": 18.0,
"eval_accuracy": 0.7103896103896103,
"eval_loss": 1.5017799139022827,
"eval_runtime": 0.947,
"eval_samples_per_second": 126.722,
"eval_steps_per_second": 8.448,
"step": 342
},
{
"epoch": 19.0,
"grad_norm": 3.355855941772461,
"learning_rate": 1.3281075027995522e-05,
"loss": 1.3673,
"step": 361
},
{
"epoch": 19.0,
"eval_accuracy": 0.7179653679653679,
"eval_loss": 1.4765794277191162,
"eval_runtime": 0.9661,
"eval_samples_per_second": 124.207,
"eval_steps_per_second": 8.28,
"step": 361
},
{
"epoch": 20.0,
"grad_norm": 3.565549850463867,
"learning_rate": 1.2855543113101904e-05,
"loss": 1.3631,
"step": 380
},
{
"epoch": 20.0,
"eval_accuracy": 0.7141774891774891,
"eval_loss": 1.4877734184265137,
"eval_runtime": 1.0066,
"eval_samples_per_second": 119.211,
"eval_steps_per_second": 7.947,
"step": 380
},
{
"epoch": 21.0,
"grad_norm": 3.4417223930358887,
"learning_rate": 1.2430011198208288e-05,
"loss": 1.3709,
"step": 399
},
{
"epoch": 21.0,
"eval_accuracy": 0.7038961038961039,
"eval_loss": 1.542179822921753,
"eval_runtime": 1.021,
"eval_samples_per_second": 117.535,
"eval_steps_per_second": 7.836,
"step": 399
},
{
"epoch": 22.0,
"grad_norm": 3.8988301753997803,
"learning_rate": 1.2004479283314671e-05,
"loss": 1.3408,
"step": 418
},
{
"epoch": 22.0,
"eval_accuracy": 0.7205627705627705,
"eval_loss": 1.4855471849441528,
"eval_runtime": 0.9705,
"eval_samples_per_second": 123.653,
"eval_steps_per_second": 8.244,
"step": 418
},
{
"epoch": 23.0,
"grad_norm": 3.567075729370117,
"learning_rate": 1.1578947368421053e-05,
"loss": 1.3311,
"step": 437
},
{
"epoch": 23.0,
"eval_accuracy": 0.7156926406926407,
"eval_loss": 1.5094949007034302,
"eval_runtime": 0.9498,
"eval_samples_per_second": 126.338,
"eval_steps_per_second": 8.423,
"step": 437
},
{
"epoch": 24.0,
"grad_norm": 3.7643215656280518,
"learning_rate": 1.1153415453527437e-05,
"loss": 1.3144,
"step": 456
},
{
"epoch": 24.0,
"eval_accuracy": 0.7156926406926407,
"eval_loss": 1.5173320770263672,
"eval_runtime": 0.918,
"eval_samples_per_second": 130.722,
"eval_steps_per_second": 8.715,
"step": 456
},
{
"epoch": 25.0,
"grad_norm": 4.011295318603516,
"learning_rate": 1.0727883538633819e-05,
"loss": 1.297,
"step": 475
},
{
"epoch": 25.0,
"eval_accuracy": 0.7215367965367966,
"eval_loss": 1.4742799997329712,
"eval_runtime": 0.9605,
"eval_samples_per_second": 124.934,
"eval_steps_per_second": 8.329,
"step": 475
},
{
"epoch": 26.0,
"grad_norm": 3.695364475250244,
"learning_rate": 1.0302351623740203e-05,
"loss": 1.3343,
"step": 494
},
{
"epoch": 26.0,
"eval_accuracy": 0.7112554112554113,
"eval_loss": 1.5012328624725342,
"eval_runtime": 0.9802,
"eval_samples_per_second": 122.418,
"eval_steps_per_second": 8.161,
"step": 494
},
{
"epoch": 27.0,
"grad_norm": 3.5467514991760254,
"learning_rate": 9.876819708846585e-06,
"loss": 1.2949,
"step": 513
},
{
"epoch": 27.0,
"eval_accuracy": 0.7146103896103896,
"eval_loss": 1.4987872838974,
"eval_runtime": 0.9828,
"eval_samples_per_second": 122.102,
"eval_steps_per_second": 8.14,
"step": 513
},
{
"epoch": 28.0,
"grad_norm": 3.4811294078826904,
"learning_rate": 9.451287793952969e-06,
"loss": 1.3182,
"step": 532
},
{
"epoch": 28.0,
"eval_accuracy": 0.7242424242424242,
"eval_loss": 1.4198087453842163,
"eval_runtime": 0.9426,
"eval_samples_per_second": 127.306,
"eval_steps_per_second": 8.487,
"step": 532
},
{
"epoch": 29.0,
"grad_norm": 3.478583812713623,
"learning_rate": 9.02575587905935e-06,
"loss": 1.3005,
"step": 551
},
{
"epoch": 29.0,
"eval_accuracy": 0.7161255411255412,
"eval_loss": 1.4723750352859497,
"eval_runtime": 0.9211,
"eval_samples_per_second": 130.285,
"eval_steps_per_second": 8.686,
"step": 551
},
{
"epoch": 30.0,
"grad_norm": 3.6417274475097656,
"learning_rate": 8.600223964165734e-06,
"loss": 1.2821,
"step": 570
},
{
"epoch": 30.0,
"eval_accuracy": 0.7204545454545455,
"eval_loss": 1.4705064296722412,
"eval_runtime": 0.9842,
"eval_samples_per_second": 121.923,
"eval_steps_per_second": 8.128,
"step": 570
},
{
"epoch": 31.0,
"grad_norm": 3.6498682498931885,
"learning_rate": 8.174692049272118e-06,
"loss": 1.278,
"step": 589
},
{
"epoch": 31.0,
"eval_accuracy": 0.7201298701298702,
"eval_loss": 1.4779876470565796,
"eval_runtime": 0.9416,
"eval_samples_per_second": 127.449,
"eval_steps_per_second": 8.497,
"step": 589
},
{
"epoch": 32.0,
"grad_norm": 3.44429087638855,
"learning_rate": 7.7491601343785e-06,
"loss": 1.274,
"step": 608
},
{
"epoch": 32.0,
"eval_accuracy": 0.7128787878787879,
"eval_loss": 1.5007588863372803,
"eval_runtime": 0.951,
"eval_samples_per_second": 126.182,
"eval_steps_per_second": 8.412,
"step": 608
},
{
"epoch": 33.0,
"grad_norm": 3.3195676803588867,
"learning_rate": 7.323628219484883e-06,
"loss": 1.2849,
"step": 627
},
{
"epoch": 33.0,
"eval_accuracy": 0.720021645021645,
"eval_loss": 1.457064151763916,
"eval_runtime": 0.9439,
"eval_samples_per_second": 127.138,
"eval_steps_per_second": 8.476,
"step": 627
},
{
"epoch": 34.0,
"grad_norm": 3.599475145339966,
"learning_rate": 6.8980963045912665e-06,
"loss": 1.2607,
"step": 646
},
{
"epoch": 34.0,
"eval_accuracy": 0.7246753246753247,
"eval_loss": 1.4253478050231934,
"eval_runtime": 0.9878,
"eval_samples_per_second": 121.479,
"eval_steps_per_second": 8.099,
"step": 646
},
{
"epoch": 35.0,
"grad_norm": 3.3579301834106445,
"learning_rate": 6.4725643896976485e-06,
"loss": 1.2673,
"step": 665
},
{
"epoch": 35.0,
"eval_accuracy": 0.7100649350649351,
"eval_loss": 1.5111687183380127,
"eval_runtime": 0.9938,
"eval_samples_per_second": 120.753,
"eval_steps_per_second": 8.05,
"step": 665
},
{
"epoch": 36.0,
"grad_norm": 3.2128427028656006,
"learning_rate": 6.047032474804032e-06,
"loss": 1.259,
"step": 684
},
{
"epoch": 36.0,
"eval_accuracy": 0.714935064935065,
"eval_loss": 1.509379506111145,
"eval_runtime": 0.985,
"eval_samples_per_second": 121.825,
"eval_steps_per_second": 8.122,
"step": 684
},
{
"epoch": 37.0,
"grad_norm": 3.8870420455932617,
"learning_rate": 5.621500559910414e-06,
"loss": 1.2348,
"step": 703
},
{
"epoch": 37.0,
"eval_accuracy": 0.7216450216450216,
"eval_loss": 1.4843716621398926,
"eval_runtime": 0.9616,
"eval_samples_per_second": 124.787,
"eval_steps_per_second": 8.319,
"step": 703
},
{
"epoch": 38.0,
"grad_norm": 3.471414566040039,
"learning_rate": 5.195968645016798e-06,
"loss": 1.2561,
"step": 722
},
{
"epoch": 38.0,
"eval_accuracy": 0.7170995670995671,
"eval_loss": 1.4628422260284424,
"eval_runtime": 0.9514,
"eval_samples_per_second": 126.133,
"eval_steps_per_second": 8.409,
"step": 722
},
{
"epoch": 39.0,
"grad_norm": 3.5081968307495117,
"learning_rate": 4.770436730123181e-06,
"loss": 1.2464,
"step": 741
},
{
"epoch": 39.0,
"eval_accuracy": 0.7182900432900433,
"eval_loss": 1.471142292022705,
"eval_runtime": 0.9807,
"eval_samples_per_second": 122.358,
"eval_steps_per_second": 8.157,
"step": 741
},
{
"epoch": 40.0,
"grad_norm": 3.555746555328369,
"learning_rate": 4.344904815229564e-06,
"loss": 1.2483,
"step": 760
},
{
"epoch": 40.0,
"eval_accuracy": 0.7228354978354978,
"eval_loss": 1.461737871170044,
"eval_runtime": 0.9684,
"eval_samples_per_second": 123.91,
"eval_steps_per_second": 8.261,
"step": 760
},
{
"epoch": 41.0,
"grad_norm": 3.553662061691284,
"learning_rate": 3.9193729003359465e-06,
"loss": 1.2392,
"step": 779
},
{
"epoch": 41.0,
"eval_accuracy": 0.7164502164502164,
"eval_loss": 1.4649699926376343,
"eval_runtime": 1.0032,
"eval_samples_per_second": 119.615,
"eval_steps_per_second": 7.974,
"step": 779
},
{
"epoch": 42.0,
"grad_norm": 3.924736738204956,
"learning_rate": 3.4938409854423293e-06,
"loss": 1.2306,
"step": 798
},
{
"epoch": 42.0,
"eval_accuracy": 0.7258658008658009,
"eval_loss": 1.4045588970184326,
"eval_runtime": 0.9561,
"eval_samples_per_second": 125.506,
"eval_steps_per_second": 8.367,
"step": 798
},
{
"epoch": 43.0,
"grad_norm": 3.97886061668396,
"learning_rate": 3.068309070548712e-06,
"loss": 1.2328,
"step": 817
},
{
"epoch": 43.0,
"eval_accuracy": 0.7140692640692641,
"eval_loss": 1.4773471355438232,
"eval_runtime": 0.9312,
"eval_samples_per_second": 128.868,
"eval_steps_per_second": 8.591,
"step": 817
},
{
"epoch": 44.0,
"grad_norm": 3.708599805831909,
"learning_rate": 2.642777155655095e-06,
"loss": 1.2493,
"step": 836
},
{
"epoch": 44.0,
"eval_accuracy": 0.7229437229437229,
"eval_loss": 1.45064115524292,
"eval_runtime": 0.9666,
"eval_samples_per_second": 124.146,
"eval_steps_per_second": 8.276,
"step": 836
},
{
"epoch": 45.0,
"grad_norm": 3.3163161277770996,
"learning_rate": 2.2172452407614783e-06,
"loss": 1.2349,
"step": 855
},
{
"epoch": 45.0,
"eval_accuracy": 0.7072510822510822,
"eval_loss": 1.5113204717636108,
"eval_runtime": 0.9644,
"eval_samples_per_second": 124.432,
"eval_steps_per_second": 8.295,
"step": 855
},
{
"epoch": 46.0,
"grad_norm": 3.812030553817749,
"learning_rate": 1.7917133258678612e-06,
"loss": 1.2352,
"step": 874
},
{
"epoch": 46.0,
"eval_accuracy": 0.7154761904761905,
"eval_loss": 1.478694200515747,
"eval_runtime": 1.0642,
"eval_samples_per_second": 112.764,
"eval_steps_per_second": 7.518,
"step": 874
},
{
"epoch": 47.0,
"grad_norm": 3.363664388656616,
"learning_rate": 1.3661814109742442e-06,
"loss": 1.2469,
"step": 893
},
{
"epoch": 47.0,
"eval_accuracy": 0.7175324675324676,
"eval_loss": 1.4404964447021484,
"eval_runtime": 0.9729,
"eval_samples_per_second": 123.34,
"eval_steps_per_second": 8.223,
"step": 893
},
{
"epoch": 48.0,
"grad_norm": 3.373873710632324,
"learning_rate": 9.406494960806272e-07,
"loss": 1.2215,
"step": 912
},
{
"epoch": 48.0,
"eval_accuracy": 0.7176406926406926,
"eval_loss": 1.4719493389129639,
"eval_runtime": 0.9636,
"eval_samples_per_second": 124.529,
"eval_steps_per_second": 8.302,
"step": 912
},
{
"epoch": 49.0,
"grad_norm": 3.360140562057495,
"learning_rate": 5.151175811870101e-07,
"loss": 1.2238,
"step": 931
},
{
"epoch": 49.0,
"eval_accuracy": 0.7194805194805195,
"eval_loss": 1.4799143075942993,
"eval_runtime": 0.9979,
"eval_samples_per_second": 120.25,
"eval_steps_per_second": 8.017,
"step": 931
},
{
"epoch": 50.0,
"grad_norm": 3.6860737800598145,
"learning_rate": 8.958566629339306e-08,
"loss": 1.2371,
"step": 950
},
{
"epoch": 50.0,
"eval_accuracy": 0.7123376623376624,
"eval_loss": 1.488216519355774,
"eval_runtime": 0.9315,
"eval_samples_per_second": 128.825,
"eval_steps_per_second": 8.588,
"step": 950
},
{
"epoch": 50.0,
"step": 950,
"total_flos": 7922583223296000.0,
"train_loss": 1.3572578932109631,
"train_runtime": 567.5136,
"train_samples_per_second": 53.038,
"train_steps_per_second": 1.674
}
],
"logging_steps": 37,
"max_steps": 950,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7922583223296000.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}