{ "best_metric": 1.4045588970184326, "best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/Continued_pretraining/TAPT/microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext/variant-tapt_base-LR_2e-05/checkpoint-798", "epoch": 50.0, "eval_steps": 500, "global_step": 950, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.445349931716919, "learning_rate": 6.31578947368421e-06, "loss": 1.7258, "step": 19 }, { "epoch": 1.0, "eval_accuracy": 0.691991341991342, "eval_loss": 1.7872645854949951, "eval_runtime": 1.2711, "eval_samples_per_second": 94.406, "eval_steps_per_second": 6.294, "step": 19 }, { "epoch": 2.0, "grad_norm": 3.7695751190185547, "learning_rate": 1.263157894736842e-05, "loss": 1.708, "step": 38 }, { "epoch": 2.0, "eval_accuracy": 0.6962121212121212, "eval_loss": 1.73982572555542, "eval_runtime": 0.9424, "eval_samples_per_second": 127.337, "eval_steps_per_second": 8.489, "step": 38 }, { "epoch": 3.0, "grad_norm": 3.6540446281433105, "learning_rate": 1.929824561403509e-05, "loss": 1.6506, "step": 57 }, { "epoch": 3.0, "eval_accuracy": 0.6972943722943723, "eval_loss": 1.680162787437439, "eval_runtime": 0.9581, "eval_samples_per_second": 125.245, "eval_steps_per_second": 8.35, "step": 57 }, { "epoch": 4.0, "grad_norm": 3.684107780456543, "learning_rate": 1.961926091825308e-05, "loss": 1.5883, "step": 76 }, { "epoch": 4.0, "eval_accuracy": 0.7041125541125541, "eval_loss": 1.6647979021072388, "eval_runtime": 1.1313, "eval_samples_per_second": 106.076, "eval_steps_per_second": 7.072, "step": 76 }, { "epoch": 5.0, "grad_norm": 3.216653347015381, "learning_rate": 1.921612541993281e-05, "loss": 1.567, "step": 95 }, { "epoch": 5.0, "eval_accuracy": 0.703030303030303, "eval_loss": 1.6342447996139526, "eval_runtime": 1.1214, "eval_samples_per_second": 107.012, "eval_steps_per_second": 7.134, "step": 95 }, { "epoch": 6.0, "grad_norm": 3.901475191116333, "learning_rate": 1.8790593505039197e-05, "loss": 1.5485, "step": 114 }, { "epoch": 6.0, "eval_accuracy": 0.7146103896103896, "eval_loss": 1.542972207069397, "eval_runtime": 0.9254, "eval_samples_per_second": 129.672, "eval_steps_per_second": 8.645, "step": 114 }, { "epoch": 7.0, "grad_norm": 3.615143299102783, "learning_rate": 1.836506159014558e-05, "loss": 1.5105, "step": 133 }, { "epoch": 7.0, "eval_accuracy": 0.7112554112554113, "eval_loss": 1.529628872871399, "eval_runtime": 0.9685, "eval_samples_per_second": 123.909, "eval_steps_per_second": 8.261, "step": 133 }, { "epoch": 8.0, "grad_norm": 3.3348517417907715, "learning_rate": 1.793952967525196e-05, "loss": 1.4635, "step": 152 }, { "epoch": 8.0, "eval_accuracy": 0.7077922077922078, "eval_loss": 1.6213933229446411, "eval_runtime": 0.964, "eval_samples_per_second": 124.478, "eval_steps_per_second": 8.299, "step": 152 }, { "epoch": 9.0, "grad_norm": 3.2985076904296875, "learning_rate": 1.7513997760358343e-05, "loss": 1.4841, "step": 171 }, { "epoch": 9.0, "eval_accuracy": 0.712012987012987, "eval_loss": 1.521169662475586, "eval_runtime": 0.9525, "eval_samples_per_second": 125.98, "eval_steps_per_second": 8.399, "step": 171 }, { "epoch": 10.0, "grad_norm": 3.7003631591796875, "learning_rate": 1.708846584546473e-05, "loss": 1.4663, "step": 190 }, { "epoch": 10.0, "eval_accuracy": 0.7034632034632035, "eval_loss": 1.5628341436386108, "eval_runtime": 0.9596, "eval_samples_per_second": 125.047, "eval_steps_per_second": 8.336, "step": 190 }, { "epoch": 11.0, "grad_norm": 3.4336016178131104, "learning_rate": 1.666293393057111e-05, "loss": 1.4282, "step": 209 }, { "epoch": 11.0, "eval_accuracy": 0.7164502164502164, "eval_loss": 1.5350743532180786, "eval_runtime": 0.9381, "eval_samples_per_second": 127.92, "eval_steps_per_second": 8.528, "step": 209 }, { "epoch": 12.0, "grad_norm": 3.400257110595703, "learning_rate": 1.6237402015677492e-05, "loss": 1.4511, "step": 228 }, { "epoch": 12.0, "eval_accuracy": 0.7095238095238096, "eval_loss": 1.5299878120422363, "eval_runtime": 0.9815, "eval_samples_per_second": 122.261, "eval_steps_per_second": 8.151, "step": 228 }, { "epoch": 13.0, "grad_norm": 3.7612063884735107, "learning_rate": 1.5811870100783874e-05, "loss": 1.4318, "step": 247 }, { "epoch": 13.0, "eval_accuracy": 0.7148268398268398, "eval_loss": 1.5255870819091797, "eval_runtime": 1.0085, "eval_samples_per_second": 118.989, "eval_steps_per_second": 7.933, "step": 247 }, { "epoch": 14.0, "grad_norm": 4.146888732910156, "learning_rate": 1.5408734602463605e-05, "loss": 1.4241, "step": 266 }, { "epoch": 14.0, "eval_accuracy": 0.7146103896103896, "eval_loss": 1.4872480630874634, "eval_runtime": 0.9584, "eval_samples_per_second": 125.214, "eval_steps_per_second": 8.348, "step": 266 }, { "epoch": 15.0, "grad_norm": 3.4961729049682617, "learning_rate": 1.498320268756999e-05, "loss": 1.4235, "step": 285 }, { "epoch": 15.0, "eval_accuracy": 0.7087662337662337, "eval_loss": 1.543083667755127, "eval_runtime": 0.9678, "eval_samples_per_second": 123.991, "eval_steps_per_second": 8.266, "step": 285 }, { "epoch": 16.0, "grad_norm": 3.633533477783203, "learning_rate": 1.4557670772676373e-05, "loss": 1.3905, "step": 304 }, { "epoch": 16.0, "eval_accuracy": 0.7096320346320346, "eval_loss": 1.5830901861190796, "eval_runtime": 0.9543, "eval_samples_per_second": 125.744, "eval_steps_per_second": 8.383, "step": 304 }, { "epoch": 17.0, "grad_norm": 3.480095863342285, "learning_rate": 1.4132138857782756e-05, "loss": 1.3526, "step": 323 }, { "epoch": 17.0, "eval_accuracy": 0.7175324675324676, "eval_loss": 1.4920153617858887, "eval_runtime": 0.9587, "eval_samples_per_second": 125.168, "eval_steps_per_second": 8.345, "step": 323 }, { "epoch": 18.0, "grad_norm": 3.672355890274048, "learning_rate": 1.3706606942889138e-05, "loss": 1.3733, "step": 342 }, { "epoch": 18.0, "eval_accuracy": 0.7103896103896103, "eval_loss": 1.5017799139022827, "eval_runtime": 0.947, "eval_samples_per_second": 126.722, "eval_steps_per_second": 8.448, "step": 342 }, { "epoch": 19.0, "grad_norm": 3.355855941772461, "learning_rate": 1.3281075027995522e-05, "loss": 1.3673, "step": 361 }, { "epoch": 19.0, "eval_accuracy": 0.7179653679653679, "eval_loss": 1.4765794277191162, "eval_runtime": 0.9661, "eval_samples_per_second": 124.207, "eval_steps_per_second": 8.28, "step": 361 }, { "epoch": 20.0, "grad_norm": 3.565549850463867, "learning_rate": 1.2855543113101904e-05, "loss": 1.3631, "step": 380 }, { "epoch": 20.0, "eval_accuracy": 0.7141774891774891, "eval_loss": 1.4877734184265137, "eval_runtime": 1.0066, "eval_samples_per_second": 119.211, "eval_steps_per_second": 7.947, "step": 380 }, { "epoch": 21.0, "grad_norm": 3.4417223930358887, "learning_rate": 1.2430011198208288e-05, "loss": 1.3709, "step": 399 }, { "epoch": 21.0, "eval_accuracy": 0.7038961038961039, "eval_loss": 1.542179822921753, "eval_runtime": 1.021, "eval_samples_per_second": 117.535, "eval_steps_per_second": 7.836, "step": 399 }, { "epoch": 22.0, "grad_norm": 3.8988301753997803, "learning_rate": 1.2004479283314671e-05, "loss": 1.3408, "step": 418 }, { "epoch": 22.0, "eval_accuracy": 0.7205627705627705, "eval_loss": 1.4855471849441528, "eval_runtime": 0.9705, "eval_samples_per_second": 123.653, "eval_steps_per_second": 8.244, "step": 418 }, { "epoch": 23.0, "grad_norm": 3.567075729370117, "learning_rate": 1.1578947368421053e-05, "loss": 1.3311, "step": 437 }, { "epoch": 23.0, "eval_accuracy": 0.7156926406926407, "eval_loss": 1.5094949007034302, "eval_runtime": 0.9498, "eval_samples_per_second": 126.338, "eval_steps_per_second": 8.423, "step": 437 }, { "epoch": 24.0, "grad_norm": 3.7643215656280518, "learning_rate": 1.1153415453527437e-05, "loss": 1.3144, "step": 456 }, { "epoch": 24.0, "eval_accuracy": 0.7156926406926407, "eval_loss": 1.5173320770263672, "eval_runtime": 0.918, "eval_samples_per_second": 130.722, "eval_steps_per_second": 8.715, "step": 456 }, { "epoch": 25.0, "grad_norm": 4.011295318603516, "learning_rate": 1.0727883538633819e-05, "loss": 1.297, "step": 475 }, { "epoch": 25.0, "eval_accuracy": 0.7215367965367966, "eval_loss": 1.4742799997329712, "eval_runtime": 0.9605, "eval_samples_per_second": 124.934, "eval_steps_per_second": 8.329, "step": 475 }, { "epoch": 26.0, "grad_norm": 3.695364475250244, "learning_rate": 1.0302351623740203e-05, "loss": 1.3343, "step": 494 }, { "epoch": 26.0, "eval_accuracy": 0.7112554112554113, "eval_loss": 1.5012328624725342, "eval_runtime": 0.9802, "eval_samples_per_second": 122.418, "eval_steps_per_second": 8.161, "step": 494 }, { "epoch": 27.0, "grad_norm": 3.5467514991760254, "learning_rate": 9.876819708846585e-06, "loss": 1.2949, "step": 513 }, { "epoch": 27.0, "eval_accuracy": 0.7146103896103896, "eval_loss": 1.4987872838974, "eval_runtime": 0.9828, "eval_samples_per_second": 122.102, "eval_steps_per_second": 8.14, "step": 513 }, { "epoch": 28.0, "grad_norm": 3.4811294078826904, "learning_rate": 9.451287793952969e-06, "loss": 1.3182, "step": 532 }, { "epoch": 28.0, "eval_accuracy": 0.7242424242424242, "eval_loss": 1.4198087453842163, "eval_runtime": 0.9426, "eval_samples_per_second": 127.306, "eval_steps_per_second": 8.487, "step": 532 }, { "epoch": 29.0, "grad_norm": 3.478583812713623, "learning_rate": 9.02575587905935e-06, "loss": 1.3005, "step": 551 }, { "epoch": 29.0, "eval_accuracy": 0.7161255411255412, "eval_loss": 1.4723750352859497, "eval_runtime": 0.9211, "eval_samples_per_second": 130.285, "eval_steps_per_second": 8.686, "step": 551 }, { "epoch": 30.0, "grad_norm": 3.6417274475097656, "learning_rate": 8.600223964165734e-06, "loss": 1.2821, "step": 570 }, { "epoch": 30.0, "eval_accuracy": 0.7204545454545455, "eval_loss": 1.4705064296722412, "eval_runtime": 0.9842, "eval_samples_per_second": 121.923, "eval_steps_per_second": 8.128, "step": 570 }, { "epoch": 31.0, "grad_norm": 3.6498682498931885, "learning_rate": 8.174692049272118e-06, "loss": 1.278, "step": 589 }, { "epoch": 31.0, "eval_accuracy": 0.7201298701298702, "eval_loss": 1.4779876470565796, "eval_runtime": 0.9416, "eval_samples_per_second": 127.449, "eval_steps_per_second": 8.497, "step": 589 }, { "epoch": 32.0, "grad_norm": 3.44429087638855, "learning_rate": 7.7491601343785e-06, "loss": 1.274, "step": 608 }, { "epoch": 32.0, "eval_accuracy": 0.7128787878787879, "eval_loss": 1.5007588863372803, "eval_runtime": 0.951, "eval_samples_per_second": 126.182, "eval_steps_per_second": 8.412, "step": 608 }, { "epoch": 33.0, "grad_norm": 3.3195676803588867, "learning_rate": 7.323628219484883e-06, "loss": 1.2849, "step": 627 }, { "epoch": 33.0, "eval_accuracy": 0.720021645021645, "eval_loss": 1.457064151763916, "eval_runtime": 0.9439, "eval_samples_per_second": 127.138, "eval_steps_per_second": 8.476, "step": 627 }, { "epoch": 34.0, "grad_norm": 3.599475145339966, "learning_rate": 6.8980963045912665e-06, "loss": 1.2607, "step": 646 }, { "epoch": 34.0, "eval_accuracy": 0.7246753246753247, "eval_loss": 1.4253478050231934, "eval_runtime": 0.9878, "eval_samples_per_second": 121.479, "eval_steps_per_second": 8.099, "step": 646 }, { "epoch": 35.0, "grad_norm": 3.3579301834106445, "learning_rate": 6.4725643896976485e-06, "loss": 1.2673, "step": 665 }, { "epoch": 35.0, "eval_accuracy": 0.7100649350649351, "eval_loss": 1.5111687183380127, "eval_runtime": 0.9938, "eval_samples_per_second": 120.753, "eval_steps_per_second": 8.05, "step": 665 }, { "epoch": 36.0, "grad_norm": 3.2128427028656006, "learning_rate": 6.047032474804032e-06, "loss": 1.259, "step": 684 }, { "epoch": 36.0, "eval_accuracy": 0.714935064935065, "eval_loss": 1.509379506111145, "eval_runtime": 0.985, "eval_samples_per_second": 121.825, "eval_steps_per_second": 8.122, "step": 684 }, { "epoch": 37.0, "grad_norm": 3.8870420455932617, "learning_rate": 5.621500559910414e-06, "loss": 1.2348, "step": 703 }, { "epoch": 37.0, "eval_accuracy": 0.7216450216450216, "eval_loss": 1.4843716621398926, "eval_runtime": 0.9616, "eval_samples_per_second": 124.787, "eval_steps_per_second": 8.319, "step": 703 }, { "epoch": 38.0, "grad_norm": 3.471414566040039, "learning_rate": 5.195968645016798e-06, "loss": 1.2561, "step": 722 }, { "epoch": 38.0, "eval_accuracy": 0.7170995670995671, "eval_loss": 1.4628422260284424, "eval_runtime": 0.9514, "eval_samples_per_second": 126.133, "eval_steps_per_second": 8.409, "step": 722 }, { "epoch": 39.0, "grad_norm": 3.5081968307495117, "learning_rate": 4.770436730123181e-06, "loss": 1.2464, "step": 741 }, { "epoch": 39.0, "eval_accuracy": 0.7182900432900433, "eval_loss": 1.471142292022705, "eval_runtime": 0.9807, "eval_samples_per_second": 122.358, "eval_steps_per_second": 8.157, "step": 741 }, { "epoch": 40.0, "grad_norm": 3.555746555328369, "learning_rate": 4.344904815229564e-06, "loss": 1.2483, "step": 760 }, { "epoch": 40.0, "eval_accuracy": 0.7228354978354978, "eval_loss": 1.461737871170044, "eval_runtime": 0.9684, "eval_samples_per_second": 123.91, "eval_steps_per_second": 8.261, "step": 760 }, { "epoch": 41.0, "grad_norm": 3.553662061691284, "learning_rate": 3.9193729003359465e-06, "loss": 1.2392, "step": 779 }, { "epoch": 41.0, "eval_accuracy": 0.7164502164502164, "eval_loss": 1.4649699926376343, "eval_runtime": 1.0032, "eval_samples_per_second": 119.615, "eval_steps_per_second": 7.974, "step": 779 }, { "epoch": 42.0, "grad_norm": 3.924736738204956, "learning_rate": 3.4938409854423293e-06, "loss": 1.2306, "step": 798 }, { "epoch": 42.0, "eval_accuracy": 0.7258658008658009, "eval_loss": 1.4045588970184326, "eval_runtime": 0.9561, "eval_samples_per_second": 125.506, "eval_steps_per_second": 8.367, "step": 798 }, { "epoch": 43.0, "grad_norm": 3.97886061668396, "learning_rate": 3.068309070548712e-06, "loss": 1.2328, "step": 817 }, { "epoch": 43.0, "eval_accuracy": 0.7140692640692641, "eval_loss": 1.4773471355438232, "eval_runtime": 0.9312, "eval_samples_per_second": 128.868, "eval_steps_per_second": 8.591, "step": 817 }, { "epoch": 44.0, "grad_norm": 3.708599805831909, "learning_rate": 2.642777155655095e-06, "loss": 1.2493, "step": 836 }, { "epoch": 44.0, "eval_accuracy": 0.7229437229437229, "eval_loss": 1.45064115524292, "eval_runtime": 0.9666, "eval_samples_per_second": 124.146, "eval_steps_per_second": 8.276, "step": 836 }, { "epoch": 45.0, "grad_norm": 3.3163161277770996, "learning_rate": 2.2172452407614783e-06, "loss": 1.2349, "step": 855 }, { "epoch": 45.0, "eval_accuracy": 0.7072510822510822, "eval_loss": 1.5113204717636108, "eval_runtime": 0.9644, "eval_samples_per_second": 124.432, "eval_steps_per_second": 8.295, "step": 855 }, { "epoch": 46.0, "grad_norm": 3.812030553817749, "learning_rate": 1.7917133258678612e-06, "loss": 1.2352, "step": 874 }, { "epoch": 46.0, "eval_accuracy": 0.7154761904761905, "eval_loss": 1.478694200515747, "eval_runtime": 1.0642, "eval_samples_per_second": 112.764, "eval_steps_per_second": 7.518, "step": 874 }, { "epoch": 47.0, "grad_norm": 3.363664388656616, "learning_rate": 1.3661814109742442e-06, "loss": 1.2469, "step": 893 }, { "epoch": 47.0, "eval_accuracy": 0.7175324675324676, "eval_loss": 1.4404964447021484, "eval_runtime": 0.9729, "eval_samples_per_second": 123.34, "eval_steps_per_second": 8.223, "step": 893 }, { "epoch": 48.0, "grad_norm": 3.373873710632324, "learning_rate": 9.406494960806272e-07, "loss": 1.2215, "step": 912 }, { "epoch": 48.0, "eval_accuracy": 0.7176406926406926, "eval_loss": 1.4719493389129639, "eval_runtime": 0.9636, "eval_samples_per_second": 124.529, "eval_steps_per_second": 8.302, "step": 912 }, { "epoch": 49.0, "grad_norm": 3.360140562057495, "learning_rate": 5.151175811870101e-07, "loss": 1.2238, "step": 931 }, { "epoch": 49.0, "eval_accuracy": 0.7194805194805195, "eval_loss": 1.4799143075942993, "eval_runtime": 0.9979, "eval_samples_per_second": 120.25, "eval_steps_per_second": 8.017, "step": 931 }, { "epoch": 50.0, "grad_norm": 3.6860737800598145, "learning_rate": 8.958566629339306e-08, "loss": 1.2371, "step": 950 }, { "epoch": 50.0, "eval_accuracy": 0.7123376623376624, "eval_loss": 1.488216519355774, "eval_runtime": 0.9315, "eval_samples_per_second": 128.825, "eval_steps_per_second": 8.588, "step": 950 }, { "epoch": 50.0, "step": 950, "total_flos": 7922583223296000.0, "train_loss": 1.3572578932109631, "train_runtime": 567.5136, "train_samples_per_second": 53.038, "train_steps_per_second": 1.674 } ], "logging_steps": 37, "max_steps": 950, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7922583223296000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }