{
  "best_metric": 1.4045588970184326,
  "best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/Continued_pretraining/TAPT/microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext/variant-tapt_base-LR_2e-05/checkpoint-798",
  "epoch": 50.0,
  "eval_steps": 500,
  "global_step": 950,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 3.445349931716919,
      "learning_rate": 6.31578947368421e-06,
      "loss": 1.7258,
      "step": 19
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.691991341991342,
      "eval_loss": 1.7872645854949951,
      "eval_runtime": 1.2711,
      "eval_samples_per_second": 94.406,
      "eval_steps_per_second": 6.294,
      "step": 19
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.7695751190185547,
      "learning_rate": 1.263157894736842e-05,
      "loss": 1.708,
      "step": 38
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6962121212121212,
      "eval_loss": 1.73982572555542,
      "eval_runtime": 0.9424,
      "eval_samples_per_second": 127.337,
      "eval_steps_per_second": 8.489,
      "step": 38
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.6540446281433105,
      "learning_rate": 1.929824561403509e-05,
      "loss": 1.6506,
      "step": 57
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6972943722943723,
      "eval_loss": 1.680162787437439,
      "eval_runtime": 0.9581,
      "eval_samples_per_second": 125.245,
      "eval_steps_per_second": 8.35,
      "step": 57
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.684107780456543,
      "learning_rate": 1.961926091825308e-05,
      "loss": 1.5883,
      "step": 76
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7041125541125541,
      "eval_loss": 1.6647979021072388,
      "eval_runtime": 1.1313,
      "eval_samples_per_second": 106.076,
      "eval_steps_per_second": 7.072,
      "step": 76
    },
    {
      "epoch": 5.0,
      "grad_norm": 3.216653347015381,
      "learning_rate": 1.921612541993281e-05,
      "loss": 1.567,
      "step": 95
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.703030303030303,
      "eval_loss": 1.6342447996139526,
      "eval_runtime": 1.1214,
      "eval_samples_per_second": 107.012,
      "eval_steps_per_second": 7.134,
      "step": 95
    },
    {
      "epoch": 6.0,
      "grad_norm": 3.901475191116333,
      "learning_rate": 1.8790593505039197e-05,
      "loss": 1.5485,
      "step": 114
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7146103896103896,
      "eval_loss": 1.542972207069397,
      "eval_runtime": 0.9254,
      "eval_samples_per_second": 129.672,
      "eval_steps_per_second": 8.645,
      "step": 114
    },
    {
      "epoch": 7.0,
      "grad_norm": 3.615143299102783,
      "learning_rate": 1.836506159014558e-05,
      "loss": 1.5105,
      "step": 133
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7112554112554113,
      "eval_loss": 1.529628872871399,
      "eval_runtime": 0.9685,
      "eval_samples_per_second": 123.909,
      "eval_steps_per_second": 8.261,
      "step": 133
    },
    {
      "epoch": 8.0,
      "grad_norm": 3.3348517417907715,
      "learning_rate": 1.793952967525196e-05,
      "loss": 1.4635,
      "step": 152
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7077922077922078,
      "eval_loss": 1.6213933229446411,
      "eval_runtime": 0.964,
      "eval_samples_per_second": 124.478,
      "eval_steps_per_second": 8.299,
      "step": 152
    },
    {
      "epoch": 9.0,
      "grad_norm": 3.2985076904296875,
      "learning_rate": 1.7513997760358343e-05,
      "loss": 1.4841,
      "step": 171
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.712012987012987,
      "eval_loss": 1.521169662475586,
      "eval_runtime": 0.9525,
      "eval_samples_per_second": 125.98,
      "eval_steps_per_second": 8.399,
      "step": 171
    },
    {
      "epoch": 10.0,
      "grad_norm": 3.7003631591796875,
      "learning_rate": 1.708846584546473e-05,
      "loss": 1.4663,
      "step": 190
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7034632034632035,
      "eval_loss": 1.5628341436386108,
      "eval_runtime": 0.9596,
      "eval_samples_per_second": 125.047,
      "eval_steps_per_second": 8.336,
      "step": 190
    },
    {
      "epoch": 11.0,
      "grad_norm": 3.4336016178131104,
      "learning_rate": 1.666293393057111e-05,
      "loss": 1.4282,
      "step": 209
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7164502164502164,
      "eval_loss": 1.5350743532180786,
      "eval_runtime": 0.9381,
      "eval_samples_per_second": 127.92,
      "eval_steps_per_second": 8.528,
      "step": 209
    },
    {
      "epoch": 12.0,
      "grad_norm": 3.400257110595703,
      "learning_rate": 1.6237402015677492e-05,
      "loss": 1.4511,
      "step": 228
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7095238095238096,
      "eval_loss": 1.5299878120422363,
      "eval_runtime": 0.9815,
      "eval_samples_per_second": 122.261,
      "eval_steps_per_second": 8.151,
      "step": 228
    },
    {
      "epoch": 13.0,
      "grad_norm": 3.7612063884735107,
      "learning_rate": 1.5811870100783874e-05,
      "loss": 1.4318,
      "step": 247
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7148268398268398,
      "eval_loss": 1.5255870819091797,
      "eval_runtime": 1.0085,
      "eval_samples_per_second": 118.989,
      "eval_steps_per_second": 7.933,
      "step": 247
    },
    {
      "epoch": 14.0,
      "grad_norm": 4.146888732910156,
      "learning_rate": 1.5408734602463605e-05,
      "loss": 1.4241,
      "step": 266
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7146103896103896,
      "eval_loss": 1.4872480630874634,
      "eval_runtime": 0.9584,
      "eval_samples_per_second": 125.214,
      "eval_steps_per_second": 8.348,
      "step": 266
    },
    {
      "epoch": 15.0,
      "grad_norm": 3.4961729049682617,
      "learning_rate": 1.498320268756999e-05,
      "loss": 1.4235,
      "step": 285
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7087662337662337,
      "eval_loss": 1.543083667755127,
      "eval_runtime": 0.9678,
      "eval_samples_per_second": 123.991,
      "eval_steps_per_second": 8.266,
      "step": 285
    },
    {
      "epoch": 16.0,
      "grad_norm": 3.633533477783203,
      "learning_rate": 1.4557670772676373e-05,
      "loss": 1.3905,
      "step": 304
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7096320346320346,
      "eval_loss": 1.5830901861190796,
      "eval_runtime": 0.9543,
      "eval_samples_per_second": 125.744,
      "eval_steps_per_second": 8.383,
      "step": 304
    },
    {
      "epoch": 17.0,
      "grad_norm": 3.480095863342285,
      "learning_rate": 1.4132138857782756e-05,
      "loss": 1.3526,
      "step": 323
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7175324675324676,
      "eval_loss": 1.4920153617858887,
      "eval_runtime": 0.9587,
      "eval_samples_per_second": 125.168,
      "eval_steps_per_second": 8.345,
      "step": 323
    },
    {
      "epoch": 18.0,
      "grad_norm": 3.672355890274048,
      "learning_rate": 1.3706606942889138e-05,
      "loss": 1.3733,
      "step": 342
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7103896103896103,
      "eval_loss": 1.5017799139022827,
      "eval_runtime": 0.947,
      "eval_samples_per_second": 126.722,
      "eval_steps_per_second": 8.448,
      "step": 342
    },
    {
      "epoch": 19.0,
      "grad_norm": 3.355855941772461,
      "learning_rate": 1.3281075027995522e-05,
      "loss": 1.3673,
      "step": 361
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7179653679653679,
      "eval_loss": 1.4765794277191162,
      "eval_runtime": 0.9661,
      "eval_samples_per_second": 124.207,
      "eval_steps_per_second": 8.28,
      "step": 361
    },
    {
      "epoch": 20.0,
      "grad_norm": 3.565549850463867,
      "learning_rate": 1.2855543113101904e-05,
      "loss": 1.3631,
      "step": 380
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7141774891774891,
      "eval_loss": 1.4877734184265137,
      "eval_runtime": 1.0066,
      "eval_samples_per_second": 119.211,
      "eval_steps_per_second": 7.947,
      "step": 380
    },
    {
      "epoch": 21.0,
      "grad_norm": 3.4417223930358887,
      "learning_rate": 1.2430011198208288e-05,
      "loss": 1.3709,
      "step": 399
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7038961038961039,
      "eval_loss": 1.542179822921753,
      "eval_runtime": 1.021,
      "eval_samples_per_second": 117.535,
      "eval_steps_per_second": 7.836,
      "step": 399
    },
    {
      "epoch": 22.0,
      "grad_norm": 3.8988301753997803,
      "learning_rate": 1.2004479283314671e-05,
      "loss": 1.3408,
      "step": 418
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7205627705627705,
      "eval_loss": 1.4855471849441528,
      "eval_runtime": 0.9705,
      "eval_samples_per_second": 123.653,
      "eval_steps_per_second": 8.244,
      "step": 418
    },
    {
      "epoch": 23.0,
      "grad_norm": 3.567075729370117,
      "learning_rate": 1.1578947368421053e-05,
      "loss": 1.3311,
      "step": 437
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7156926406926407,
      "eval_loss": 1.5094949007034302,
      "eval_runtime": 0.9498,
      "eval_samples_per_second": 126.338,
      "eval_steps_per_second": 8.423,
      "step": 437
    },
    {
      "epoch": 24.0,
      "grad_norm": 3.7643215656280518,
      "learning_rate": 1.1153415453527437e-05,
      "loss": 1.3144,
      "step": 456
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7156926406926407,
      "eval_loss": 1.5173320770263672,
      "eval_runtime": 0.918,
      "eval_samples_per_second": 130.722,
      "eval_steps_per_second": 8.715,
      "step": 456
    },
    {
      "epoch": 25.0,
      "grad_norm": 4.011295318603516,
      "learning_rate": 1.0727883538633819e-05,
      "loss": 1.297,
      "step": 475
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7215367965367966,
      "eval_loss": 1.4742799997329712,
      "eval_runtime": 0.9605,
      "eval_samples_per_second": 124.934,
      "eval_steps_per_second": 8.329,
      "step": 475
    },
    {
      "epoch": 26.0,
      "grad_norm": 3.695364475250244,
      "learning_rate": 1.0302351623740203e-05,
      "loss": 1.3343,
      "step": 494
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7112554112554113,
      "eval_loss": 1.5012328624725342,
      "eval_runtime": 0.9802,
      "eval_samples_per_second": 122.418,
      "eval_steps_per_second": 8.161,
      "step": 494
    },
    {
      "epoch": 27.0,
      "grad_norm": 3.5467514991760254,
      "learning_rate": 9.876819708846585e-06,
      "loss": 1.2949,
      "step": 513
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7146103896103896,
      "eval_loss": 1.4987872838974,
      "eval_runtime": 0.9828,
      "eval_samples_per_second": 122.102,
      "eval_steps_per_second": 8.14,
      "step": 513
    },
    {
      "epoch": 28.0,
      "grad_norm": 3.4811294078826904,
      "learning_rate": 9.451287793952969e-06,
      "loss": 1.3182,
      "step": 532
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7242424242424242,
      "eval_loss": 1.4198087453842163,
      "eval_runtime": 0.9426,
      "eval_samples_per_second": 127.306,
      "eval_steps_per_second": 8.487,
      "step": 532
    },
    {
      "epoch": 29.0,
      "grad_norm": 3.478583812713623,
      "learning_rate": 9.02575587905935e-06,
      "loss": 1.3005,
      "step": 551
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7161255411255412,
      "eval_loss": 1.4723750352859497,
      "eval_runtime": 0.9211,
      "eval_samples_per_second": 130.285,
      "eval_steps_per_second": 8.686,
      "step": 551
    },
    {
      "epoch": 30.0,
      "grad_norm": 3.6417274475097656,
      "learning_rate": 8.600223964165734e-06,
      "loss": 1.2821,
      "step": 570
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7204545454545455,
      "eval_loss": 1.4705064296722412,
      "eval_runtime": 0.9842,
      "eval_samples_per_second": 121.923,
      "eval_steps_per_second": 8.128,
      "step": 570
    },
    {
      "epoch": 31.0,
      "grad_norm": 3.6498682498931885,
      "learning_rate": 8.174692049272118e-06,
      "loss": 1.278,
      "step": 589
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7201298701298702,
      "eval_loss": 1.4779876470565796,
      "eval_runtime": 0.9416,
      "eval_samples_per_second": 127.449,
      "eval_steps_per_second": 8.497,
      "step": 589
    },
    {
      "epoch": 32.0,
      "grad_norm": 3.44429087638855,
      "learning_rate": 7.7491601343785e-06,
      "loss": 1.274,
      "step": 608
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7128787878787879,
      "eval_loss": 1.5007588863372803,
      "eval_runtime": 0.951,
      "eval_samples_per_second": 126.182,
      "eval_steps_per_second": 8.412,
      "step": 608
    },
    {
      "epoch": 33.0,
      "grad_norm": 3.3195676803588867,
      "learning_rate": 7.323628219484883e-06,
      "loss": 1.2849,
      "step": 627
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.720021645021645,
      "eval_loss": 1.457064151763916,
      "eval_runtime": 0.9439,
      "eval_samples_per_second": 127.138,
      "eval_steps_per_second": 8.476,
      "step": 627
    },
    {
      "epoch": 34.0,
      "grad_norm": 3.599475145339966,
      "learning_rate": 6.8980963045912665e-06,
      "loss": 1.2607,
      "step": 646
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7246753246753247,
      "eval_loss": 1.4253478050231934,
      "eval_runtime": 0.9878,
      "eval_samples_per_second": 121.479,
      "eval_steps_per_second": 8.099,
      "step": 646
    },
    {
      "epoch": 35.0,
      "grad_norm": 3.3579301834106445,
      "learning_rate": 6.4725643896976485e-06,
      "loss": 1.2673,
      "step": 665
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.7100649350649351,
      "eval_loss": 1.5111687183380127,
      "eval_runtime": 0.9938,
      "eval_samples_per_second": 120.753,
      "eval_steps_per_second": 8.05,
      "step": 665
    },
    {
      "epoch": 36.0,
      "grad_norm": 3.2128427028656006,
      "learning_rate": 6.047032474804032e-06,
      "loss": 1.259,
      "step": 684
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.714935064935065,
      "eval_loss": 1.509379506111145,
      "eval_runtime": 0.985,
      "eval_samples_per_second": 121.825,
      "eval_steps_per_second": 8.122,
      "step": 684
    },
    {
      "epoch": 37.0,
      "grad_norm": 3.8870420455932617,
      "learning_rate": 5.621500559910414e-06,
      "loss": 1.2348,
      "step": 703
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.7216450216450216,
      "eval_loss": 1.4843716621398926,
      "eval_runtime": 0.9616,
      "eval_samples_per_second": 124.787,
      "eval_steps_per_second": 8.319,
      "step": 703
    },
    {
      "epoch": 38.0,
      "grad_norm": 3.471414566040039,
      "learning_rate": 5.195968645016798e-06,
      "loss": 1.2561,
      "step": 722
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.7170995670995671,
      "eval_loss": 1.4628422260284424,
      "eval_runtime": 0.9514,
      "eval_samples_per_second": 126.133,
      "eval_steps_per_second": 8.409,
      "step": 722
    },
    {
      "epoch": 39.0,
      "grad_norm": 3.5081968307495117,
      "learning_rate": 4.770436730123181e-06,
      "loss": 1.2464,
      "step": 741
    },
    {
      "epoch": 39.0,
      "eval_accuracy": 0.7182900432900433,
      "eval_loss": 1.471142292022705,
      "eval_runtime": 0.9807,
      "eval_samples_per_second": 122.358,
      "eval_steps_per_second": 8.157,
      "step": 741
    },
    {
      "epoch": 40.0,
      "grad_norm": 3.555746555328369,
      "learning_rate": 4.344904815229564e-06,
      "loss": 1.2483,
      "step": 760
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.7228354978354978,
      "eval_loss": 1.461737871170044,
      "eval_runtime": 0.9684,
      "eval_samples_per_second": 123.91,
      "eval_steps_per_second": 8.261,
      "step": 760
    },
    {
      "epoch": 41.0,
      "grad_norm": 3.553662061691284,
      "learning_rate": 3.9193729003359465e-06,
      "loss": 1.2392,
      "step": 779
    },
    {
      "epoch": 41.0,
      "eval_accuracy": 0.7164502164502164,
      "eval_loss": 1.4649699926376343,
      "eval_runtime": 1.0032,
      "eval_samples_per_second": 119.615,
      "eval_steps_per_second": 7.974,
      "step": 779
    },
    {
      "epoch": 42.0,
      "grad_norm": 3.924736738204956,
      "learning_rate": 3.4938409854423293e-06,
      "loss": 1.2306,
      "step": 798
    },
    {
      "epoch": 42.0,
      "eval_accuracy": 0.7258658008658009,
      "eval_loss": 1.4045588970184326,
      "eval_runtime": 0.9561,
      "eval_samples_per_second": 125.506,
      "eval_steps_per_second": 8.367,
      "step": 798
    },
    {
      "epoch": 43.0,
      "grad_norm": 3.97886061668396,
      "learning_rate": 3.068309070548712e-06,
      "loss": 1.2328,
      "step": 817
    },
    {
      "epoch": 43.0,
      "eval_accuracy": 0.7140692640692641,
      "eval_loss": 1.4773471355438232,
      "eval_runtime": 0.9312,
      "eval_samples_per_second": 128.868,
      "eval_steps_per_second": 8.591,
      "step": 817
    },
    {
      "epoch": 44.0,
      "grad_norm": 3.708599805831909,
      "learning_rate": 2.642777155655095e-06,
      "loss": 1.2493,
      "step": 836
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.7229437229437229,
      "eval_loss": 1.45064115524292,
      "eval_runtime": 0.9666,
      "eval_samples_per_second": 124.146,
      "eval_steps_per_second": 8.276,
      "step": 836
    },
    {
      "epoch": 45.0,
      "grad_norm": 3.3163161277770996,
      "learning_rate": 2.2172452407614783e-06,
      "loss": 1.2349,
      "step": 855
    },
    {
      "epoch": 45.0,
      "eval_accuracy": 0.7072510822510822,
      "eval_loss": 1.5113204717636108,
      "eval_runtime": 0.9644,
      "eval_samples_per_second": 124.432,
      "eval_steps_per_second": 8.295,
      "step": 855
    },
    {
      "epoch": 46.0,
      "grad_norm": 3.812030553817749,
      "learning_rate": 1.7917133258678612e-06,
      "loss": 1.2352,
      "step": 874
    },
    {
      "epoch": 46.0,
      "eval_accuracy": 0.7154761904761905,
      "eval_loss": 1.478694200515747,
      "eval_runtime": 1.0642,
      "eval_samples_per_second": 112.764,
      "eval_steps_per_second": 7.518,
      "step": 874
    },
    {
      "epoch": 47.0,
      "grad_norm": 3.363664388656616,
      "learning_rate": 1.3661814109742442e-06,
      "loss": 1.2469,
      "step": 893
    },
    {
      "epoch": 47.0,
      "eval_accuracy": 0.7175324675324676,
      "eval_loss": 1.4404964447021484,
      "eval_runtime": 0.9729,
      "eval_samples_per_second": 123.34,
      "eval_steps_per_second": 8.223,
      "step": 893
    },
    {
      "epoch": 48.0,
      "grad_norm": 3.373873710632324,
      "learning_rate": 9.406494960806272e-07,
      "loss": 1.2215,
      "step": 912
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.7176406926406926,
      "eval_loss": 1.4719493389129639,
      "eval_runtime": 0.9636,
      "eval_samples_per_second": 124.529,
      "eval_steps_per_second": 8.302,
      "step": 912
    },
    {
      "epoch": 49.0,
      "grad_norm": 3.360140562057495,
      "learning_rate": 5.151175811870101e-07,
      "loss": 1.2238,
      "step": 931
    },
    {
      "epoch": 49.0,
      "eval_accuracy": 0.7194805194805195,
      "eval_loss": 1.4799143075942993,
      "eval_runtime": 0.9979,
      "eval_samples_per_second": 120.25,
      "eval_steps_per_second": 8.017,
      "step": 931
    },
    {
      "epoch": 50.0,
      "grad_norm": 3.6860737800598145,
      "learning_rate": 8.958566629339306e-08,
      "loss": 1.2371,
      "step": 950
    },
    {
      "epoch": 50.0,
      "eval_accuracy": 0.7123376623376624,
      "eval_loss": 1.488216519355774,
      "eval_runtime": 0.9315,
      "eval_samples_per_second": 128.825,
      "eval_steps_per_second": 8.588,
      "step": 950
    },
    {
      "epoch": 50.0,
      "step": 950,
      "total_flos": 7922583223296000.0,
      "train_loss": 1.3572578932109631,
      "train_runtime": 567.5136,
      "train_samples_per_second": 53.038,
      "train_steps_per_second": 1.674
    }
  ],
  "logging_steps": 37,
  "max_steps": 950,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7922583223296000.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}