| { | |
| "best_metric": 1.8658331632614136, | |
| "best_model_checkpoint": "/nfs/production/literature/amina-mardiyyah/new_data/OT-Entity-Extraction-Pipeline/model_outputs/Continued_pretraining/TAPT/bioformers/bioformer-16L/Mardiyyah/TAPT_data_V2_split/tapt_base_LR-2e-05/checkpoint-255", | |
| "epoch": 49.94117647058823, | |
| "eval_steps": 1, | |
| "global_step": 450, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.10457516339869281, | |
| "eval_loss": 2.221620798110962, | |
| "eval_runtime": 2.4071, | |
| "eval_samples_per_second": 808.035, | |
| "eval_steps_per_second": 12.879, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.20915032679738563, | |
| "eval_loss": 2.1887502670288086, | |
| "eval_runtime": 2.4126, | |
| "eval_samples_per_second": 806.196, | |
| "eval_steps_per_second": 12.849, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.3137254901960784, | |
| "eval_loss": 2.173266887664795, | |
| "eval_runtime": 2.4488, | |
| "eval_samples_per_second": 794.257, | |
| "eval_steps_per_second": 12.659, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.41830065359477125, | |
| "eval_loss": 2.1297478675842285, | |
| "eval_runtime": 2.3778, | |
| "eval_samples_per_second": 817.994, | |
| "eval_steps_per_second": 13.037, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.5228758169934641, | |
| "eval_loss": 2.192237377166748, | |
| "eval_runtime": 2.3912, | |
| "eval_samples_per_second": 813.415, | |
| "eval_steps_per_second": 12.964, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.6274509803921569, | |
| "eval_loss": 2.1468276977539062, | |
| "eval_runtime": 2.3902, | |
| "eval_samples_per_second": 813.739, | |
| "eval_steps_per_second": 12.97, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.7320261437908496, | |
| "eval_loss": 2.1432690620422363, | |
| "eval_runtime": 2.3678, | |
| "eval_samples_per_second": 821.446, | |
| "eval_steps_per_second": 13.092, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.8366013071895425, | |
| "eval_loss": 2.0927038192749023, | |
| "eval_runtime": 2.3704, | |
| "eval_samples_per_second": 820.552, | |
| "eval_steps_per_second": 13.078, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "eval_loss": 2.124303102493286, | |
| "eval_runtime": 2.4224, | |
| "eval_samples_per_second": 802.925, | |
| "eval_steps_per_second": 12.797, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 2.930349588394165, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 2.4847, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 1.1045751633986929, | |
| "eval_loss": 2.1266961097717285, | |
| "eval_runtime": 2.3993, | |
| "eval_samples_per_second": 810.647, | |
| "eval_steps_per_second": 12.92, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 1.2091503267973855, | |
| "eval_loss": 2.081995964050293, | |
| "eval_runtime": 2.4023, | |
| "eval_samples_per_second": 809.627, | |
| "eval_steps_per_second": 12.904, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 1.3137254901960784, | |
| "eval_loss": 2.0737693309783936, | |
| "eval_runtime": 2.4011, | |
| "eval_samples_per_second": 810.042, | |
| "eval_steps_per_second": 12.911, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 1.4183006535947713, | |
| "eval_loss": 2.0427086353302, | |
| "eval_runtime": 2.4076, | |
| "eval_samples_per_second": 807.874, | |
| "eval_steps_per_second": 12.876, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 1.522875816993464, | |
| "eval_loss": 2.056819200515747, | |
| "eval_runtime": 2.3991, | |
| "eval_samples_per_second": 810.727, | |
| "eval_steps_per_second": 12.922, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 1.6274509803921569, | |
| "eval_loss": 2.083451747894287, | |
| "eval_runtime": 2.4323, | |
| "eval_samples_per_second": 799.665, | |
| "eval_steps_per_second": 12.745, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 1.7320261437908497, | |
| "eval_loss": 2.078913450241089, | |
| "eval_runtime": 2.4756, | |
| "eval_samples_per_second": 785.669, | |
| "eval_steps_per_second": 12.522, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 1.8366013071895426, | |
| "eval_loss": 2.067417621612549, | |
| "eval_runtime": 2.438, | |
| "eval_samples_per_second": 797.798, | |
| "eval_steps_per_second": 12.716, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 1.9411764705882353, | |
| "eval_loss": 2.0401482582092285, | |
| "eval_runtime": 2.4764, | |
| "eval_samples_per_second": 785.419, | |
| "eval_steps_per_second": 12.518, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 1.9411764705882353, | |
| "grad_norm": 1.9967031478881836, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 2.4101, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 2.104575163398693, | |
| "eval_loss": 2.0421407222747803, | |
| "eval_runtime": 2.4395, | |
| "eval_samples_per_second": 797.31, | |
| "eval_steps_per_second": 12.708, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 2.2091503267973858, | |
| "eval_loss": 2.0762155055999756, | |
| "eval_runtime": 2.4888, | |
| "eval_samples_per_second": 781.507, | |
| "eval_steps_per_second": 12.456, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 2.313725490196078, | |
| "eval_loss": 2.006462574005127, | |
| "eval_runtime": 2.5161, | |
| "eval_samples_per_second": 773.026, | |
| "eval_steps_per_second": 12.321, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 2.418300653594771, | |
| "eval_loss": 2.0763015747070312, | |
| "eval_runtime": 2.4587, | |
| "eval_samples_per_second": 791.08, | |
| "eval_steps_per_second": 12.608, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 2.522875816993464, | |
| "eval_loss": 2.0424351692199707, | |
| "eval_runtime": 2.4605, | |
| "eval_samples_per_second": 790.5, | |
| "eval_steps_per_second": 12.599, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 2.627450980392157, | |
| "eval_loss": 2.031003952026367, | |
| "eval_runtime": 2.4119, | |
| "eval_samples_per_second": 806.408, | |
| "eval_steps_per_second": 12.853, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 2.7320261437908497, | |
| "eval_loss": 2.0873942375183105, | |
| "eval_runtime": 2.4077, | |
| "eval_samples_per_second": 807.816, | |
| "eval_steps_per_second": 12.875, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 2.8366013071895426, | |
| "eval_loss": 2.0235297679901123, | |
| "eval_runtime": 2.3945, | |
| "eval_samples_per_second": 812.27, | |
| "eval_steps_per_second": 12.946, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "eval_loss": 2.059739589691162, | |
| "eval_runtime": 2.4114, | |
| "eval_samples_per_second": 806.602, | |
| "eval_steps_per_second": 12.856, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 2.4962875843048096, | |
| "learning_rate": 2e-05, | |
| "loss": 2.3677, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 3.104575163398693, | |
| "eval_loss": 1.986527442932129, | |
| "eval_runtime": 2.3999, | |
| "eval_samples_per_second": 810.462, | |
| "eval_steps_per_second": 12.917, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 3.2091503267973858, | |
| "eval_loss": 2.0295257568359375, | |
| "eval_runtime": 2.4114, | |
| "eval_samples_per_second": 806.573, | |
| "eval_steps_per_second": 12.855, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 3.313725490196078, | |
| "eval_loss": 2.029600143432617, | |
| "eval_runtime": 2.3867, | |
| "eval_samples_per_second": 814.924, | |
| "eval_steps_per_second": 12.989, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 3.418300653594771, | |
| "eval_loss": 2.0018720626831055, | |
| "eval_runtime": 2.3894, | |
| "eval_samples_per_second": 814.014, | |
| "eval_steps_per_second": 12.974, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 3.522875816993464, | |
| "eval_loss": 1.9695795774459839, | |
| "eval_runtime": 2.3919, | |
| "eval_samples_per_second": 813.157, | |
| "eval_steps_per_second": 12.96, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 3.627450980392157, | |
| "eval_loss": 2.0265488624572754, | |
| "eval_runtime": 2.426, | |
| "eval_samples_per_second": 801.734, | |
| "eval_steps_per_second": 12.778, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 3.7320261437908497, | |
| "eval_loss": 2.010695457458496, | |
| "eval_runtime": 2.429, | |
| "eval_samples_per_second": 800.731, | |
| "eval_steps_per_second": 12.762, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 3.8366013071895426, | |
| "eval_loss": 2.034428119659424, | |
| "eval_runtime": 2.4086, | |
| "eval_samples_per_second": 807.51, | |
| "eval_steps_per_second": 12.87, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 3.9411764705882355, | |
| "eval_loss": 2.0281381607055664, | |
| "eval_runtime": 2.4118, | |
| "eval_samples_per_second": 806.449, | |
| "eval_steps_per_second": 12.853, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 3.9411764705882355, | |
| "grad_norm": 1.8768209218978882, | |
| "learning_rate": 1.9574468085106384e-05, | |
| "loss": 2.2639, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 4.104575163398692, | |
| "eval_loss": 2.0171053409576416, | |
| "eval_runtime": 2.4083, | |
| "eval_samples_per_second": 807.623, | |
| "eval_steps_per_second": 12.872, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 4.209150326797386, | |
| "eval_loss": 2.0344126224517822, | |
| "eval_runtime": 2.4108, | |
| "eval_samples_per_second": 806.785, | |
| "eval_steps_per_second": 12.859, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 4.313725490196078, | |
| "eval_loss": 1.9913954734802246, | |
| "eval_runtime": 2.3858, | |
| "eval_samples_per_second": 815.248, | |
| "eval_steps_per_second": 12.994, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 4.4183006535947715, | |
| "eval_loss": 1.9855905771255493, | |
| "eval_runtime": 2.3822, | |
| "eval_samples_per_second": 816.455, | |
| "eval_steps_per_second": 13.013, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 4.522875816993464, | |
| "eval_loss": 2.0357260704040527, | |
| "eval_runtime": 2.3873, | |
| "eval_samples_per_second": 814.74, | |
| "eval_steps_per_second": 12.986, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 4.627450980392156, | |
| "eval_loss": 2.028900384902954, | |
| "eval_runtime": 2.4177, | |
| "eval_samples_per_second": 804.49, | |
| "eval_steps_per_second": 12.822, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 4.73202614379085, | |
| "eval_loss": 1.9714045524597168, | |
| "eval_runtime": 2.3849, | |
| "eval_samples_per_second": 815.537, | |
| "eval_steps_per_second": 12.998, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 4.836601307189542, | |
| "eval_loss": 1.9895257949829102, | |
| "eval_runtime": 2.4769, | |
| "eval_samples_per_second": 785.249, | |
| "eval_steps_per_second": 12.516, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 4.9411764705882355, | |
| "eval_loss": 1.990486741065979, | |
| "eval_runtime": 2.4263, | |
| "eval_samples_per_second": 801.627, | |
| "eval_steps_per_second": 12.777, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 4.9411764705882355, | |
| "grad_norm": 1.8783236742019653, | |
| "learning_rate": 1.914893617021277e-05, | |
| "loss": 2.2037, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 5.104575163398692, | |
| "eval_loss": 1.9589457511901855, | |
| "eval_runtime": 2.4045, | |
| "eval_samples_per_second": 808.887, | |
| "eval_steps_per_second": 12.892, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 5.209150326797386, | |
| "eval_loss": 1.9864917993545532, | |
| "eval_runtime": 2.4112, | |
| "eval_samples_per_second": 806.648, | |
| "eval_steps_per_second": 12.857, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 5.313725490196078, | |
| "eval_loss": 2.0113699436187744, | |
| "eval_runtime": 2.4027, | |
| "eval_samples_per_second": 809.512, | |
| "eval_steps_per_second": 12.902, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 5.4183006535947715, | |
| "eval_loss": 2.0007834434509277, | |
| "eval_runtime": 2.4133, | |
| "eval_samples_per_second": 805.942, | |
| "eval_steps_per_second": 12.845, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 5.522875816993464, | |
| "eval_loss": 1.9577592611312866, | |
| "eval_runtime": 2.3791, | |
| "eval_samples_per_second": 817.533, | |
| "eval_steps_per_second": 13.03, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 5.627450980392156, | |
| "eval_loss": 2.029423236846924, | |
| "eval_runtime": 2.3902, | |
| "eval_samples_per_second": 813.74, | |
| "eval_steps_per_second": 12.97, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 5.73202614379085, | |
| "eval_loss": 1.9585332870483398, | |
| "eval_runtime": 2.4118, | |
| "eval_samples_per_second": 806.436, | |
| "eval_steps_per_second": 12.853, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 5.836601307189542, | |
| "eval_loss": 1.9783401489257812, | |
| "eval_runtime": 2.4264, | |
| "eval_samples_per_second": 801.594, | |
| "eval_steps_per_second": 12.776, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 5.9411764705882355, | |
| "eval_loss": 1.9880473613739014, | |
| "eval_runtime": 2.488, | |
| "eval_samples_per_second": 781.753, | |
| "eval_steps_per_second": 12.46, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 5.9411764705882355, | |
| "grad_norm": 1.8594753742218018, | |
| "learning_rate": 1.872340425531915e-05, | |
| "loss": 2.16, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 6.104575163398692, | |
| "eval_loss": 2.0060460567474365, | |
| "eval_runtime": 2.4336, | |
| "eval_samples_per_second": 799.234, | |
| "eval_steps_per_second": 12.738, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 6.209150326797386, | |
| "eval_loss": 1.9557570219039917, | |
| "eval_runtime": 2.4097, | |
| "eval_samples_per_second": 807.143, | |
| "eval_steps_per_second": 12.864, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 6.313725490196078, | |
| "eval_loss": 1.9664386510849, | |
| "eval_runtime": 2.4059, | |
| "eval_samples_per_second": 808.441, | |
| "eval_steps_per_second": 12.885, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 6.4183006535947715, | |
| "eval_loss": 1.920135498046875, | |
| "eval_runtime": 2.4056, | |
| "eval_samples_per_second": 808.538, | |
| "eval_steps_per_second": 12.887, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 6.522875816993464, | |
| "eval_loss": 1.9815952777862549, | |
| "eval_runtime": 2.4122, | |
| "eval_samples_per_second": 806.325, | |
| "eval_steps_per_second": 12.851, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 6.627450980392156, | |
| "eval_loss": 1.9681768417358398, | |
| "eval_runtime": 2.3867, | |
| "eval_samples_per_second": 814.941, | |
| "eval_steps_per_second": 12.989, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 6.73202614379085, | |
| "eval_loss": 1.9605098962783813, | |
| "eval_runtime": 2.3875, | |
| "eval_samples_per_second": 814.648, | |
| "eval_steps_per_second": 12.984, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 6.836601307189542, | |
| "eval_loss": 1.9233237504959106, | |
| "eval_runtime": 2.385, | |
| "eval_samples_per_second": 815.51, | |
| "eval_steps_per_second": 12.998, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 6.9411764705882355, | |
| "eval_loss": 1.9687212705612183, | |
| "eval_runtime": 2.3854, | |
| "eval_samples_per_second": 815.361, | |
| "eval_steps_per_second": 12.995, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 6.9411764705882355, | |
| "grad_norm": 1.886400818824768, | |
| "learning_rate": 1.8297872340425533e-05, | |
| "loss": 2.1108, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 7.104575163398692, | |
| "eval_loss": 1.9986543655395508, | |
| "eval_runtime": 2.4144, | |
| "eval_samples_per_second": 805.581, | |
| "eval_steps_per_second": 12.84, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 7.209150326797386, | |
| "eval_loss": 2.002251386642456, | |
| "eval_runtime": 2.4255, | |
| "eval_samples_per_second": 801.892, | |
| "eval_steps_per_second": 12.781, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 7.313725490196078, | |
| "eval_loss": 1.9626870155334473, | |
| "eval_runtime": 2.4235, | |
| "eval_samples_per_second": 802.565, | |
| "eval_steps_per_second": 12.792, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 7.4183006535947715, | |
| "eval_loss": 2.0214684009552, | |
| "eval_runtime": 2.4109, | |
| "eval_samples_per_second": 806.764, | |
| "eval_steps_per_second": 12.858, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 7.522875816993464, | |
| "eval_loss": 1.961344599723816, | |
| "eval_runtime": 2.4317, | |
| "eval_samples_per_second": 799.86, | |
| "eval_steps_per_second": 12.748, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 7.627450980392156, | |
| "eval_loss": 2.026102066040039, | |
| "eval_runtime": 2.4027, | |
| "eval_samples_per_second": 809.502, | |
| "eval_steps_per_second": 12.902, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 7.73202614379085, | |
| "eval_loss": 1.9625698328018188, | |
| "eval_runtime": 2.3794, | |
| "eval_samples_per_second": 817.425, | |
| "eval_steps_per_second": 13.028, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 7.836601307189542, | |
| "eval_loss": 2.000683546066284, | |
| "eval_runtime": 2.3828, | |
| "eval_samples_per_second": 816.278, | |
| "eval_steps_per_second": 13.01, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 7.9411764705882355, | |
| "eval_loss": 1.9403586387634277, | |
| "eval_runtime": 2.383, | |
| "eval_samples_per_second": 816.21, | |
| "eval_steps_per_second": 13.009, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 7.9411764705882355, | |
| "grad_norm": 3.798304319381714, | |
| "learning_rate": 1.7872340425531915e-05, | |
| "loss": 2.0949, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 8.104575163398692, | |
| "eval_loss": 1.994275450706482, | |
| "eval_runtime": 2.3862, | |
| "eval_samples_per_second": 815.09, | |
| "eval_steps_per_second": 12.991, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 8.209150326797385, | |
| "eval_loss": 2.0442616939544678, | |
| "eval_runtime": 2.3871, | |
| "eval_samples_per_second": 814.81, | |
| "eval_steps_per_second": 12.987, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 8.313725490196079, | |
| "eval_loss": 1.99091637134552, | |
| "eval_runtime": 2.3849, | |
| "eval_samples_per_second": 815.556, | |
| "eval_steps_per_second": 12.999, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 8.418300653594772, | |
| "eval_loss": 1.9789609909057617, | |
| "eval_runtime": 2.4032, | |
| "eval_samples_per_second": 809.347, | |
| "eval_steps_per_second": 12.9, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 8.522875816993464, | |
| "eval_loss": 1.9505332708358765, | |
| "eval_runtime": 2.457, | |
| "eval_samples_per_second": 791.608, | |
| "eval_steps_per_second": 12.617, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 8.627450980392156, | |
| "eval_loss": 1.9477442502975464, | |
| "eval_runtime": 2.4164, | |
| "eval_samples_per_second": 804.91, | |
| "eval_steps_per_second": 12.829, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 8.732026143790849, | |
| "eval_loss": 2.027162790298462, | |
| "eval_runtime": 2.4059, | |
| "eval_samples_per_second": 808.413, | |
| "eval_steps_per_second": 12.885, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 8.836601307189543, | |
| "eval_loss": 1.954852819442749, | |
| "eval_runtime": 2.4078, | |
| "eval_samples_per_second": 807.8, | |
| "eval_steps_per_second": 12.875, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 8.941176470588236, | |
| "eval_loss": 1.9641313552856445, | |
| "eval_runtime": 2.4106, | |
| "eval_samples_per_second": 806.865, | |
| "eval_steps_per_second": 12.86, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 8.941176470588236, | |
| "grad_norm": 1.9192023277282715, | |
| "learning_rate": 1.74468085106383e-05, | |
| "loss": 2.0617, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 9.104575163398692, | |
| "eval_loss": 1.9859141111373901, | |
| "eval_runtime": 2.3909, | |
| "eval_samples_per_second": 813.488, | |
| "eval_steps_per_second": 12.966, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 9.209150326797385, | |
| "eval_loss": 1.937601089477539, | |
| "eval_runtime": 2.3889, | |
| "eval_samples_per_second": 814.196, | |
| "eval_steps_per_second": 12.977, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 9.313725490196079, | |
| "eval_loss": 1.9699262380599976, | |
| "eval_runtime": 2.3832, | |
| "eval_samples_per_second": 816.116, | |
| "eval_steps_per_second": 13.008, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 9.418300653594772, | |
| "eval_loss": 1.9334497451782227, | |
| "eval_runtime": 2.4366, | |
| "eval_samples_per_second": 798.247, | |
| "eval_steps_per_second": 12.723, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 9.522875816993464, | |
| "eval_loss": 1.9708276987075806, | |
| "eval_runtime": 2.3835, | |
| "eval_samples_per_second": 816.03, | |
| "eval_steps_per_second": 13.006, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 9.627450980392156, | |
| "eval_loss": 1.970037817955017, | |
| "eval_runtime": 2.4674, | |
| "eval_samples_per_second": 788.282, | |
| "eval_steps_per_second": 12.564, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 9.732026143790849, | |
| "eval_loss": 1.9634466171264648, | |
| "eval_runtime": 2.4497, | |
| "eval_samples_per_second": 793.975, | |
| "eval_steps_per_second": 12.655, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 9.836601307189543, | |
| "eval_loss": 1.92203688621521, | |
| "eval_runtime": 2.4109, | |
| "eval_samples_per_second": 806.761, | |
| "eval_steps_per_second": 12.858, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 9.941176470588236, | |
| "eval_loss": 1.966900110244751, | |
| "eval_runtime": 2.4107, | |
| "eval_samples_per_second": 806.822, | |
| "eval_steps_per_second": 12.859, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 9.941176470588236, | |
| "grad_norm": 2.050672769546509, | |
| "learning_rate": 1.7021276595744682e-05, | |
| "loss": 2.0509, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 10.104575163398692, | |
| "eval_loss": 1.956833004951477, | |
| "eval_runtime": 2.3947, | |
| "eval_samples_per_second": 812.203, | |
| "eval_steps_per_second": 12.945, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 10.209150326797385, | |
| "eval_loss": 1.9699444770812988, | |
| "eval_runtime": 2.3837, | |
| "eval_samples_per_second": 815.943, | |
| "eval_steps_per_second": 13.005, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 10.313725490196079, | |
| "eval_loss": 2.0316123962402344, | |
| "eval_runtime": 2.3823, | |
| "eval_samples_per_second": 816.431, | |
| "eval_steps_per_second": 13.013, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 10.418300653594772, | |
| "eval_loss": 1.912984848022461, | |
| "eval_runtime": 2.3869, | |
| "eval_samples_per_second": 814.86, | |
| "eval_steps_per_second": 12.987, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 10.522875816993464, | |
| "eval_loss": 1.9707229137420654, | |
| "eval_runtime": 2.3837, | |
| "eval_samples_per_second": 815.942, | |
| "eval_steps_per_second": 13.005, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 10.627450980392156, | |
| "eval_loss": 1.9623687267303467, | |
| "eval_runtime": 2.455, | |
| "eval_samples_per_second": 792.269, | |
| "eval_steps_per_second": 12.627, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 10.732026143790849, | |
| "eval_loss": 1.9515836238861084, | |
| "eval_runtime": 2.4477, | |
| "eval_samples_per_second": 794.62, | |
| "eval_steps_per_second": 12.665, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 10.836601307189543, | |
| "eval_loss": 1.9508367776870728, | |
| "eval_runtime": 2.407, | |
| "eval_samples_per_second": 808.071, | |
| "eval_steps_per_second": 12.879, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 10.941176470588236, | |
| "eval_loss": 1.9166395664215088, | |
| "eval_runtime": 2.4193, | |
| "eval_samples_per_second": 803.965, | |
| "eval_steps_per_second": 12.814, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 10.941176470588236, | |
| "grad_norm": 1.911039113998413, | |
| "learning_rate": 1.6595744680851064e-05, | |
| "loss": 1.9835, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 11.104575163398692, | |
| "eval_loss": 1.9469496011734009, | |
| "eval_runtime": 2.4171, | |
| "eval_samples_per_second": 804.693, | |
| "eval_steps_per_second": 12.825, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 11.209150326797385, | |
| "eval_loss": 1.962018609046936, | |
| "eval_runtime": 2.3932, | |
| "eval_samples_per_second": 812.727, | |
| "eval_steps_per_second": 12.953, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 11.313725490196079, | |
| "eval_loss": 1.94699227809906, | |
| "eval_runtime": 2.3789, | |
| "eval_samples_per_second": 817.61, | |
| "eval_steps_per_second": 13.031, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 11.418300653594772, | |
| "eval_loss": 1.945833444595337, | |
| "eval_runtime": 2.3834, | |
| "eval_samples_per_second": 816.059, | |
| "eval_steps_per_second": 13.007, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 11.522875816993464, | |
| "eval_loss": 1.9585113525390625, | |
| "eval_runtime": 2.3811, | |
| "eval_samples_per_second": 816.833, | |
| "eval_steps_per_second": 13.019, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 11.627450980392156, | |
| "eval_loss": 1.9450502395629883, | |
| "eval_runtime": 2.381, | |
| "eval_samples_per_second": 816.892, | |
| "eval_steps_per_second": 13.02, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 11.732026143790849, | |
| "eval_loss": 1.9202919006347656, | |
| "eval_runtime": 2.4348, | |
| "eval_samples_per_second": 798.83, | |
| "eval_steps_per_second": 12.732, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 11.836601307189543, | |
| "eval_loss": 1.9322612285614014, | |
| "eval_runtime": 2.4503, | |
| "eval_samples_per_second": 793.773, | |
| "eval_steps_per_second": 12.651, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 11.941176470588236, | |
| "eval_loss": 1.9641361236572266, | |
| "eval_runtime": 2.4059, | |
| "eval_samples_per_second": 808.427, | |
| "eval_steps_per_second": 12.885, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 11.941176470588236, | |
| "grad_norm": 1.9469199180603027, | |
| "learning_rate": 1.6170212765957446e-05, | |
| "loss": 1.9719, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 12.104575163398692, | |
| "eval_loss": 1.9262347221374512, | |
| "eval_runtime": 2.4058, | |
| "eval_samples_per_second": 808.471, | |
| "eval_steps_per_second": 12.886, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 12.209150326797385, | |
| "eval_loss": 1.9799877405166626, | |
| "eval_runtime": 2.4101, | |
| "eval_samples_per_second": 807.019, | |
| "eval_steps_per_second": 12.863, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 12.313725490196079, | |
| "eval_loss": 1.9421709775924683, | |
| "eval_runtime": 2.3763, | |
| "eval_samples_per_second": 818.512, | |
| "eval_steps_per_second": 13.046, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 12.418300653594772, | |
| "eval_loss": 1.9286293983459473, | |
| "eval_runtime": 2.4023, | |
| "eval_samples_per_second": 809.639, | |
| "eval_steps_per_second": 12.904, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 12.522875816993464, | |
| "eval_loss": 1.9933801889419556, | |
| "eval_runtime": 2.3795, | |
| "eval_samples_per_second": 817.406, | |
| "eval_steps_per_second": 13.028, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 12.627450980392156, | |
| "eval_loss": 1.9704465866088867, | |
| "eval_runtime": 2.3792, | |
| "eval_samples_per_second": 817.502, | |
| "eval_steps_per_second": 13.03, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 12.732026143790849, | |
| "eval_loss": 1.939013957977295, | |
| "eval_runtime": 2.4295, | |
| "eval_samples_per_second": 800.592, | |
| "eval_steps_per_second": 12.76, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 12.836601307189543, | |
| "eval_loss": 1.916093349456787, | |
| "eval_runtime": 2.3862, | |
| "eval_samples_per_second": 815.104, | |
| "eval_steps_per_second": 12.991, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 12.941176470588236, | |
| "eval_loss": 1.94829523563385, | |
| "eval_runtime": 2.4403, | |
| "eval_samples_per_second": 797.034, | |
| "eval_steps_per_second": 12.703, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 12.941176470588236, | |
| "grad_norm": 1.835829734802246, | |
| "learning_rate": 1.5744680851063832e-05, | |
| "loss": 1.9663, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 13.104575163398692, | |
| "eval_loss": 1.9584107398986816, | |
| "eval_runtime": 2.4351, | |
| "eval_samples_per_second": 798.732, | |
| "eval_steps_per_second": 12.73, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 13.209150326797385, | |
| "eval_loss": 1.9641852378845215, | |
| "eval_runtime": 2.4617, | |
| "eval_samples_per_second": 790.102, | |
| "eval_steps_per_second": 12.593, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 13.313725490196079, | |
| "eval_loss": 1.9446567296981812, | |
| "eval_runtime": 2.4215, | |
| "eval_samples_per_second": 803.226, | |
| "eval_steps_per_second": 12.802, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 13.418300653594772, | |
| "eval_loss": 2.001385450363159, | |
| "eval_runtime": 2.416, | |
| "eval_samples_per_second": 805.044, | |
| "eval_steps_per_second": 12.831, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 13.522875816993464, | |
| "eval_loss": 1.8805845975875854, | |
| "eval_runtime": 2.4069, | |
| "eval_samples_per_second": 808.102, | |
| "eval_steps_per_second": 12.88, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 13.627450980392156, | |
| "eval_loss": 1.9486974477767944, | |
| "eval_runtime": 2.4072, | |
| "eval_samples_per_second": 807.987, | |
| "eval_steps_per_second": 12.878, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 13.732026143790849, | |
| "eval_loss": 1.9180878400802612, | |
| "eval_runtime": 2.4102, | |
| "eval_samples_per_second": 806.973, | |
| "eval_steps_per_second": 12.862, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 13.836601307189543, | |
| "eval_loss": 1.9238054752349854, | |
| "eval_runtime": 2.3928, | |
| "eval_samples_per_second": 812.868, | |
| "eval_steps_per_second": 12.956, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 13.941176470588236, | |
| "eval_loss": 1.9513754844665527, | |
| "eval_runtime": 2.3862, | |
| "eval_samples_per_second": 815.11, | |
| "eval_steps_per_second": 12.991, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 13.941176470588236, | |
| "grad_norm": 1.8433274030685425, | |
| "learning_rate": 1.5319148936170214e-05, | |
| "loss": 1.9785, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 14.104575163398692, | |
| "eval_loss": 1.9426443576812744, | |
| "eval_runtime": 2.3889, | |
| "eval_samples_per_second": 814.174, | |
| "eval_steps_per_second": 12.977, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 14.209150326797385, | |
| "eval_loss": 1.9765559434890747, | |
| "eval_runtime": 2.3875, | |
| "eval_samples_per_second": 814.644, | |
| "eval_steps_per_second": 12.984, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 14.313725490196079, | |
| "eval_loss": 1.9118081331253052, | |
| "eval_runtime": 2.4268, | |
| "eval_samples_per_second": 801.463, | |
| "eval_steps_per_second": 12.774, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 14.418300653594772, | |
| "eval_loss": 1.9367104768753052, | |
| "eval_runtime": 2.4183, | |
| "eval_samples_per_second": 804.282, | |
| "eval_steps_per_second": 12.819, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 14.522875816993464, | |
| "eval_loss": 1.9372411966323853, | |
| "eval_runtime": 2.4201, | |
| "eval_samples_per_second": 803.68, | |
| "eval_steps_per_second": 12.809, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 14.627450980392156, | |
| "eval_loss": 1.923244595527649, | |
| "eval_runtime": 2.4324, | |
| "eval_samples_per_second": 799.606, | |
| "eval_steps_per_second": 12.744, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 14.732026143790849, | |
| "eval_loss": 1.999928593635559, | |
| "eval_runtime": 2.4161, | |
| "eval_samples_per_second": 805.026, | |
| "eval_steps_per_second": 12.831, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 14.836601307189543, | |
| "eval_loss": 1.9354963302612305, | |
| "eval_runtime": 2.3965, | |
| "eval_samples_per_second": 811.59, | |
| "eval_steps_per_second": 12.935, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 14.941176470588236, | |
| "eval_loss": 1.965717077255249, | |
| "eval_runtime": 2.4083, | |
| "eval_samples_per_second": 807.639, | |
| "eval_steps_per_second": 12.872, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 14.941176470588236, | |
| "grad_norm": 1.9256954193115234, | |
| "learning_rate": 1.4893617021276596e-05, | |
| "loss": 1.9329, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 15.104575163398692, | |
| "eval_loss": 1.9451290369033813, | |
| "eval_runtime": 2.3926, | |
| "eval_samples_per_second": 812.94, | |
| "eval_steps_per_second": 12.957, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 15.209150326797385, | |
| "eval_loss": 1.9596805572509766, | |
| "eval_runtime": 2.4003, | |
| "eval_samples_per_second": 810.326, | |
| "eval_steps_per_second": 12.915, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 15.313725490196079, | |
| "eval_loss": 1.9179918766021729, | |
| "eval_runtime": 2.388, | |
| "eval_samples_per_second": 814.49, | |
| "eval_steps_per_second": 12.982, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 15.418300653594772, | |
| "eval_loss": 1.9344438314437866, | |
| "eval_runtime": 2.4394, | |
| "eval_samples_per_second": 797.34, | |
| "eval_steps_per_second": 12.708, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 15.522875816993464, | |
| "eval_loss": 1.9772499799728394, | |
| "eval_runtime": 2.4332, | |
| "eval_samples_per_second": 799.368, | |
| "eval_steps_per_second": 12.741, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 15.627450980392156, | |
| "eval_loss": 1.9796696901321411, | |
| "eval_runtime": 2.4159, | |
| "eval_samples_per_second": 805.096, | |
| "eval_steps_per_second": 12.832, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 15.732026143790849, | |
| "eval_loss": 1.9060624837875366, | |
| "eval_runtime": 2.4134, | |
| "eval_samples_per_second": 805.928, | |
| "eval_steps_per_second": 12.845, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 15.836601307189543, | |
| "eval_loss": 1.8885753154754639, | |
| "eval_runtime": 2.4138, | |
| "eval_samples_per_second": 805.794, | |
| "eval_steps_per_second": 12.843, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 15.941176470588236, | |
| "eval_loss": 1.9685148000717163, | |
| "eval_runtime": 2.4145, | |
| "eval_samples_per_second": 805.548, | |
| "eval_steps_per_second": 12.839, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 15.941176470588236, | |
| "grad_norm": 1.8362805843353271, | |
| "learning_rate": 1.4468085106382981e-05, | |
| "loss": 1.9144, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 16.104575163398692, | |
| "eval_loss": 1.9797979593276978, | |
| "eval_runtime": 2.3896, | |
| "eval_samples_per_second": 813.952, | |
| "eval_steps_per_second": 12.973, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 16.209150326797385, | |
| "eval_loss": 1.9587923288345337, | |
| "eval_runtime": 2.4182, | |
| "eval_samples_per_second": 804.331, | |
| "eval_steps_per_second": 12.82, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 16.313725490196077, | |
| "eval_loss": 1.9274431467056274, | |
| "eval_runtime": 2.3878, | |
| "eval_samples_per_second": 814.56, | |
| "eval_steps_per_second": 12.983, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 16.41830065359477, | |
| "eval_loss": 1.958984375, | |
| "eval_runtime": 2.4353, | |
| "eval_samples_per_second": 798.68, | |
| "eval_steps_per_second": 12.73, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 16.522875816993466, | |
| "eval_loss": 1.9552897214889526, | |
| "eval_runtime": 2.4367, | |
| "eval_samples_per_second": 798.2, | |
| "eval_steps_per_second": 12.722, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 16.627450980392158, | |
| "eval_loss": 1.9142913818359375, | |
| "eval_runtime": 2.4118, | |
| "eval_samples_per_second": 806.44, | |
| "eval_steps_per_second": 12.853, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 16.73202614379085, | |
| "eval_loss": 1.9268592596054077, | |
| "eval_runtime": 2.4143, | |
| "eval_samples_per_second": 805.633, | |
| "eval_steps_per_second": 12.84, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 16.836601307189543, | |
| "eval_loss": 1.965384840965271, | |
| "eval_runtime": 2.4177, | |
| "eval_samples_per_second": 804.492, | |
| "eval_steps_per_second": 12.822, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 16.941176470588236, | |
| "eval_loss": 1.9789389371871948, | |
| "eval_runtime": 2.4157, | |
| "eval_samples_per_second": 805.134, | |
| "eval_steps_per_second": 12.832, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 16.941176470588236, | |
| "grad_norm": 1.918270230293274, | |
| "learning_rate": 1.4042553191489363e-05, | |
| "loss": 1.9103, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 17.104575163398692, | |
| "eval_loss": 1.9568538665771484, | |
| "eval_runtime": 2.4112, | |
| "eval_samples_per_second": 806.642, | |
| "eval_steps_per_second": 12.857, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 17.209150326797385, | |
| "eval_loss": 1.9652351140975952, | |
| "eval_runtime": 2.3867, | |
| "eval_samples_per_second": 814.935, | |
| "eval_steps_per_second": 12.989, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 17.313725490196077, | |
| "eval_loss": 1.9810242652893066, | |
| "eval_runtime": 2.3904, | |
| "eval_samples_per_second": 813.655, | |
| "eval_steps_per_second": 12.968, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 17.41830065359477, | |
| "eval_loss": 1.928475260734558, | |
| "eval_runtime": 2.3938, | |
| "eval_samples_per_second": 812.526, | |
| "eval_steps_per_second": 12.95, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 17.522875816993466, | |
| "eval_loss": 1.937834620475769, | |
| "eval_runtime": 2.4231, | |
| "eval_samples_per_second": 802.677, | |
| "eval_steps_per_second": 12.793, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 17.627450980392158, | |
| "eval_loss": 1.9520132541656494, | |
| "eval_runtime": 2.4525, | |
| "eval_samples_per_second": 793.081, | |
| "eval_steps_per_second": 12.64, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 17.73202614379085, | |
| "eval_loss": 1.9782063961029053, | |
| "eval_runtime": 2.4273, | |
| "eval_samples_per_second": 801.302, | |
| "eval_steps_per_second": 12.771, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 17.836601307189543, | |
| "eval_loss": 1.9681016206741333, | |
| "eval_runtime": 2.4116, | |
| "eval_samples_per_second": 806.515, | |
| "eval_steps_per_second": 12.854, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 17.941176470588236, | |
| "eval_loss": 1.8925799131393433, | |
| "eval_runtime": 2.4098, | |
| "eval_samples_per_second": 807.121, | |
| "eval_steps_per_second": 12.864, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 17.941176470588236, | |
| "grad_norm": 1.8396626710891724, | |
| "learning_rate": 1.3617021276595745e-05, | |
| "loss": 1.887, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 18.104575163398692, | |
| "eval_loss": 1.9333585500717163, | |
| "eval_runtime": 2.4106, | |
| "eval_samples_per_second": 806.84, | |
| "eval_steps_per_second": 12.86, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 18.209150326797385, | |
| "eval_loss": 1.925223469734192, | |
| "eval_runtime": 2.3936, | |
| "eval_samples_per_second": 812.57, | |
| "eval_steps_per_second": 12.951, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 18.313725490196077, | |
| "eval_loss": 1.9398906230926514, | |
| "eval_runtime": 2.4263, | |
| "eval_samples_per_second": 801.632, | |
| "eval_steps_per_second": 12.777, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 18.41830065359477, | |
| "eval_loss": 1.9518330097198486, | |
| "eval_runtime": 2.3924, | |
| "eval_samples_per_second": 812.992, | |
| "eval_steps_per_second": 12.958, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 18.522875816993466, | |
| "eval_loss": 1.992385983467102, | |
| "eval_runtime": 2.3882, | |
| "eval_samples_per_second": 814.425, | |
| "eval_steps_per_second": 12.981, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 18.627450980392158, | |
| "eval_loss": 1.905411720275879, | |
| "eval_runtime": 2.3878, | |
| "eval_samples_per_second": 814.555, | |
| "eval_steps_per_second": 12.983, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 18.73202614379085, | |
| "eval_loss": 1.9480212926864624, | |
| "eval_runtime": 2.4462, | |
| "eval_samples_per_second": 795.096, | |
| "eval_steps_per_second": 12.672, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 18.836601307189543, | |
| "eval_loss": 1.9308433532714844, | |
| "eval_runtime": 2.445, | |
| "eval_samples_per_second": 795.492, | |
| "eval_steps_per_second": 12.679, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 18.941176470588236, | |
| "eval_loss": 1.9342797994613647, | |
| "eval_runtime": 2.414, | |
| "eval_samples_per_second": 805.712, | |
| "eval_steps_per_second": 12.842, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 18.941176470588236, | |
| "grad_norm": 1.8954132795333862, | |
| "learning_rate": 1.3191489361702127e-05, | |
| "loss": 1.8644, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 19.104575163398692, | |
| "eval_loss": 1.9860589504241943, | |
| "eval_runtime": 2.4977, | |
| "eval_samples_per_second": 778.731, | |
| "eval_steps_per_second": 12.412, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 19.209150326797385, | |
| "eval_loss": 1.9452682733535767, | |
| "eval_runtime": 2.413, | |
| "eval_samples_per_second": 806.043, | |
| "eval_steps_per_second": 12.847, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 19.313725490196077, | |
| "eval_loss": 1.8998777866363525, | |
| "eval_runtime": 2.3895, | |
| "eval_samples_per_second": 813.974, | |
| "eval_steps_per_second": 12.973, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 19.41830065359477, | |
| "eval_loss": 1.93086838722229, | |
| "eval_runtime": 2.383, | |
| "eval_samples_per_second": 816.195, | |
| "eval_steps_per_second": 13.009, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 19.522875816993466, | |
| "eval_loss": 1.954423189163208, | |
| "eval_runtime": 2.393, | |
| "eval_samples_per_second": 812.774, | |
| "eval_steps_per_second": 12.954, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 19.627450980392158, | |
| "eval_loss": 1.9435521364212036, | |
| "eval_runtime": 2.4095, | |
| "eval_samples_per_second": 807.227, | |
| "eval_steps_per_second": 12.866, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 19.73202614379085, | |
| "eval_loss": 1.9165093898773193, | |
| "eval_runtime": 2.3936, | |
| "eval_samples_per_second": 812.598, | |
| "eval_steps_per_second": 12.951, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 19.836601307189543, | |
| "eval_loss": 1.9695576429367065, | |
| "eval_runtime": 2.4194, | |
| "eval_samples_per_second": 803.908, | |
| "eval_steps_per_second": 12.813, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 19.941176470588236, | |
| "eval_loss": 1.9247905015945435, | |
| "eval_runtime": 2.4477, | |
| "eval_samples_per_second": 794.61, | |
| "eval_steps_per_second": 12.665, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 19.941176470588236, | |
| "grad_norm": 1.8007246255874634, | |
| "learning_rate": 1.2765957446808513e-05, | |
| "loss": 1.8687, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 20.104575163398692, | |
| "eval_loss": 1.9517226219177246, | |
| "eval_runtime": 2.4132, | |
| "eval_samples_per_second": 805.981, | |
| "eval_steps_per_second": 12.846, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 20.209150326797385, | |
| "eval_loss": 1.9041943550109863, | |
| "eval_runtime": 2.4088, | |
| "eval_samples_per_second": 807.45, | |
| "eval_steps_per_second": 12.869, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 20.313725490196077, | |
| "eval_loss": 1.992538571357727, | |
| "eval_runtime": 2.4151, | |
| "eval_samples_per_second": 805.334, | |
| "eval_steps_per_second": 12.836, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 20.41830065359477, | |
| "eval_loss": 1.8842642307281494, | |
| "eval_runtime": 2.4235, | |
| "eval_samples_per_second": 802.553, | |
| "eval_steps_per_second": 12.791, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 20.522875816993466, | |
| "eval_loss": 1.979435682296753, | |
| "eval_runtime": 2.3874, | |
| "eval_samples_per_second": 814.695, | |
| "eval_steps_per_second": 12.985, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 20.627450980392158, | |
| "eval_loss": 1.9789183139801025, | |
| "eval_runtime": 2.3863, | |
| "eval_samples_per_second": 815.07, | |
| "eval_steps_per_second": 12.991, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 20.73202614379085, | |
| "eval_loss": 1.9192243814468384, | |
| "eval_runtime": 2.3878, | |
| "eval_samples_per_second": 814.573, | |
| "eval_steps_per_second": 12.983, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 20.836601307189543, | |
| "eval_loss": 1.9174364805221558, | |
| "eval_runtime": 2.3935, | |
| "eval_samples_per_second": 812.633, | |
| "eval_steps_per_second": 12.952, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 20.941176470588236, | |
| "eval_loss": 1.9568063020706177, | |
| "eval_runtime": 2.4211, | |
| "eval_samples_per_second": 803.354, | |
| "eval_steps_per_second": 12.804, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 20.941176470588236, | |
| "grad_norm": 1.8035422563552856, | |
| "learning_rate": 1.2340425531914895e-05, | |
| "loss": 1.8361, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 21.104575163398692, | |
| "eval_loss": 1.9128376245498657, | |
| "eval_runtime": 2.4627, | |
| "eval_samples_per_second": 789.793, | |
| "eval_steps_per_second": 12.588, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 21.209150326797385, | |
| "eval_loss": 1.9428894519805908, | |
| "eval_runtime": 2.4145, | |
| "eval_samples_per_second": 805.537, | |
| "eval_steps_per_second": 12.839, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 21.313725490196077, | |
| "eval_loss": 1.95577073097229, | |
| "eval_runtime": 2.4665, | |
| "eval_samples_per_second": 788.556, | |
| "eval_steps_per_second": 12.568, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 21.41830065359477, | |
| "eval_loss": 1.9128402471542358, | |
| "eval_runtime": 2.4073, | |
| "eval_samples_per_second": 807.972, | |
| "eval_steps_per_second": 12.878, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 21.522875816993466, | |
| "eval_loss": 1.9588518142700195, | |
| "eval_runtime": 2.3872, | |
| "eval_samples_per_second": 814.75, | |
| "eval_steps_per_second": 12.986, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 21.627450980392158, | |
| "eval_loss": 1.9744739532470703, | |
| "eval_runtime": 2.4193, | |
| "eval_samples_per_second": 803.936, | |
| "eval_steps_per_second": 12.813, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 21.73202614379085, | |
| "eval_loss": 1.9993598461151123, | |
| "eval_runtime": 2.3876, | |
| "eval_samples_per_second": 814.634, | |
| "eval_steps_per_second": 12.984, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 21.836601307189543, | |
| "eval_loss": 1.959428071975708, | |
| "eval_runtime": 2.3868, | |
| "eval_samples_per_second": 814.91, | |
| "eval_steps_per_second": 12.988, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 21.941176470588236, | |
| "eval_loss": 1.9063607454299927, | |
| "eval_runtime": 2.3998, | |
| "eval_samples_per_second": 810.501, | |
| "eval_steps_per_second": 12.918, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 21.941176470588236, | |
| "grad_norm": 1.7296489477157593, | |
| "learning_rate": 1.1914893617021277e-05, | |
| "loss": 1.8461, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 22.104575163398692, | |
| "eval_loss": 1.9475386142730713, | |
| "eval_runtime": 2.4105, | |
| "eval_samples_per_second": 806.877, | |
| "eval_steps_per_second": 12.86, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 22.209150326797385, | |
| "eval_loss": 1.9637689590454102, | |
| "eval_runtime": 2.4134, | |
| "eval_samples_per_second": 805.906, | |
| "eval_steps_per_second": 12.845, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 22.313725490196077, | |
| "eval_loss": 1.9350510835647583, | |
| "eval_runtime": 2.4228, | |
| "eval_samples_per_second": 802.787, | |
| "eval_steps_per_second": 12.795, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 22.41830065359477, | |
| "eval_loss": 1.9184238910675049, | |
| "eval_runtime": 2.4127, | |
| "eval_samples_per_second": 806.144, | |
| "eval_steps_per_second": 12.849, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 22.522875816993466, | |
| "eval_loss": 1.9656862020492554, | |
| "eval_runtime": 2.4145, | |
| "eval_samples_per_second": 805.559, | |
| "eval_steps_per_second": 12.839, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 22.627450980392158, | |
| "eval_loss": 1.9108870029449463, | |
| "eval_runtime": 2.3915, | |
| "eval_samples_per_second": 813.292, | |
| "eval_steps_per_second": 12.962, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 22.73202614379085, | |
| "eval_loss": 1.9319818019866943, | |
| "eval_runtime": 2.3918, | |
| "eval_samples_per_second": 813.179, | |
| "eval_steps_per_second": 12.961, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 22.836601307189543, | |
| "eval_loss": 1.9680215120315552, | |
| "eval_runtime": 2.4001, | |
| "eval_samples_per_second": 810.398, | |
| "eval_steps_per_second": 12.916, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 22.941176470588236, | |
| "eval_loss": 1.9628697633743286, | |
| "eval_runtime": 2.3906, | |
| "eval_samples_per_second": 813.608, | |
| "eval_steps_per_second": 12.968, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 22.941176470588236, | |
| "grad_norm": 1.8930681943893433, | |
| "learning_rate": 1.1489361702127662e-05, | |
| "loss": 1.8246, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 23.104575163398692, | |
| "eval_loss": 1.9429619312286377, | |
| "eval_runtime": 2.3877, | |
| "eval_samples_per_second": 814.582, | |
| "eval_steps_per_second": 12.983, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 23.209150326797385, | |
| "eval_loss": 1.9262027740478516, | |
| "eval_runtime": 2.5188, | |
| "eval_samples_per_second": 772.187, | |
| "eval_steps_per_second": 12.307, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 23.313725490196077, | |
| "eval_loss": 1.9614677429199219, | |
| "eval_runtime": 2.4709, | |
| "eval_samples_per_second": 787.175, | |
| "eval_steps_per_second": 12.546, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 23.41830065359477, | |
| "eval_loss": 1.9559693336486816, | |
| "eval_runtime": 2.4152, | |
| "eval_samples_per_second": 805.308, | |
| "eval_steps_per_second": 12.835, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 23.522875816993466, | |
| "eval_loss": 1.966059923171997, | |
| "eval_runtime": 2.4122, | |
| "eval_samples_per_second": 806.317, | |
| "eval_steps_per_second": 12.851, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 23.627450980392158, | |
| "eval_loss": 1.9780749082565308, | |
| "eval_runtime": 2.437, | |
| "eval_samples_per_second": 798.101, | |
| "eval_steps_per_second": 12.72, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 23.73202614379085, | |
| "eval_loss": 1.980626106262207, | |
| "eval_runtime": 2.404, | |
| "eval_samples_per_second": 809.055, | |
| "eval_steps_per_second": 12.895, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 23.836601307189543, | |
| "eval_loss": 1.9735476970672607, | |
| "eval_runtime": 2.3933, | |
| "eval_samples_per_second": 812.67, | |
| "eval_steps_per_second": 12.953, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 23.941176470588236, | |
| "eval_loss": 1.9582773447036743, | |
| "eval_runtime": 2.4128, | |
| "eval_samples_per_second": 806.128, | |
| "eval_steps_per_second": 12.848, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 23.941176470588236, | |
| "grad_norm": 1.8361761569976807, | |
| "learning_rate": 1.1063829787234044e-05, | |
| "loss": 1.8181, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 24.104575163398692, | |
| "eval_loss": 1.9554569721221924, | |
| "eval_runtime": 2.387, | |
| "eval_samples_per_second": 814.839, | |
| "eval_steps_per_second": 12.987, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 24.209150326797385, | |
| "eval_loss": 1.9165290594100952, | |
| "eval_runtime": 2.3919, | |
| "eval_samples_per_second": 813.148, | |
| "eval_steps_per_second": 12.96, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 24.313725490196077, | |
| "eval_loss": 1.9637575149536133, | |
| "eval_runtime": 2.4007, | |
| "eval_samples_per_second": 810.191, | |
| "eval_steps_per_second": 12.913, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 24.41830065359477, | |
| "eval_loss": 2.000793218612671, | |
| "eval_runtime": 2.4153, | |
| "eval_samples_per_second": 805.272, | |
| "eval_steps_per_second": 12.835, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 24.522875816993466, | |
| "eval_loss": 1.9246618747711182, | |
| "eval_runtime": 2.4113, | |
| "eval_samples_per_second": 806.61, | |
| "eval_steps_per_second": 12.856, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 24.627450980392158, | |
| "eval_loss": 1.9719598293304443, | |
| "eval_runtime": 2.4103, | |
| "eval_samples_per_second": 806.944, | |
| "eval_steps_per_second": 12.861, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 24.73202614379085, | |
| "eval_loss": 2.008406400680542, | |
| "eval_runtime": 2.4142, | |
| "eval_samples_per_second": 805.666, | |
| "eval_steps_per_second": 12.841, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 24.836601307189543, | |
| "eval_loss": 1.942387580871582, | |
| "eval_runtime": 2.4335, | |
| "eval_samples_per_second": 799.261, | |
| "eval_steps_per_second": 12.739, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 24.941176470588236, | |
| "eval_loss": 1.9110654592514038, | |
| "eval_runtime": 2.3897, | |
| "eval_samples_per_second": 813.923, | |
| "eval_steps_per_second": 12.973, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 24.941176470588236, | |
| "grad_norm": 1.8342725038528442, | |
| "learning_rate": 1.0638297872340426e-05, | |
| "loss": 1.797, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 25.104575163398692, | |
| "eval_loss": 1.9787415266036987, | |
| "eval_runtime": 2.4748, | |
| "eval_samples_per_second": 785.908, | |
| "eval_steps_per_second": 12.526, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 25.209150326797385, | |
| "eval_loss": 1.9613263607025146, | |
| "eval_runtime": 2.3841, | |
| "eval_samples_per_second": 815.829, | |
| "eval_steps_per_second": 13.003, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 25.313725490196077, | |
| "eval_loss": 1.8806324005126953, | |
| "eval_runtime": 2.4198, | |
| "eval_samples_per_second": 803.779, | |
| "eval_steps_per_second": 12.811, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 25.41830065359477, | |
| "eval_loss": 1.9231013059616089, | |
| "eval_runtime": 2.3851, | |
| "eval_samples_per_second": 815.485, | |
| "eval_steps_per_second": 12.997, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 25.522875816993466, | |
| "eval_loss": 1.9021631479263306, | |
| "eval_runtime": 2.3853, | |
| "eval_samples_per_second": 815.428, | |
| "eval_steps_per_second": 12.997, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 25.627450980392158, | |
| "eval_loss": 1.9682537317276, | |
| "eval_runtime": 2.4444, | |
| "eval_samples_per_second": 795.691, | |
| "eval_steps_per_second": 12.682, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 25.73202614379085, | |
| "eval_loss": 1.9824862480163574, | |
| "eval_runtime": 2.4349, | |
| "eval_samples_per_second": 798.799, | |
| "eval_steps_per_second": 12.732, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 25.836601307189543, | |
| "eval_loss": 1.962891936302185, | |
| "eval_runtime": 2.411, | |
| "eval_samples_per_second": 806.729, | |
| "eval_steps_per_second": 12.858, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 25.941176470588236, | |
| "eval_loss": 1.9116088151931763, | |
| "eval_runtime": 2.4247, | |
| "eval_samples_per_second": 802.172, | |
| "eval_steps_per_second": 12.785, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 25.941176470588236, | |
| "grad_norm": 1.9028220176696777, | |
| "learning_rate": 1.0212765957446808e-05, | |
| "loss": 1.7749, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 26.104575163398692, | |
| "eval_loss": 1.9699651002883911, | |
| "eval_runtime": 2.4102, | |
| "eval_samples_per_second": 806.995, | |
| "eval_steps_per_second": 12.862, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 26.209150326797385, | |
| "eval_loss": 1.9811697006225586, | |
| "eval_runtime": 2.386, | |
| "eval_samples_per_second": 815.174, | |
| "eval_steps_per_second": 12.992, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 26.313725490196077, | |
| "eval_loss": 1.9248907566070557, | |
| "eval_runtime": 2.3932, | |
| "eval_samples_per_second": 812.727, | |
| "eval_steps_per_second": 12.953, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 26.41830065359477, | |
| "eval_loss": 1.9683917760849, | |
| "eval_runtime": 2.3996, | |
| "eval_samples_per_second": 810.537, | |
| "eval_steps_per_second": 12.919, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 26.522875816993466, | |
| "eval_loss": 1.9604750871658325, | |
| "eval_runtime": 2.3993, | |
| "eval_samples_per_second": 810.666, | |
| "eval_steps_per_second": 12.921, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 26.627450980392158, | |
| "eval_loss": 1.8918408155441284, | |
| "eval_runtime": 2.4162, | |
| "eval_samples_per_second": 804.968, | |
| "eval_steps_per_second": 12.83, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 26.73202614379085, | |
| "eval_loss": 1.9443118572235107, | |
| "eval_runtime": 2.3904, | |
| "eval_samples_per_second": 813.679, | |
| "eval_steps_per_second": 12.969, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 26.836601307189543, | |
| "eval_loss": 1.9147528409957886, | |
| "eval_runtime": 2.4305, | |
| "eval_samples_per_second": 800.261, | |
| "eval_steps_per_second": 12.755, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 26.941176470588236, | |
| "eval_loss": 1.8974157571792603, | |
| "eval_runtime": 2.4514, | |
| "eval_samples_per_second": 793.417, | |
| "eval_steps_per_second": 12.646, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 26.941176470588236, | |
| "grad_norm": 1.7782148122787476, | |
| "learning_rate": 9.787234042553192e-06, | |
| "loss": 1.8022, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 27.104575163398692, | |
| "eval_loss": 1.9711873531341553, | |
| "eval_runtime": 2.4377, | |
| "eval_samples_per_second": 797.883, | |
| "eval_steps_per_second": 12.717, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 27.209150326797385, | |
| "eval_loss": 1.9718581438064575, | |
| "eval_runtime": 2.404, | |
| "eval_samples_per_second": 809.082, | |
| "eval_steps_per_second": 12.895, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 27.313725490196077, | |
| "eval_loss": 1.9540036916732788, | |
| "eval_runtime": 2.411, | |
| "eval_samples_per_second": 806.731, | |
| "eval_steps_per_second": 12.858, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 27.41830065359477, | |
| "eval_loss": 1.8907063007354736, | |
| "eval_runtime": 2.3875, | |
| "eval_samples_per_second": 814.658, | |
| "eval_steps_per_second": 12.984, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 27.522875816993466, | |
| "eval_loss": 1.9907869100570679, | |
| "eval_runtime": 2.3956, | |
| "eval_samples_per_second": 811.916, | |
| "eval_steps_per_second": 12.941, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 27.627450980392158, | |
| "eval_loss": 1.9273970127105713, | |
| "eval_runtime": 2.3922, | |
| "eval_samples_per_second": 813.044, | |
| "eval_steps_per_second": 12.959, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 27.73202614379085, | |
| "eval_loss": 1.9233652353286743, | |
| "eval_runtime": 2.3912, | |
| "eval_samples_per_second": 813.399, | |
| "eval_steps_per_second": 12.964, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 27.836601307189543, | |
| "eval_loss": 1.9580994844436646, | |
| "eval_runtime": 2.3875, | |
| "eval_samples_per_second": 814.648, | |
| "eval_steps_per_second": 12.984, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 27.941176470588236, | |
| "eval_loss": 1.9409220218658447, | |
| "eval_runtime": 2.4743, | |
| "eval_samples_per_second": 786.095, | |
| "eval_steps_per_second": 12.529, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 27.941176470588236, | |
| "grad_norm": 1.8487893342971802, | |
| "learning_rate": 9.361702127659576e-06, | |
| "loss": 1.7879, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 28.104575163398692, | |
| "eval_loss": 1.8715720176696777, | |
| "eval_runtime": 2.4867, | |
| "eval_samples_per_second": 782.152, | |
| "eval_steps_per_second": 12.466, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 28.209150326797385, | |
| "eval_loss": 1.9945265054702759, | |
| "eval_runtime": 2.4913, | |
| "eval_samples_per_second": 780.713, | |
| "eval_steps_per_second": 12.443, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 28.313725490196077, | |
| "eval_loss": 1.8658331632614136, | |
| "eval_runtime": 2.4682, | |
| "eval_samples_per_second": 788.013, | |
| "eval_steps_per_second": 12.56, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 28.41830065359477, | |
| "eval_loss": 1.946846604347229, | |
| "eval_runtime": 2.4685, | |
| "eval_samples_per_second": 787.912, | |
| "eval_steps_per_second": 12.558, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 28.522875816993466, | |
| "eval_loss": 1.9456650018692017, | |
| "eval_runtime": 2.4963, | |
| "eval_samples_per_second": 779.142, | |
| "eval_steps_per_second": 12.418, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 28.627450980392158, | |
| "eval_loss": 1.9555299282073975, | |
| "eval_runtime": 2.467, | |
| "eval_samples_per_second": 788.417, | |
| "eval_steps_per_second": 12.566, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 28.73202614379085, | |
| "eval_loss": 1.9544572830200195, | |
| "eval_runtime": 2.4342, | |
| "eval_samples_per_second": 799.038, | |
| "eval_steps_per_second": 12.735, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 28.836601307189543, | |
| "eval_loss": 1.9225515127182007, | |
| "eval_runtime": 2.3903, | |
| "eval_samples_per_second": 813.707, | |
| "eval_steps_per_second": 12.969, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 28.941176470588236, | |
| "eval_loss": 1.9331358671188354, | |
| "eval_runtime": 2.3878, | |
| "eval_samples_per_second": 814.568, | |
| "eval_steps_per_second": 12.983, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 28.941176470588236, | |
| "grad_norm": 1.77451491355896, | |
| "learning_rate": 8.936170212765958e-06, | |
| "loss": 1.8019, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 29.104575163398692, | |
| "eval_loss": 1.9785720109939575, | |
| "eval_runtime": 2.4363, | |
| "eval_samples_per_second": 798.34, | |
| "eval_steps_per_second": 12.724, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 29.209150326797385, | |
| "eval_loss": 1.9767541885375977, | |
| "eval_runtime": 2.4358, | |
| "eval_samples_per_second": 798.515, | |
| "eval_steps_per_second": 12.727, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 29.313725490196077, | |
| "eval_loss": 1.9601216316223145, | |
| "eval_runtime": 2.4048, | |
| "eval_samples_per_second": 808.808, | |
| "eval_steps_per_second": 12.891, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 29.41830065359477, | |
| "eval_loss": 1.917155385017395, | |
| "eval_runtime": 2.4141, | |
| "eval_samples_per_second": 805.697, | |
| "eval_steps_per_second": 12.841, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 29.522875816993466, | |
| "eval_loss": 1.922187328338623, | |
| "eval_runtime": 2.4093, | |
| "eval_samples_per_second": 807.305, | |
| "eval_steps_per_second": 12.867, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 29.627450980392158, | |
| "eval_loss": 1.918397307395935, | |
| "eval_runtime": 2.3948, | |
| "eval_samples_per_second": 812.187, | |
| "eval_steps_per_second": 12.945, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 29.73202614379085, | |
| "eval_loss": 1.882236123085022, | |
| "eval_runtime": 2.3912, | |
| "eval_samples_per_second": 813.4, | |
| "eval_steps_per_second": 12.964, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 29.836601307189543, | |
| "eval_loss": 1.916178822517395, | |
| "eval_runtime": 2.3869, | |
| "eval_samples_per_second": 814.874, | |
| "eval_steps_per_second": 12.988, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 29.941176470588236, | |
| "eval_loss": 1.9769715070724487, | |
| "eval_runtime": 2.3858, | |
| "eval_samples_per_second": 815.249, | |
| "eval_steps_per_second": 12.994, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 29.941176470588236, | |
| "grad_norm": 2.04988431930542, | |
| "learning_rate": 8.510638297872341e-06, | |
| "loss": 1.7614, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 30.104575163398692, | |
| "eval_loss": 1.9033125638961792, | |
| "eval_runtime": 2.4701, | |
| "eval_samples_per_second": 787.431, | |
| "eval_steps_per_second": 12.55, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 30.209150326797385, | |
| "eval_loss": 1.9454644918441772, | |
| "eval_runtime": 2.4111, | |
| "eval_samples_per_second": 806.698, | |
| "eval_steps_per_second": 12.857, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 30.313725490196077, | |
| "eval_loss": 1.9106584787368774, | |
| "eval_runtime": 2.4243, | |
| "eval_samples_per_second": 802.304, | |
| "eval_steps_per_second": 12.787, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 30.41830065359477, | |
| "eval_loss": 1.9812813997268677, | |
| "eval_runtime": 2.4148, | |
| "eval_samples_per_second": 805.466, | |
| "eval_steps_per_second": 12.838, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 30.522875816993466, | |
| "eval_loss": 1.9426772594451904, | |
| "eval_runtime": 2.3997, | |
| "eval_samples_per_second": 810.526, | |
| "eval_steps_per_second": 12.918, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 30.627450980392158, | |
| "eval_loss": 1.9499095678329468, | |
| "eval_runtime": 2.3957, | |
| "eval_samples_per_second": 811.863, | |
| "eval_steps_per_second": 12.94, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 30.73202614379085, | |
| "eval_loss": 1.961235523223877, | |
| "eval_runtime": 2.3821, | |
| "eval_samples_per_second": 816.509, | |
| "eval_steps_per_second": 13.014, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 30.836601307189543, | |
| "eval_loss": 1.9450849294662476, | |
| "eval_runtime": 2.3942, | |
| "eval_samples_per_second": 812.387, | |
| "eval_steps_per_second": 12.948, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 30.941176470588236, | |
| "eval_loss": 1.9132739305496216, | |
| "eval_runtime": 2.4136, | |
| "eval_samples_per_second": 805.836, | |
| "eval_steps_per_second": 12.844, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 30.941176470588236, | |
| "grad_norm": 1.8763809204101562, | |
| "learning_rate": 8.085106382978723e-06, | |
| "loss": 1.7619, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 31.104575163398692, | |
| "eval_loss": 1.9205107688903809, | |
| "eval_runtime": 2.4332, | |
| "eval_samples_per_second": 799.374, | |
| "eval_steps_per_second": 12.741, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 31.209150326797385, | |
| "eval_loss": 1.946841835975647, | |
| "eval_runtime": 2.464, | |
| "eval_samples_per_second": 789.356, | |
| "eval_steps_per_second": 12.581, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 31.313725490196077, | |
| "eval_loss": 1.9464671611785889, | |
| "eval_runtime": 2.4074, | |
| "eval_samples_per_second": 807.917, | |
| "eval_steps_per_second": 12.877, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 31.41830065359477, | |
| "eval_loss": 1.8833441734313965, | |
| "eval_runtime": 2.4082, | |
| "eval_samples_per_second": 807.64, | |
| "eval_steps_per_second": 12.872, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 31.522875816993466, | |
| "eval_loss": 1.9414160251617432, | |
| "eval_runtime": 2.407, | |
| "eval_samples_per_second": 808.072, | |
| "eval_steps_per_second": 12.879, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 31.627450980392158, | |
| "eval_loss": 1.9485697746276855, | |
| "eval_runtime": 2.4025, | |
| "eval_samples_per_second": 809.577, | |
| "eval_steps_per_second": 12.903, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 31.73202614379085, | |
| "eval_loss": 1.9184815883636475, | |
| "eval_runtime": 2.3828, | |
| "eval_samples_per_second": 816.275, | |
| "eval_steps_per_second": 13.01, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 31.836601307189543, | |
| "eval_loss": 1.9518897533416748, | |
| "eval_runtime": 2.4096, | |
| "eval_samples_per_second": 807.189, | |
| "eval_steps_per_second": 12.865, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 31.941176470588236, | |
| "eval_loss": 1.9385578632354736, | |
| "eval_runtime": 2.4088, | |
| "eval_samples_per_second": 807.447, | |
| "eval_steps_per_second": 12.869, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 31.941176470588236, | |
| "grad_norm": 1.8053061962127686, | |
| "learning_rate": 7.659574468085107e-06, | |
| "loss": 1.7713, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 32.10457516339869, | |
| "eval_loss": 1.8966560363769531, | |
| "eval_runtime": 2.4403, | |
| "eval_samples_per_second": 797.048, | |
| "eval_steps_per_second": 12.704, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 32.209150326797385, | |
| "eval_loss": 1.9649851322174072, | |
| "eval_runtime": 2.4092, | |
| "eval_samples_per_second": 807.307, | |
| "eval_steps_per_second": 12.867, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 32.31372549019608, | |
| "eval_loss": 1.919927954673767, | |
| "eval_runtime": 2.41, | |
| "eval_samples_per_second": 807.057, | |
| "eval_steps_per_second": 12.863, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 32.41830065359477, | |
| "eval_loss": 1.9147096872329712, | |
| "eval_runtime": 2.4127, | |
| "eval_samples_per_second": 806.134, | |
| "eval_steps_per_second": 12.848, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 32.52287581699346, | |
| "eval_loss": 1.9159774780273438, | |
| "eval_runtime": 2.4052, | |
| "eval_samples_per_second": 808.673, | |
| "eval_steps_per_second": 12.889, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 32.627450980392155, | |
| "eval_loss": 1.9953843355178833, | |
| "eval_runtime": 2.3906, | |
| "eval_samples_per_second": 813.616, | |
| "eval_steps_per_second": 12.968, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 32.73202614379085, | |
| "eval_loss": 1.92180597782135, | |
| "eval_runtime": 2.4249, | |
| "eval_samples_per_second": 802.081, | |
| "eval_steps_per_second": 12.784, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 32.83660130718954, | |
| "eval_loss": 1.9786967039108276, | |
| "eval_runtime": 2.3846, | |
| "eval_samples_per_second": 815.658, | |
| "eval_steps_per_second": 13.0, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 32.94117647058823, | |
| "eval_loss": 1.9362424612045288, | |
| "eval_runtime": 2.3879, | |
| "eval_samples_per_second": 814.534, | |
| "eval_steps_per_second": 12.982, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 32.94117647058823, | |
| "grad_norm": 2.0353338718414307, | |
| "learning_rate": 7.234042553191491e-06, | |
| "loss": 1.7635, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 33.10457516339869, | |
| "eval_loss": 1.9281338453292847, | |
| "eval_runtime": 2.4396, | |
| "eval_samples_per_second": 797.251, | |
| "eval_steps_per_second": 12.707, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 33.209150326797385, | |
| "eval_loss": 1.921140193939209, | |
| "eval_runtime": 2.4118, | |
| "eval_samples_per_second": 806.459, | |
| "eval_steps_per_second": 12.854, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 33.31372549019608, | |
| "eval_loss": 1.9680968523025513, | |
| "eval_runtime": 2.4046, | |
| "eval_samples_per_second": 808.854, | |
| "eval_steps_per_second": 12.892, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 33.41830065359477, | |
| "eval_loss": 1.9094316959381104, | |
| "eval_runtime": 2.4065, | |
| "eval_samples_per_second": 808.218, | |
| "eval_steps_per_second": 12.882, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 33.52287581699346, | |
| "eval_loss": 1.9845983982086182, | |
| "eval_runtime": 2.4045, | |
| "eval_samples_per_second": 808.905, | |
| "eval_steps_per_second": 12.893, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 33.627450980392155, | |
| "eval_loss": 1.9461405277252197, | |
| "eval_runtime": 2.4098, | |
| "eval_samples_per_second": 807.107, | |
| "eval_steps_per_second": 12.864, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 33.73202614379085, | |
| "eval_loss": 1.8947722911834717, | |
| "eval_runtime": 2.392, | |
| "eval_samples_per_second": 813.124, | |
| "eval_steps_per_second": 12.96, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 33.83660130718954, | |
| "eval_loss": 1.9371235370635986, | |
| "eval_runtime": 2.3855, | |
| "eval_samples_per_second": 815.347, | |
| "eval_steps_per_second": 12.995, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 33.94117647058823, | |
| "eval_loss": 2.006459951400757, | |
| "eval_runtime": 2.4252, | |
| "eval_samples_per_second": 802.01, | |
| "eval_steps_per_second": 12.783, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 33.94117647058823, | |
| "grad_norm": 1.884189486503601, | |
| "learning_rate": 6.808510638297873e-06, | |
| "loss": 1.7394, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 34.10457516339869, | |
| "eval_loss": 1.9282273054122925, | |
| "eval_runtime": 2.4222, | |
| "eval_samples_per_second": 802.994, | |
| "eval_steps_per_second": 12.798, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 34.209150326797385, | |
| "eval_loss": 1.9412920475006104, | |
| "eval_runtime": 2.5082, | |
| "eval_samples_per_second": 775.463, | |
| "eval_steps_per_second": 12.36, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 34.31372549019608, | |
| "eval_loss": 1.9883979558944702, | |
| "eval_runtime": 2.5353, | |
| "eval_samples_per_second": 767.18, | |
| "eval_steps_per_second": 12.228, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 34.41830065359477, | |
| "eval_loss": 1.917364239692688, | |
| "eval_runtime": 2.4621, | |
| "eval_samples_per_second": 789.978, | |
| "eval_steps_per_second": 12.591, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 34.52287581699346, | |
| "eval_loss": 1.9594651460647583, | |
| "eval_runtime": 2.5176, | |
| "eval_samples_per_second": 772.567, | |
| "eval_steps_per_second": 12.313, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 34.627450980392155, | |
| "eval_loss": 1.9423621892929077, | |
| "eval_runtime": 2.5063, | |
| "eval_samples_per_second": 776.037, | |
| "eval_steps_per_second": 12.369, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 34.73202614379085, | |
| "eval_loss": 1.9494574069976807, | |
| "eval_runtime": 2.4032, | |
| "eval_samples_per_second": 809.321, | |
| "eval_steps_per_second": 12.899, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 34.83660130718954, | |
| "eval_loss": 1.9160590171813965, | |
| "eval_runtime": 2.3866, | |
| "eval_samples_per_second": 814.959, | |
| "eval_steps_per_second": 12.989, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 34.94117647058823, | |
| "eval_loss": 1.963183879852295, | |
| "eval_runtime": 2.3866, | |
| "eval_samples_per_second": 814.96, | |
| "eval_steps_per_second": 12.989, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 34.94117647058823, | |
| "grad_norm": 1.7952407598495483, | |
| "learning_rate": 6.382978723404256e-06, | |
| "loss": 1.7434, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 35.10457516339869, | |
| "eval_loss": 1.9129880666732788, | |
| "eval_runtime": 2.3856, | |
| "eval_samples_per_second": 815.294, | |
| "eval_steps_per_second": 12.994, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 35.209150326797385, | |
| "eval_loss": 1.9850044250488281, | |
| "eval_runtime": 2.4394, | |
| "eval_samples_per_second": 797.311, | |
| "eval_steps_per_second": 12.708, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 35.31372549019608, | |
| "eval_loss": 1.9291285276412964, | |
| "eval_runtime": 2.3856, | |
| "eval_samples_per_second": 815.319, | |
| "eval_steps_per_second": 12.995, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 35.41830065359477, | |
| "eval_loss": 1.9300141334533691, | |
| "eval_runtime": 2.5153, | |
| "eval_samples_per_second": 773.266, | |
| "eval_steps_per_second": 12.325, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 35.52287581699346, | |
| "eval_loss": 1.9399768114089966, | |
| "eval_runtime": 2.4758, | |
| "eval_samples_per_second": 785.618, | |
| "eval_steps_per_second": 12.521, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 35.627450980392155, | |
| "eval_loss": 1.9735783338546753, | |
| "eval_runtime": 2.4325, | |
| "eval_samples_per_second": 799.598, | |
| "eval_steps_per_second": 12.744, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 35.73202614379085, | |
| "eval_loss": 1.9033024311065674, | |
| "eval_runtime": 2.4152, | |
| "eval_samples_per_second": 805.309, | |
| "eval_steps_per_second": 12.835, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 35.83660130718954, | |
| "eval_loss": 1.9249202013015747, | |
| "eval_runtime": 2.4104, | |
| "eval_samples_per_second": 806.927, | |
| "eval_steps_per_second": 12.861, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 35.94117647058823, | |
| "eval_loss": 1.9796316623687744, | |
| "eval_runtime": 2.4374, | |
| "eval_samples_per_second": 797.996, | |
| "eval_steps_per_second": 12.719, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 35.94117647058823, | |
| "grad_norm": 1.8199615478515625, | |
| "learning_rate": 5.957446808510638e-06, | |
| "loss": 1.7578, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 36.10457516339869, | |
| "eval_loss": 1.9595942497253418, | |
| "eval_runtime": 2.3871, | |
| "eval_samples_per_second": 814.78, | |
| "eval_steps_per_second": 12.986, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 36.209150326797385, | |
| "eval_loss": 1.9293735027313232, | |
| "eval_runtime": 2.3875, | |
| "eval_samples_per_second": 814.674, | |
| "eval_steps_per_second": 12.985, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 36.31372549019608, | |
| "eval_loss": 1.957201600074768, | |
| "eval_runtime": 2.3892, | |
| "eval_samples_per_second": 814.083, | |
| "eval_steps_per_second": 12.975, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 36.41830065359477, | |
| "eval_loss": 1.9536631107330322, | |
| "eval_runtime": 2.4048, | |
| "eval_samples_per_second": 808.796, | |
| "eval_steps_per_second": 12.891, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 36.52287581699346, | |
| "eval_loss": 1.974502682685852, | |
| "eval_runtime": 2.3928, | |
| "eval_samples_per_second": 812.843, | |
| "eval_steps_per_second": 12.955, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 36.627450980392155, | |
| "eval_loss": 1.9568116664886475, | |
| "eval_runtime": 2.4183, | |
| "eval_samples_per_second": 804.288, | |
| "eval_steps_per_second": 12.819, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 36.73202614379085, | |
| "eval_loss": 1.9689034223556519, | |
| "eval_runtime": 2.4676, | |
| "eval_samples_per_second": 788.229, | |
| "eval_steps_per_second": 12.563, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 36.83660130718954, | |
| "eval_loss": 1.9140371084213257, | |
| "eval_runtime": 2.4088, | |
| "eval_samples_per_second": 807.459, | |
| "eval_steps_per_second": 12.87, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 36.94117647058823, | |
| "eval_loss": 1.929794192314148, | |
| "eval_runtime": 2.4119, | |
| "eval_samples_per_second": 806.412, | |
| "eval_steps_per_second": 12.853, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 36.94117647058823, | |
| "grad_norm": 1.8074049949645996, | |
| "learning_rate": 5.531914893617022e-06, | |
| "loss": 1.7497, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 37.10457516339869, | |
| "eval_loss": 1.9698741436004639, | |
| "eval_runtime": 2.4082, | |
| "eval_samples_per_second": 807.67, | |
| "eval_steps_per_second": 12.873, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 37.209150326797385, | |
| "eval_loss": 1.90766441822052, | |
| "eval_runtime": 2.3974, | |
| "eval_samples_per_second": 811.298, | |
| "eval_steps_per_second": 12.931, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 37.31372549019608, | |
| "eval_loss": 1.9559139013290405, | |
| "eval_runtime": 2.3918, | |
| "eval_samples_per_second": 813.182, | |
| "eval_steps_per_second": 12.961, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 37.41830065359477, | |
| "eval_loss": 1.9621520042419434, | |
| "eval_runtime": 2.4126, | |
| "eval_samples_per_second": 806.189, | |
| "eval_steps_per_second": 12.849, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 37.52287581699346, | |
| "eval_loss": 1.9238826036453247, | |
| "eval_runtime": 2.3925, | |
| "eval_samples_per_second": 812.943, | |
| "eval_steps_per_second": 12.957, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 37.627450980392155, | |
| "eval_loss": 1.9738985300064087, | |
| "eval_runtime": 2.4262, | |
| "eval_samples_per_second": 801.666, | |
| "eval_steps_per_second": 12.777, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 37.73202614379085, | |
| "eval_loss": 1.936599612236023, | |
| "eval_runtime": 2.446, | |
| "eval_samples_per_second": 795.165, | |
| "eval_steps_per_second": 12.674, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 37.83660130718954, | |
| "eval_loss": 1.9857661724090576, | |
| "eval_runtime": 2.4387, | |
| "eval_samples_per_second": 797.57, | |
| "eval_steps_per_second": 12.712, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 37.94117647058823, | |
| "eval_loss": 1.9602775573730469, | |
| "eval_runtime": 2.4623, | |
| "eval_samples_per_second": 789.898, | |
| "eval_steps_per_second": 12.59, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 37.94117647058823, | |
| "grad_norm": 1.8968150615692139, | |
| "learning_rate": 5.106382978723404e-06, | |
| "loss": 1.7378, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 38.10457516339869, | |
| "eval_loss": 1.9392098188400269, | |
| "eval_runtime": 2.5445, | |
| "eval_samples_per_second": 764.389, | |
| "eval_steps_per_second": 12.183, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 38.209150326797385, | |
| "eval_loss": 1.9554734230041504, | |
| "eval_runtime": 2.4153, | |
| "eval_samples_per_second": 805.294, | |
| "eval_steps_per_second": 12.835, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 38.31372549019608, | |
| "eval_loss": 1.9802982807159424, | |
| "eval_runtime": 2.4043, | |
| "eval_samples_per_second": 808.971, | |
| "eval_steps_per_second": 12.894, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 38.41830065359477, | |
| "eval_loss": 1.950205683708191, | |
| "eval_runtime": 2.408, | |
| "eval_samples_per_second": 807.739, | |
| "eval_steps_per_second": 12.874, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 38.52287581699346, | |
| "eval_loss": 1.959083914756775, | |
| "eval_runtime": 2.3857, | |
| "eval_samples_per_second": 815.264, | |
| "eval_steps_per_second": 12.994, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 38.627450980392155, | |
| "eval_loss": 1.9582518339157104, | |
| "eval_runtime": 2.4306, | |
| "eval_samples_per_second": 800.213, | |
| "eval_steps_per_second": 12.754, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 38.73202614379085, | |
| "eval_loss": 1.9507373571395874, | |
| "eval_runtime": 2.3863, | |
| "eval_samples_per_second": 815.084, | |
| "eval_steps_per_second": 12.991, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 38.83660130718954, | |
| "eval_loss": 1.9410823583602905, | |
| "eval_runtime": 2.4092, | |
| "eval_samples_per_second": 807.311, | |
| "eval_steps_per_second": 12.867, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 38.94117647058823, | |
| "eval_loss": 1.922089695930481, | |
| "eval_runtime": 2.4113, | |
| "eval_samples_per_second": 806.625, | |
| "eval_steps_per_second": 12.856, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 38.94117647058823, | |
| "grad_norm": 1.8066309690475464, | |
| "learning_rate": 4.680851063829788e-06, | |
| "loss": 1.7324, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 39.10457516339869, | |
| "eval_loss": 1.9468454122543335, | |
| "eval_runtime": 2.3833, | |
| "eval_samples_per_second": 816.093, | |
| "eval_steps_per_second": 13.007, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 39.209150326797385, | |
| "eval_loss": 1.9370498657226562, | |
| "eval_runtime": 2.3853, | |
| "eval_samples_per_second": 815.41, | |
| "eval_steps_per_second": 12.996, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 39.31372549019608, | |
| "eval_loss": 1.9278494119644165, | |
| "eval_runtime": 2.4316, | |
| "eval_samples_per_second": 799.901, | |
| "eval_steps_per_second": 12.749, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 39.41830065359477, | |
| "eval_loss": 1.9604259729385376, | |
| "eval_runtime": 2.4996, | |
| "eval_samples_per_second": 778.116, | |
| "eval_steps_per_second": 12.402, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 39.52287581699346, | |
| "eval_loss": 1.9375855922698975, | |
| "eval_runtime": 2.4112, | |
| "eval_samples_per_second": 806.646, | |
| "eval_steps_per_second": 12.857, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 39.627450980392155, | |
| "eval_loss": 1.9473201036453247, | |
| "eval_runtime": 2.4652, | |
| "eval_samples_per_second": 788.988, | |
| "eval_steps_per_second": 12.575, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 39.73202614379085, | |
| "eval_loss": 1.9490294456481934, | |
| "eval_runtime": 2.4617, | |
| "eval_samples_per_second": 790.102, | |
| "eval_steps_per_second": 12.593, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 39.83660130718954, | |
| "eval_loss": 1.913360834121704, | |
| "eval_runtime": 2.445, | |
| "eval_samples_per_second": 795.514, | |
| "eval_steps_per_second": 12.679, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 39.94117647058823, | |
| "eval_loss": 1.9323056936264038, | |
| "eval_runtime": 2.4383, | |
| "eval_samples_per_second": 797.685, | |
| "eval_steps_per_second": 12.714, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 39.94117647058823, | |
| "grad_norm": 1.7927449941635132, | |
| "learning_rate": 4.255319148936171e-06, | |
| "loss": 1.7195, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 40.10457516339869, | |
| "eval_loss": 1.9119060039520264, | |
| "eval_runtime": 2.4209, | |
| "eval_samples_per_second": 803.426, | |
| "eval_steps_per_second": 12.805, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 40.209150326797385, | |
| "eval_loss": 1.939374327659607, | |
| "eval_runtime": 2.4652, | |
| "eval_samples_per_second": 788.974, | |
| "eval_steps_per_second": 12.575, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 40.31372549019608, | |
| "eval_loss": 1.9959666728973389, | |
| "eval_runtime": 2.3847, | |
| "eval_samples_per_second": 815.601, | |
| "eval_steps_per_second": 12.999, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 40.41830065359477, | |
| "eval_loss": 1.9789413213729858, | |
| "eval_runtime": 2.3893, | |
| "eval_samples_per_second": 814.046, | |
| "eval_steps_per_second": 12.975, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 40.52287581699346, | |
| "eval_loss": 1.9750434160232544, | |
| "eval_runtime": 2.3871, | |
| "eval_samples_per_second": 814.781, | |
| "eval_steps_per_second": 12.986, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 40.627450980392155, | |
| "eval_loss": 1.9399486780166626, | |
| "eval_runtime": 2.4366, | |
| "eval_samples_per_second": 798.229, | |
| "eval_steps_per_second": 12.722, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 40.73202614379085, | |
| "eval_loss": 1.9516425132751465, | |
| "eval_runtime": 2.4136, | |
| "eval_samples_per_second": 805.85, | |
| "eval_steps_per_second": 12.844, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 40.83660130718954, | |
| "eval_loss": 1.941023826599121, | |
| "eval_runtime": 2.4094, | |
| "eval_samples_per_second": 807.251, | |
| "eval_steps_per_second": 12.866, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 40.94117647058823, | |
| "eval_loss": 1.9317693710327148, | |
| "eval_runtime": 2.4091, | |
| "eval_samples_per_second": 807.345, | |
| "eval_steps_per_second": 12.868, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 40.94117647058823, | |
| "grad_norm": 1.7872660160064697, | |
| "learning_rate": 3.8297872340425535e-06, | |
| "loss": 1.7043, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 41.10457516339869, | |
| "eval_loss": 1.9890044927597046, | |
| "eval_runtime": 2.414, | |
| "eval_samples_per_second": 805.708, | |
| "eval_steps_per_second": 12.842, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 41.209150326797385, | |
| "eval_loss": 1.9840960502624512, | |
| "eval_runtime": 2.3959, | |
| "eval_samples_per_second": 811.809, | |
| "eval_steps_per_second": 12.939, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 41.31372549019608, | |
| "eval_loss": 1.9188443422317505, | |
| "eval_runtime": 2.4069, | |
| "eval_samples_per_second": 808.088, | |
| "eval_steps_per_second": 12.88, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 41.41830065359477, | |
| "eval_loss": 1.9614779949188232, | |
| "eval_runtime": 2.3886, | |
| "eval_samples_per_second": 814.274, | |
| "eval_steps_per_second": 12.978, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 41.52287581699346, | |
| "eval_loss": 1.9061365127563477, | |
| "eval_runtime": 2.3924, | |
| "eval_samples_per_second": 812.981, | |
| "eval_steps_per_second": 12.958, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 41.627450980392155, | |
| "eval_loss": 1.909993052482605, | |
| "eval_runtime": 2.3858, | |
| "eval_samples_per_second": 815.236, | |
| "eval_steps_per_second": 12.993, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 41.73202614379085, | |
| "eval_loss": 1.9422426223754883, | |
| "eval_runtime": 2.3888, | |
| "eval_samples_per_second": 814.201, | |
| "eval_steps_per_second": 12.977, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 41.83660130718954, | |
| "eval_loss": 1.9640315771102905, | |
| "eval_runtime": 2.4376, | |
| "eval_samples_per_second": 797.911, | |
| "eval_steps_per_second": 12.717, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 41.94117647058823, | |
| "eval_loss": 1.917662262916565, | |
| "eval_runtime": 2.4538, | |
| "eval_samples_per_second": 792.647, | |
| "eval_steps_per_second": 12.633, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 41.94117647058823, | |
| "grad_norm": 1.7721134424209595, | |
| "learning_rate": 3.4042553191489363e-06, | |
| "loss": 1.7169, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 42.10457516339869, | |
| "eval_loss": 1.9163570404052734, | |
| "eval_runtime": 2.4281, | |
| "eval_samples_per_second": 801.034, | |
| "eval_steps_per_second": 12.767, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 42.209150326797385, | |
| "eval_loss": 1.9374709129333496, | |
| "eval_runtime": 2.4192, | |
| "eval_samples_per_second": 803.999, | |
| "eval_steps_per_second": 12.814, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 42.31372549019608, | |
| "eval_loss": 1.9525771141052246, | |
| "eval_runtime": 2.4174, | |
| "eval_samples_per_second": 804.581, | |
| "eval_steps_per_second": 12.824, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 42.41830065359477, | |
| "eval_loss": 1.938783884048462, | |
| "eval_runtime": 2.3891, | |
| "eval_samples_per_second": 814.119, | |
| "eval_steps_per_second": 12.976, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 42.52287581699346, | |
| "eval_loss": 1.9378857612609863, | |
| "eval_runtime": 2.3879, | |
| "eval_samples_per_second": 814.515, | |
| "eval_steps_per_second": 12.982, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 42.627450980392155, | |
| "eval_loss": 1.931535243988037, | |
| "eval_runtime": 2.3924, | |
| "eval_samples_per_second": 812.994, | |
| "eval_steps_per_second": 12.958, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 42.73202614379085, | |
| "eval_loss": 1.9418144226074219, | |
| "eval_runtime": 2.3917, | |
| "eval_samples_per_second": 813.219, | |
| "eval_steps_per_second": 12.961, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 42.83660130718954, | |
| "eval_loss": 1.9460214376449585, | |
| "eval_runtime": 2.417, | |
| "eval_samples_per_second": 804.722, | |
| "eval_steps_per_second": 12.826, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 42.94117647058823, | |
| "eval_loss": 1.9129729270935059, | |
| "eval_runtime": 2.3889, | |
| "eval_samples_per_second": 814.19, | |
| "eval_steps_per_second": 12.977, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 42.94117647058823, | |
| "grad_norm": 1.8166015148162842, | |
| "learning_rate": 2.978723404255319e-06, | |
| "loss": 1.7315, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 43.10457516339869, | |
| "eval_loss": 1.9539881944656372, | |
| "eval_runtime": 2.4601, | |
| "eval_samples_per_second": 790.624, | |
| "eval_steps_per_second": 12.601, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 43.209150326797385, | |
| "eval_loss": 1.951253890991211, | |
| "eval_runtime": 2.4131, | |
| "eval_samples_per_second": 806.019, | |
| "eval_steps_per_second": 12.847, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 43.31372549019608, | |
| "eval_loss": 2.0078840255737305, | |
| "eval_runtime": 2.4151, | |
| "eval_samples_per_second": 805.365, | |
| "eval_steps_per_second": 12.836, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 43.41830065359477, | |
| "eval_loss": 1.9754467010498047, | |
| "eval_runtime": 2.409, | |
| "eval_samples_per_second": 807.392, | |
| "eval_steps_per_second": 12.868, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 43.52287581699346, | |
| "eval_loss": 1.972512125968933, | |
| "eval_runtime": 2.4172, | |
| "eval_samples_per_second": 804.635, | |
| "eval_steps_per_second": 12.825, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 43.627450980392155, | |
| "eval_loss": 1.9601085186004639, | |
| "eval_runtime": 2.4555, | |
| "eval_samples_per_second": 792.113, | |
| "eval_steps_per_second": 12.625, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 43.73202614379085, | |
| "eval_loss": 1.9266124963760376, | |
| "eval_runtime": 2.4604, | |
| "eval_samples_per_second": 790.516, | |
| "eval_steps_per_second": 12.599, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 43.83660130718954, | |
| "eval_loss": 1.9546335935592651, | |
| "eval_runtime": 2.467, | |
| "eval_samples_per_second": 788.405, | |
| "eval_steps_per_second": 12.566, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 43.94117647058823, | |
| "eval_loss": 1.918619990348816, | |
| "eval_runtime": 2.4607, | |
| "eval_samples_per_second": 790.428, | |
| "eval_steps_per_second": 12.598, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 43.94117647058823, | |
| "grad_norm": 1.8490442037582397, | |
| "learning_rate": 2.553191489361702e-06, | |
| "loss": 1.7095, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 44.10457516339869, | |
| "eval_loss": 1.9544674158096313, | |
| "eval_runtime": 2.5483, | |
| "eval_samples_per_second": 763.254, | |
| "eval_steps_per_second": 12.165, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 44.209150326797385, | |
| "eval_loss": 2.0218536853790283, | |
| "eval_runtime": 2.578, | |
| "eval_samples_per_second": 754.467, | |
| "eval_steps_per_second": 12.025, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 44.31372549019608, | |
| "eval_loss": 1.9499460458755493, | |
| "eval_runtime": 2.4731, | |
| "eval_samples_per_second": 786.477, | |
| "eval_steps_per_second": 12.535, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 44.41830065359477, | |
| "eval_loss": 1.9414080381393433, | |
| "eval_runtime": 2.4805, | |
| "eval_samples_per_second": 784.104, | |
| "eval_steps_per_second": 12.497, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 44.52287581699346, | |
| "eval_loss": 1.961714506149292, | |
| "eval_runtime": 2.4673, | |
| "eval_samples_per_second": 788.301, | |
| "eval_steps_per_second": 12.564, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 44.627450980392155, | |
| "eval_loss": 1.9939833879470825, | |
| "eval_runtime": 2.4722, | |
| "eval_samples_per_second": 786.751, | |
| "eval_steps_per_second": 12.539, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 44.73202614379085, | |
| "eval_loss": 1.9617350101470947, | |
| "eval_runtime": 2.4683, | |
| "eval_samples_per_second": 787.977, | |
| "eval_steps_per_second": 12.559, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 44.83660130718954, | |
| "eval_loss": 1.9692201614379883, | |
| "eval_runtime": 2.4374, | |
| "eval_samples_per_second": 797.97, | |
| "eval_steps_per_second": 12.718, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 44.94117647058823, | |
| "eval_loss": 1.9219003915786743, | |
| "eval_runtime": 2.435, | |
| "eval_samples_per_second": 798.78, | |
| "eval_steps_per_second": 12.731, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 44.94117647058823, | |
| "grad_norm": 1.796848177909851, | |
| "learning_rate": 2.1276595744680853e-06, | |
| "loss": 1.7071, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 45.10457516339869, | |
| "eval_loss": 1.9611177444458008, | |
| "eval_runtime": 2.4195, | |
| "eval_samples_per_second": 803.881, | |
| "eval_steps_per_second": 12.812, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 45.209150326797385, | |
| "eval_loss": 1.9778918027877808, | |
| "eval_runtime": 2.4197, | |
| "eval_samples_per_second": 803.821, | |
| "eval_steps_per_second": 12.812, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 45.31372549019608, | |
| "eval_loss": 1.9238309860229492, | |
| "eval_runtime": 2.4635, | |
| "eval_samples_per_second": 789.519, | |
| "eval_steps_per_second": 12.584, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 45.41830065359477, | |
| "eval_loss": 1.9089758396148682, | |
| "eval_runtime": 2.4247, | |
| "eval_samples_per_second": 802.146, | |
| "eval_steps_per_second": 12.785, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 45.52287581699346, | |
| "eval_loss": 1.9342485666275024, | |
| "eval_runtime": 2.4113, | |
| "eval_samples_per_second": 806.609, | |
| "eval_steps_per_second": 12.856, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 45.627450980392155, | |
| "eval_loss": 1.9936097860336304, | |
| "eval_runtime": 2.4133, | |
| "eval_samples_per_second": 805.937, | |
| "eval_steps_per_second": 12.845, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 45.73202614379085, | |
| "eval_loss": 1.897844672203064, | |
| "eval_runtime": 2.4034, | |
| "eval_samples_per_second": 809.278, | |
| "eval_steps_per_second": 12.899, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 45.83660130718954, | |
| "eval_loss": 1.9208406209945679, | |
| "eval_runtime": 2.3918, | |
| "eval_samples_per_second": 813.195, | |
| "eval_steps_per_second": 12.961, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 45.94117647058823, | |
| "eval_loss": 1.9177494049072266, | |
| "eval_runtime": 2.4571, | |
| "eval_samples_per_second": 791.578, | |
| "eval_steps_per_second": 12.616, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 45.94117647058823, | |
| "grad_norm": 1.7152032852172852, | |
| "learning_rate": 1.7021276595744682e-06, | |
| "loss": 1.7116, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 46.10457516339869, | |
| "eval_loss": 1.957858681678772, | |
| "eval_runtime": 2.4339, | |
| "eval_samples_per_second": 799.125, | |
| "eval_steps_per_second": 12.737, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 46.209150326797385, | |
| "eval_loss": 1.9422305822372437, | |
| "eval_runtime": 2.4497, | |
| "eval_samples_per_second": 793.973, | |
| "eval_steps_per_second": 12.655, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 46.31372549019608, | |
| "eval_loss": 1.9287089109420776, | |
| "eval_runtime": 2.423, | |
| "eval_samples_per_second": 802.723, | |
| "eval_steps_per_second": 12.794, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 46.41830065359477, | |
| "eval_loss": 1.9444739818572998, | |
| "eval_runtime": 2.4956, | |
| "eval_samples_per_second": 779.376, | |
| "eval_steps_per_second": 12.422, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 46.52287581699346, | |
| "eval_loss": 1.923707127571106, | |
| "eval_runtime": 2.4651, | |
| "eval_samples_per_second": 789.011, | |
| "eval_steps_per_second": 12.575, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 46.627450980392155, | |
| "eval_loss": 1.9269739389419556, | |
| "eval_runtime": 2.4648, | |
| "eval_samples_per_second": 789.121, | |
| "eval_steps_per_second": 12.577, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 46.73202614379085, | |
| "eval_loss": 1.9492802619934082, | |
| "eval_runtime": 2.4582, | |
| "eval_samples_per_second": 791.229, | |
| "eval_steps_per_second": 12.611, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 46.83660130718954, | |
| "eval_loss": 1.9743090867996216, | |
| "eval_runtime": 2.4551, | |
| "eval_samples_per_second": 792.232, | |
| "eval_steps_per_second": 12.627, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 46.94117647058823, | |
| "eval_loss": 1.957751750946045, | |
| "eval_runtime": 2.434, | |
| "eval_samples_per_second": 799.105, | |
| "eval_steps_per_second": 12.736, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 46.94117647058823, | |
| "grad_norm": 1.9101431369781494, | |
| "learning_rate": 1.276595744680851e-06, | |
| "loss": 1.733, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 47.10457516339869, | |
| "eval_loss": 1.9019426107406616, | |
| "eval_runtime": 2.4414, | |
| "eval_samples_per_second": 796.664, | |
| "eval_steps_per_second": 12.697, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 47.209150326797385, | |
| "eval_loss": 1.944284200668335, | |
| "eval_runtime": 2.4514, | |
| "eval_samples_per_second": 793.435, | |
| "eval_steps_per_second": 12.646, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 47.31372549019608, | |
| "eval_loss": 1.9661508798599243, | |
| "eval_runtime": 2.4442, | |
| "eval_samples_per_second": 795.747, | |
| "eval_steps_per_second": 12.683, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 47.41830065359477, | |
| "eval_loss": 1.9728316068649292, | |
| "eval_runtime": 2.4822, | |
| "eval_samples_per_second": 783.586, | |
| "eval_steps_per_second": 12.489, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 47.52287581699346, | |
| "eval_loss": 1.923363208770752, | |
| "eval_runtime": 2.4644, | |
| "eval_samples_per_second": 789.251, | |
| "eval_steps_per_second": 12.579, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 47.627450980392155, | |
| "eval_loss": 1.9165805578231812, | |
| "eval_runtime": 2.3965, | |
| "eval_samples_per_second": 811.601, | |
| "eval_steps_per_second": 12.936, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 47.73202614379085, | |
| "eval_loss": 1.9413442611694336, | |
| "eval_runtime": 2.4408, | |
| "eval_samples_per_second": 796.869, | |
| "eval_steps_per_second": 12.701, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 47.83660130718954, | |
| "eval_loss": 1.8855735063552856, | |
| "eval_runtime": 2.4507, | |
| "eval_samples_per_second": 793.659, | |
| "eval_steps_per_second": 12.65, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 47.94117647058823, | |
| "eval_loss": 1.952731728553772, | |
| "eval_runtime": 2.5241, | |
| "eval_samples_per_second": 770.567, | |
| "eval_steps_per_second": 12.282, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 47.94117647058823, | |
| "grad_norm": 1.7582765817642212, | |
| "learning_rate": 8.510638297872341e-07, | |
| "loss": 1.7065, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 48.10457516339869, | |
| "eval_loss": 1.9524160623550415, | |
| "eval_runtime": 2.4138, | |
| "eval_samples_per_second": 805.783, | |
| "eval_steps_per_second": 12.843, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 48.209150326797385, | |
| "eval_loss": 1.9682825803756714, | |
| "eval_runtime": 2.5144, | |
| "eval_samples_per_second": 773.543, | |
| "eval_steps_per_second": 12.329, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 48.31372549019608, | |
| "eval_loss": 1.9489309787750244, | |
| "eval_runtime": 2.5744, | |
| "eval_samples_per_second": 755.51, | |
| "eval_steps_per_second": 12.042, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 48.41830065359477, | |
| "eval_loss": 1.9564448595046997, | |
| "eval_runtime": 2.4521, | |
| "eval_samples_per_second": 793.202, | |
| "eval_steps_per_second": 12.642, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 48.52287581699346, | |
| "eval_loss": 1.9767297506332397, | |
| "eval_runtime": 2.4404, | |
| "eval_samples_per_second": 796.999, | |
| "eval_steps_per_second": 12.703, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 48.627450980392155, | |
| "eval_loss": 1.9058864116668701, | |
| "eval_runtime": 2.4851, | |
| "eval_samples_per_second": 782.665, | |
| "eval_steps_per_second": 12.474, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 48.73202614379085, | |
| "eval_loss": 1.9629017114639282, | |
| "eval_runtime": 2.4371, | |
| "eval_samples_per_second": 798.081, | |
| "eval_steps_per_second": 12.72, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 48.83660130718954, | |
| "eval_loss": 1.937988519668579, | |
| "eval_runtime": 2.4379, | |
| "eval_samples_per_second": 797.822, | |
| "eval_steps_per_second": 12.716, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 48.94117647058823, | |
| "eval_loss": 1.9694868326187134, | |
| "eval_runtime": 2.4378, | |
| "eval_samples_per_second": 797.855, | |
| "eval_steps_per_second": 12.716, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 48.94117647058823, | |
| "grad_norm": 1.9079190492630005, | |
| "learning_rate": 4.2553191489361704e-07, | |
| "loss": 1.6997, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 49.10457516339869, | |
| "eval_loss": 1.9250315427780151, | |
| "eval_runtime": 2.4353, | |
| "eval_samples_per_second": 798.679, | |
| "eval_steps_per_second": 12.73, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 49.209150326797385, | |
| "eval_loss": 1.9391655921936035, | |
| "eval_runtime": 2.4099, | |
| "eval_samples_per_second": 807.094, | |
| "eval_steps_per_second": 12.864, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 49.31372549019608, | |
| "eval_loss": 1.9522807598114014, | |
| "eval_runtime": 2.4344, | |
| "eval_samples_per_second": 798.963, | |
| "eval_steps_per_second": 12.734, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 49.41830065359477, | |
| "eval_loss": 1.9459158182144165, | |
| "eval_runtime": 2.4156, | |
| "eval_samples_per_second": 805.199, | |
| "eval_steps_per_second": 12.834, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 49.52287581699346, | |
| "eval_loss": 1.9365217685699463, | |
| "eval_runtime": 2.4071, | |
| "eval_samples_per_second": 808.028, | |
| "eval_steps_per_second": 12.879, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 49.627450980392155, | |
| "eval_loss": 1.9133816957473755, | |
| "eval_runtime": 2.3906, | |
| "eval_samples_per_second": 813.594, | |
| "eval_steps_per_second": 12.967, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 49.73202614379085, | |
| "eval_loss": 1.9433826208114624, | |
| "eval_runtime": 2.4005, | |
| "eval_samples_per_second": 810.237, | |
| "eval_steps_per_second": 12.914, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 49.83660130718954, | |
| "eval_loss": 1.9771692752838135, | |
| "eval_runtime": 2.3888, | |
| "eval_samples_per_second": 814.201, | |
| "eval_steps_per_second": 12.977, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 49.94117647058823, | |
| "eval_loss": 1.9776495695114136, | |
| "eval_runtime": 2.3848, | |
| "eval_samples_per_second": 815.59, | |
| "eval_steps_per_second": 12.999, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 49.94117647058823, | |
| "grad_norm": 1.8996864557266235, | |
| "learning_rate": 0.0, | |
| "loss": 1.6668, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 49.94117647058823, | |
| "step": 450, | |
| "total_flos": 1.2548402868338688e+16, | |
| "train_loss": 1.8820014402601455, | |
| "train_runtime": 2628.924, | |
| "train_samples_per_second": 185.114, | |
| "train_steps_per_second": 0.171 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 450, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2548402868338688e+16, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |