| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 100, |
| "global_step": 910, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.02202036884117809, |
| "grad_norm": 8.139582633972168, |
| "learning_rate": 1.8000000000000001e-06, |
| "loss": 1.04783125, |
| "loss_accumulated": 16.7653, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04404073768235618, |
| "grad_norm": 6.492475509643555, |
| "learning_rate": 3.8000000000000005e-06, |
| "loss": 1.2244375, |
| "loss_accumulated": 19.591, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06606110652353427, |
| "grad_norm": 8.457890510559082, |
| "learning_rate": 5.8e-06, |
| "loss": 1.0909875, |
| "loss_accumulated": 17.4558, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.08808147536471236, |
| "grad_norm": 5.892795085906982, |
| "learning_rate": 7.800000000000002e-06, |
| "loss": 1.07938125, |
| "loss_accumulated": 17.2701, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11010184420589045, |
| "grad_norm": 6.56881856918335, |
| "learning_rate": 9.800000000000001e-06, |
| "loss": 1.10010625, |
| "loss_accumulated": 17.6017, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.13212221304706853, |
| "grad_norm": 4.701152801513672, |
| "learning_rate": 9.895348837209303e-06, |
| "loss": 1.0741625, |
| "loss_accumulated": 17.1866, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.15414258188824662, |
| "grad_norm": 9.48551082611084, |
| "learning_rate": 9.779069767441862e-06, |
| "loss": 1.04658125, |
| "loss_accumulated": 16.7453, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.17616295072942473, |
| "grad_norm": 5.215735912322998, |
| "learning_rate": 9.662790697674419e-06, |
| "loss": 1.044925, |
| "loss_accumulated": 16.7188, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1981833195706028, |
| "grad_norm": 6.0997314453125, |
| "learning_rate": 9.546511627906978e-06, |
| "loss": 1.05530625, |
| "loss_accumulated": 16.8849, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.2202036884117809, |
| "grad_norm": 6.653469085693359, |
| "learning_rate": 9.430232558139536e-06, |
| "loss": 1.08026875, |
| "loss_accumulated": 17.2843, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.2202036884117809, |
| "eval_loss": 1.0868104696273804, |
| "eval_runtime": 105.087, |
| "eval_samples_per_second": 7.689, |
| "eval_steps_per_second": 7.689, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.24222405725295898, |
| "grad_norm": 5.707369804382324, |
| "learning_rate": 9.313953488372095e-06, |
| "loss": 1.24023125, |
| "loss_accumulated": 19.8437, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.26424442609413706, |
| "grad_norm": 8.452932357788086, |
| "learning_rate": 9.197674418604652e-06, |
| "loss": 1.02721875, |
| "loss_accumulated": 16.4355, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.28626479493531515, |
| "grad_norm": 10.253166198730469, |
| "learning_rate": 9.08139534883721e-06, |
| "loss": 1.0215, |
| "loss_accumulated": 16.344, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.30828516377649323, |
| "grad_norm": 7.390908241271973, |
| "learning_rate": 8.965116279069767e-06, |
| "loss": 1.0461375, |
| "loss_accumulated": 16.7382, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.33030553261767137, |
| "grad_norm": 8.768054962158203, |
| "learning_rate": 8.848837209302326e-06, |
| "loss": 1.0388375, |
| "loss_accumulated": 16.6214, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.35232590145884946, |
| "grad_norm": 7.710715293884277, |
| "learning_rate": 8.732558139534885e-06, |
| "loss": 1.04136875, |
| "loss_accumulated": 16.6619, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.37434627030002754, |
| "grad_norm": 7.349565029144287, |
| "learning_rate": 8.616279069767443e-06, |
| "loss": 1.0436375, |
| "loss_accumulated": 16.6982, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.3963666391412056, |
| "grad_norm": 9.514286994934082, |
| "learning_rate": 8.5e-06, |
| "loss": 1.03064375, |
| "loss_accumulated": 16.4903, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4183870079823837, |
| "grad_norm": 10.636228561401367, |
| "learning_rate": 8.383720930232559e-06, |
| "loss": 1.06431875, |
| "loss_accumulated": 17.0291, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4404073768235618, |
| "grad_norm": 8.870360374450684, |
| "learning_rate": 8.267441860465118e-06, |
| "loss": 1.04586875, |
| "loss_accumulated": 16.7339, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4404073768235618, |
| "eval_loss": 1.0789271593093872, |
| "eval_runtime": 135.2266, |
| "eval_samples_per_second": 5.975, |
| "eval_steps_per_second": 5.975, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4624277456647399, |
| "grad_norm": 12.370259284973145, |
| "learning_rate": 8.151162790697676e-06, |
| "loss": 1.0686875, |
| "loss_accumulated": 17.099, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.48444811450591796, |
| "grad_norm": 9.170878410339355, |
| "learning_rate": 8.034883720930233e-06, |
| "loss": 1.07258125, |
| "loss_accumulated": 17.1613, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.506468483347096, |
| "grad_norm": 14.712733268737793, |
| "learning_rate": 7.918604651162792e-06, |
| "loss": 1.09678125, |
| "loss_accumulated": 17.5485, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5284888521882741, |
| "grad_norm": 9.565340042114258, |
| "learning_rate": 7.80232558139535e-06, |
| "loss": 1.12413125, |
| "loss_accumulated": 17.9861, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5505092210294522, |
| "grad_norm": 11.5183744430542, |
| "learning_rate": 7.686046511627909e-06, |
| "loss": 1.0627125, |
| "loss_accumulated": 17.0034, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5725295898706303, |
| "grad_norm": 8.43002986907959, |
| "learning_rate": 7.569767441860466e-06, |
| "loss": 1.043175, |
| "loss_accumulated": 16.6908, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5945499587118084, |
| "grad_norm": 8.955143928527832, |
| "learning_rate": 7.453488372093024e-06, |
| "loss": 1.020925, |
| "loss_accumulated": 16.3348, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6165703275529865, |
| "grad_norm": 9.91588020324707, |
| "learning_rate": 7.3372093023255816e-06, |
| "loss": 1.1777, |
| "loss_accumulated": 18.8432, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6385906963941645, |
| "grad_norm": 13.177949905395508, |
| "learning_rate": 7.22093023255814e-06, |
| "loss": 1.0682625, |
| "loss_accumulated": 17.0922, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6606110652353427, |
| "grad_norm": 9.943979263305664, |
| "learning_rate": 7.104651162790698e-06, |
| "loss": 1.03318125, |
| "loss_accumulated": 16.5309, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6606110652353427, |
| "eval_loss": 1.0708842277526855, |
| "eval_runtime": 105.1052, |
| "eval_samples_per_second": 7.688, |
| "eval_steps_per_second": 7.688, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6826314340765208, |
| "grad_norm": 14.787008285522461, |
| "learning_rate": 6.988372093023257e-06, |
| "loss": 1.2248875, |
| "loss_accumulated": 19.5982, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7046518029176989, |
| "grad_norm": 16.776479721069336, |
| "learning_rate": 6.8720930232558146e-06, |
| "loss": 1.07100625, |
| "loss_accumulated": 17.1361, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.726672171758877, |
| "grad_norm": 10.714720726013184, |
| "learning_rate": 6.755813953488373e-06, |
| "loss": 1.1591, |
| "loss_accumulated": 18.5456, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7486925406000551, |
| "grad_norm": 9.997598648071289, |
| "learning_rate": 6.63953488372093e-06, |
| "loss": 1.1428, |
| "loss_accumulated": 18.2848, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7707129094412332, |
| "grad_norm": 11.680377006530762, |
| "learning_rate": 6.5232558139534885e-06, |
| "loss": 1.0948625, |
| "loss_accumulated": 17.5178, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7927332782824112, |
| "grad_norm": 11.191390037536621, |
| "learning_rate": 6.4069767441860476e-06, |
| "loss": 1.07081875, |
| "loss_accumulated": 17.1331, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8147536471235893, |
| "grad_norm": 13.758176803588867, |
| "learning_rate": 6.290697674418606e-06, |
| "loss": 1.04526875, |
| "loss_accumulated": 16.7243, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8367740159647674, |
| "grad_norm": 17.639863967895508, |
| "learning_rate": 6.174418604651163e-06, |
| "loss": 1.07876875, |
| "loss_accumulated": 17.2603, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8587943848059455, |
| "grad_norm": 10.742379188537598, |
| "learning_rate": 6.0581395348837215e-06, |
| "loss": 1.12343125, |
| "loss_accumulated": 17.9749, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8808147536471236, |
| "grad_norm": 11.99518871307373, |
| "learning_rate": 5.941860465116279e-06, |
| "loss": 1.017175, |
| "loss_accumulated": 16.2748, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8808147536471236, |
| "eval_loss": 1.0637564659118652, |
| "eval_runtime": 109.5725, |
| "eval_samples_per_second": 7.374, |
| "eval_steps_per_second": 7.374, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9028351224883017, |
| "grad_norm": 14.046647071838379, |
| "learning_rate": 5.825581395348837e-06, |
| "loss": 1.1387875, |
| "loss_accumulated": 18.2206, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9248554913294798, |
| "grad_norm": 15.011589050292969, |
| "learning_rate": 5.709302325581396e-06, |
| "loss": 1.02514375, |
| "loss_accumulated": 16.4023, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9468758601706578, |
| "grad_norm": 11.991171836853027, |
| "learning_rate": 5.5930232558139544e-06, |
| "loss": 1.0738, |
| "loss_accumulated": 17.1808, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.9688962290118359, |
| "grad_norm": 12.477208137512207, |
| "learning_rate": 5.476744186046512e-06, |
| "loss": 1.05196875, |
| "loss_accumulated": 16.8315, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.990916597853014, |
| "grad_norm": 12.482328414916992, |
| "learning_rate": 5.36046511627907e-06, |
| "loss": 1.06891875, |
| "loss_accumulated": 17.1027, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.011010184420589, |
| "grad_norm": 21.27669906616211, |
| "learning_rate": 5.2441860465116275e-06, |
| "loss": 0.95168125, |
| "loss_accumulated": 15.2269, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0330305532617672, |
| "grad_norm": 13.355972290039062, |
| "learning_rate": 5.127906976744187e-06, |
| "loss": 1.06833125, |
| "loss_accumulated": 17.0933, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0550509221029452, |
| "grad_norm": 15.472939491271973, |
| "learning_rate": 5.011627906976745e-06, |
| "loss": 1.1307125, |
| "loss_accumulated": 18.0914, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.0770712909441233, |
| "grad_norm": 12.726252555847168, |
| "learning_rate": 4.895348837209303e-06, |
| "loss": 1.05006875, |
| "loss_accumulated": 16.8011, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.0990916597853013, |
| "grad_norm": 14.358366966247559, |
| "learning_rate": 4.7790697674418605e-06, |
| "loss": 1.0619125, |
| "loss_accumulated": 16.9906, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.0990916597853013, |
| "eval_loss": 1.0603028535842896, |
| "eval_runtime": 112.2463, |
| "eval_samples_per_second": 7.198, |
| "eval_steps_per_second": 7.198, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1211120286264795, |
| "grad_norm": 13.931950569152832, |
| "learning_rate": 4.66279069767442e-06, |
| "loss": 1.05581875, |
| "loss_accumulated": 16.8931, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.1431323974676575, |
| "grad_norm": 12.517531394958496, |
| "learning_rate": 4.546511627906977e-06, |
| "loss": 1.05785, |
| "loss_accumulated": 16.9256, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.1651527663088357, |
| "grad_norm": 17.931734085083008, |
| "learning_rate": 4.430232558139535e-06, |
| "loss": 0.9880875, |
| "loss_accumulated": 15.8094, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.1871731351500139, |
| "grad_norm": 13.656305313110352, |
| "learning_rate": 4.3139534883720935e-06, |
| "loss": 1.0042, |
| "loss_accumulated": 16.0672, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.2091935039911919, |
| "grad_norm": 17.034332275390625, |
| "learning_rate": 4.197674418604652e-06, |
| "loss": 1.054775, |
| "loss_accumulated": 16.8764, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2312138728323698, |
| "grad_norm": 12.149453163146973, |
| "learning_rate": 4.08139534883721e-06, |
| "loss": 1.062975, |
| "loss_accumulated": 17.0076, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.253234241673548, |
| "grad_norm": 12.923322677612305, |
| "learning_rate": 3.965116279069768e-06, |
| "loss": 1.18115, |
| "loss_accumulated": 18.8984, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.2752546105147262, |
| "grad_norm": 12.064355850219727, |
| "learning_rate": 3.848837209302326e-06, |
| "loss": 0.98351875, |
| "loss_accumulated": 15.7363, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.2972749793559042, |
| "grad_norm": 14.70433521270752, |
| "learning_rate": 3.7325581395348843e-06, |
| "loss": 1.0176125, |
| "loss_accumulated": 16.2818, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.3192953481970822, |
| "grad_norm": 14.562840461730957, |
| "learning_rate": 3.616279069767442e-06, |
| "loss": 1.00518125, |
| "loss_accumulated": 16.0829, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3192953481970822, |
| "eval_loss": 1.056916356086731, |
| "eval_runtime": 111.8754, |
| "eval_samples_per_second": 7.222, |
| "eval_steps_per_second": 7.222, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3413157170382604, |
| "grad_norm": 17.57223129272461, |
| "learning_rate": 3.5e-06, |
| "loss": 1.0139625, |
| "loss_accumulated": 16.2234, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.3633360858794386, |
| "grad_norm": 21.535526275634766, |
| "learning_rate": 3.3837209302325586e-06, |
| "loss": 1.002475, |
| "loss_accumulated": 16.0396, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.3853564547206165, |
| "grad_norm": 17.39263916015625, |
| "learning_rate": 3.2674418604651164e-06, |
| "loss": 1.035375, |
| "loss_accumulated": 16.566, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.4073768235617947, |
| "grad_norm": 12.677567481994629, |
| "learning_rate": 3.151162790697675e-06, |
| "loss": 1.0291, |
| "loss_accumulated": 16.4656, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.4293971924029727, |
| "grad_norm": 18.014575958251953, |
| "learning_rate": 3.034883720930233e-06, |
| "loss": 1.04011875, |
| "loss_accumulated": 16.6419, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4514175612441509, |
| "grad_norm": 13.09684944152832, |
| "learning_rate": 2.9186046511627908e-06, |
| "loss": 1.0107125, |
| "loss_accumulated": 16.1714, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.4734379300853289, |
| "grad_norm": 19.25403594970703, |
| "learning_rate": 2.8023255813953494e-06, |
| "loss": 1.05929375, |
| "loss_accumulated": 16.9487, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.495458298926507, |
| "grad_norm": 14.488405227661133, |
| "learning_rate": 2.6860465116279073e-06, |
| "loss": 1.0660875, |
| "loss_accumulated": 17.0574, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.5174786677676853, |
| "grad_norm": 31.23447036743164, |
| "learning_rate": 2.569767441860465e-06, |
| "loss": 1.08340625, |
| "loss_accumulated": 17.3345, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.5394990366088632, |
| "grad_norm": 14.270586967468262, |
| "learning_rate": 2.4534883720930233e-06, |
| "loss": 1.0713625, |
| "loss_accumulated": 17.1418, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.5394990366088632, |
| "eval_loss": 1.054555892944336, |
| "eval_runtime": 105.0636, |
| "eval_samples_per_second": 7.691, |
| "eval_steps_per_second": 7.691, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.5615194054500412, |
| "grad_norm": 13.536142349243164, |
| "learning_rate": 2.3372093023255816e-06, |
| "loss": 1.01050625, |
| "loss_accumulated": 16.1681, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.5835397742912194, |
| "grad_norm": 12.942073822021484, |
| "learning_rate": 2.22093023255814e-06, |
| "loss": 1.0091625, |
| "loss_accumulated": 16.1466, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.6055601431323976, |
| "grad_norm": 13.64247989654541, |
| "learning_rate": 2.104651162790698e-06, |
| "loss": 1.1232375, |
| "loss_accumulated": 17.9718, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.6275805119735756, |
| "grad_norm": 17.50322914123535, |
| "learning_rate": 1.988372093023256e-06, |
| "loss": 1.08881875, |
| "loss_accumulated": 17.4211, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.6496008808147535, |
| "grad_norm": 14.39642333984375, |
| "learning_rate": 1.872093023255814e-06, |
| "loss": 1.00690625, |
| "loss_accumulated": 16.1105, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.6716212496559317, |
| "grad_norm": 17.793312072753906, |
| "learning_rate": 1.7558139534883722e-06, |
| "loss": 1.029125, |
| "loss_accumulated": 16.466, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.69364161849711, |
| "grad_norm": 14.634993553161621, |
| "learning_rate": 1.6395348837209304e-06, |
| "loss": 1.1446875, |
| "loss_accumulated": 18.315, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.715661987338288, |
| "grad_norm": 15.071901321411133, |
| "learning_rate": 1.5232558139534885e-06, |
| "loss": 1.05595, |
| "loss_accumulated": 16.8952, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.7376823561794659, |
| "grad_norm": 16.431106567382812, |
| "learning_rate": 1.4069767441860465e-06, |
| "loss": 1.1066, |
| "loss_accumulated": 17.7056, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.759702725020644, |
| "grad_norm": 13.163710594177246, |
| "learning_rate": 1.2906976744186048e-06, |
| "loss": 1.12951875, |
| "loss_accumulated": 18.0723, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.759702725020644, |
| "eval_loss": 1.0527995824813843, |
| "eval_runtime": 105.1441, |
| "eval_samples_per_second": 7.685, |
| "eval_steps_per_second": 7.685, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.7817230938618223, |
| "grad_norm": 21.816795349121094, |
| "learning_rate": 1.1744186046511628e-06, |
| "loss": 1.082375, |
| "loss_accumulated": 17.318, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.8037434627030002, |
| "grad_norm": 14.944353103637695, |
| "learning_rate": 1.058139534883721e-06, |
| "loss": 1.02875, |
| "loss_accumulated": 16.46, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.8257638315441782, |
| "grad_norm": 13.911181449890137, |
| "learning_rate": 9.418604651162791e-07, |
| "loss": 1.036625, |
| "loss_accumulated": 16.586, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.8477842003853564, |
| "grad_norm": 15.232218742370605, |
| "learning_rate": 8.255813953488373e-07, |
| "loss": 1.0375875, |
| "loss_accumulated": 16.6014, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.8698045692265346, |
| "grad_norm": 22.372093200683594, |
| "learning_rate": 7.093023255813954e-07, |
| "loss": 1.071775, |
| "loss_accumulated": 17.1484, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.8918249380677126, |
| "grad_norm": 23.259920120239258, |
| "learning_rate": 5.930232558139536e-07, |
| "loss": 1.01205625, |
| "loss_accumulated": 16.1929, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.9138453069088908, |
| "grad_norm": 14.526731491088867, |
| "learning_rate": 4.767441860465117e-07, |
| "loss": 1.09719375, |
| "loss_accumulated": 17.5551, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.935865675750069, |
| "grad_norm": 18.644268035888672, |
| "learning_rate": 3.6046511627906984e-07, |
| "loss": 1.0167625, |
| "loss_accumulated": 16.2682, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.957886044591247, |
| "grad_norm": 14.494958877563477, |
| "learning_rate": 2.4418604651162793e-07, |
| "loss": 1.06646875, |
| "loss_accumulated": 17.0635, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.979906413432425, |
| "grad_norm": 12.328352928161621, |
| "learning_rate": 1.2790697674418605e-07, |
| "loss": 1.05874375, |
| "loss_accumulated": 16.9399, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.979906413432425, |
| "eval_loss": 1.051900029182434, |
| "eval_runtime": 105.0206, |
| "eval_samples_per_second": 7.694, |
| "eval_steps_per_second": 7.694, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 5.32742166519165, |
| "learning_rate": 1.1627906976744186e-08, |
| "loss": 0.95435625, |
| "loss_accumulated": 15.2697, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 910, |
| "total_flos": 4.037627772142704e+17, |
| "train_loss": 17.02884989308787, |
| "train_runtime": 7294.6769, |
| "train_samples_per_second": 1.992, |
| "train_steps_per_second": 0.125 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 910, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.037627772142704e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|