{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 1329, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007524454477050414, "grad_norm": 270.3504638671875, "learning_rate": 0.0, "loss": 7.3906, "step": 1 }, { "epoch": 0.0015048908954100827, "grad_norm": 218.03387451171875, "learning_rate": 1.4925373134328358e-07, "loss": 6.1914, "step": 2 }, { "epoch": 0.002257336343115124, "grad_norm": 234.69158935546875, "learning_rate": 2.9850746268656716e-07, "loss": 6.3906, "step": 3 }, { "epoch": 0.0030097817908201654, "grad_norm": 184.603759765625, "learning_rate": 4.4776119402985074e-07, "loss": 5.8496, "step": 4 }, { "epoch": 0.003762227238525207, "grad_norm": 217.74620056152344, "learning_rate": 5.970149253731343e-07, "loss": 6.4297, "step": 5 }, { "epoch": 0.004514672686230248, "grad_norm": 234.3373260498047, "learning_rate": 7.462686567164179e-07, "loss": 6.1973, "step": 6 }, { "epoch": 0.005267118133935289, "grad_norm": 225.37820434570312, "learning_rate": 8.955223880597015e-07, "loss": 6.8613, "step": 7 }, { "epoch": 0.006019563581640331, "grad_norm": 236.18878173828125, "learning_rate": 1.044776119402985e-06, "loss": 6.3555, "step": 8 }, { "epoch": 0.006772009029345372, "grad_norm": 189.50917053222656, "learning_rate": 1.1940298507462686e-06, "loss": 5.1914, "step": 9 }, { "epoch": 0.007524454477050414, "grad_norm": 183.52633666992188, "learning_rate": 1.3432835820895524e-06, "loss": 4.8828, "step": 10 }, { "epoch": 0.008276899924755455, "grad_norm": 205.61083984375, "learning_rate": 1.4925373134328358e-06, "loss": 6.0234, "step": 11 }, { "epoch": 0.009029345372460496, "grad_norm": 153.64642333984375, "learning_rate": 1.6417910447761196e-06, "loss": 5.627, "step": 12 }, { "epoch": 0.009781790820165538, "grad_norm": 153.3954315185547, "learning_rate": 1.791044776119403e-06, "loss": 4.7754, "step": 13 }, { "epoch": 0.010534236267870579, "grad_norm": 173.182373046875, "learning_rate": 1.9402985074626867e-06, "loss": 5.5039, "step": 14 }, { "epoch": 0.011286681715575621, "grad_norm": 159.0615692138672, "learning_rate": 2.08955223880597e-06, "loss": 4.877, "step": 15 }, { "epoch": 0.012039127163280662, "grad_norm": 131.60826110839844, "learning_rate": 2.238805970149254e-06, "loss": 4.5352, "step": 16 }, { "epoch": 0.012791572610985704, "grad_norm": 75.3213119506836, "learning_rate": 2.3880597014925373e-06, "loss": 4.6309, "step": 17 }, { "epoch": 0.013544018058690745, "grad_norm": 55.645530700683594, "learning_rate": 2.537313432835821e-06, "loss": 4.0176, "step": 18 }, { "epoch": 0.014296463506395787, "grad_norm": 33.673458099365234, "learning_rate": 2.686567164179105e-06, "loss": 3.9746, "step": 19 }, { "epoch": 0.015048908954100828, "grad_norm": 27.180540084838867, "learning_rate": 2.835820895522388e-06, "loss": 4.4297, "step": 20 }, { "epoch": 0.01580135440180587, "grad_norm": 21.726226806640625, "learning_rate": 2.9850746268656716e-06, "loss": 3.875, "step": 21 }, { "epoch": 0.01655379984951091, "grad_norm": 110.95691680908203, "learning_rate": 3.1343283582089558e-06, "loss": 4.4336, "step": 22 }, { "epoch": 0.01730624529721595, "grad_norm": 231.21478271484375, "learning_rate": 3.283582089552239e-06, "loss": 5.5293, "step": 23 }, { "epoch": 0.01805869074492099, "grad_norm": 290.27117919921875, "learning_rate": 3.4328358208955225e-06, "loss": 6.1211, "step": 24 }, { "epoch": 0.018811136192626036, "grad_norm": 259.8589172363281, "learning_rate": 3.582089552238806e-06, "loss": 6.1562, "step": 25 }, { "epoch": 0.019563581640331076, "grad_norm": 284.6676940917969, "learning_rate": 3.73134328358209e-06, "loss": 6.0078, "step": 26 }, { "epoch": 0.020316027088036117, "grad_norm": 307.04949951171875, "learning_rate": 3.8805970149253735e-06, "loss": 6.2539, "step": 27 }, { "epoch": 0.021068472535741158, "grad_norm": 238.06138610839844, "learning_rate": 4.029850746268657e-06, "loss": 5.5859, "step": 28 }, { "epoch": 0.0218209179834462, "grad_norm": 218.7796630859375, "learning_rate": 4.17910447761194e-06, "loss": 5.3477, "step": 29 }, { "epoch": 0.022573363431151242, "grad_norm": 225.3642578125, "learning_rate": 4.3283582089552236e-06, "loss": 5.0352, "step": 30 }, { "epoch": 0.023325808878856283, "grad_norm": 187.6018524169922, "learning_rate": 4.477611940298508e-06, "loss": 4.7285, "step": 31 }, { "epoch": 0.024078254326561323, "grad_norm": 113.01893615722656, "learning_rate": 4.626865671641791e-06, "loss": 4.0684, "step": 32 }, { "epoch": 0.024830699774266364, "grad_norm": 69.8572769165039, "learning_rate": 4.7761194029850745e-06, "loss": 4.2344, "step": 33 }, { "epoch": 0.025583145221971408, "grad_norm": 33.99457931518555, "learning_rate": 4.925373134328359e-06, "loss": 3.9727, "step": 34 }, { "epoch": 0.02633559066967645, "grad_norm": 18.248146057128906, "learning_rate": 5.074626865671642e-06, "loss": 3.8906, "step": 35 }, { "epoch": 0.02708803611738149, "grad_norm": 83.59455108642578, "learning_rate": 5.2238805970149255e-06, "loss": 4.1074, "step": 36 }, { "epoch": 0.02784048156508653, "grad_norm": 45.39522171020508, "learning_rate": 5.37313432835821e-06, "loss": 3.959, "step": 37 }, { "epoch": 0.028592927012791574, "grad_norm": 75.98773956298828, "learning_rate": 5.522388059701493e-06, "loss": 4.2012, "step": 38 }, { "epoch": 0.029345372460496615, "grad_norm": 78.67504119873047, "learning_rate": 5.671641791044776e-06, "loss": 3.8828, "step": 39 }, { "epoch": 0.030097817908201655, "grad_norm": 73.8619613647461, "learning_rate": 5.820895522388061e-06, "loss": 3.8926, "step": 40 }, { "epoch": 0.030850263355906696, "grad_norm": 58.589107513427734, "learning_rate": 5.970149253731343e-06, "loss": 3.7617, "step": 41 }, { "epoch": 0.03160270880361174, "grad_norm": 49.83146286010742, "learning_rate": 6.119402985074627e-06, "loss": 3.8926, "step": 42 }, { "epoch": 0.03235515425131678, "grad_norm": 26.51479721069336, "learning_rate": 6.2686567164179116e-06, "loss": 3.3848, "step": 43 }, { "epoch": 0.03310759969902182, "grad_norm": 17.305334091186523, "learning_rate": 6.417910447761194e-06, "loss": 3.291, "step": 44 }, { "epoch": 0.033860045146726865, "grad_norm": 22.08051872253418, "learning_rate": 6.567164179104478e-06, "loss": 3.4844, "step": 45 }, { "epoch": 0.0346124905944319, "grad_norm": 43.44215393066406, "learning_rate": 6.7164179104477625e-06, "loss": 3.3047, "step": 46 }, { "epoch": 0.035364936042136946, "grad_norm": 39.65483474731445, "learning_rate": 6.865671641791045e-06, "loss": 3.8867, "step": 47 }, { "epoch": 0.03611738148984198, "grad_norm": 37.06460952758789, "learning_rate": 7.014925373134329e-06, "loss": 3.6523, "step": 48 }, { "epoch": 0.03686982693754703, "grad_norm": 29.9962158203125, "learning_rate": 7.164179104477612e-06, "loss": 3.2207, "step": 49 }, { "epoch": 0.03762227238525207, "grad_norm": 32.21905517578125, "learning_rate": 7.313432835820896e-06, "loss": 3.791, "step": 50 }, { "epoch": 0.03837471783295711, "grad_norm": 26.621665954589844, "learning_rate": 7.46268656716418e-06, "loss": 3.2559, "step": 51 }, { "epoch": 0.03912716328066215, "grad_norm": 26.82464599609375, "learning_rate": 7.611940298507463e-06, "loss": 3.832, "step": 52 }, { "epoch": 0.0398796087283672, "grad_norm": 27.126306533813477, "learning_rate": 7.761194029850747e-06, "loss": 3.541, "step": 53 }, { "epoch": 0.040632054176072234, "grad_norm": 29.40413475036621, "learning_rate": 7.91044776119403e-06, "loss": 3.666, "step": 54 }, { "epoch": 0.04138449962377728, "grad_norm": 24.618732452392578, "learning_rate": 8.059701492537314e-06, "loss": 3.1562, "step": 55 }, { "epoch": 0.042136945071482315, "grad_norm": 25.24435806274414, "learning_rate": 8.208955223880599e-06, "loss": 2.9307, "step": 56 }, { "epoch": 0.04288939051918736, "grad_norm": 21.84393310546875, "learning_rate": 8.35820895522388e-06, "loss": 4.084, "step": 57 }, { "epoch": 0.0436418359668924, "grad_norm": 20.979150772094727, "learning_rate": 8.507462686567165e-06, "loss": 3.1816, "step": 58 }, { "epoch": 0.04439428141459744, "grad_norm": 23.046876907348633, "learning_rate": 8.656716417910447e-06, "loss": 3.8184, "step": 59 }, { "epoch": 0.045146726862302484, "grad_norm": 27.49558448791504, "learning_rate": 8.805970149253732e-06, "loss": 3.4219, "step": 60 }, { "epoch": 0.04589917231000752, "grad_norm": 21.256746292114258, "learning_rate": 8.955223880597016e-06, "loss": 3.0449, "step": 61 }, { "epoch": 0.046651617757712566, "grad_norm": 25.865859985351562, "learning_rate": 9.104477611940299e-06, "loss": 3.2988, "step": 62 }, { "epoch": 0.04740406320541761, "grad_norm": 45.46930694580078, "learning_rate": 9.253731343283582e-06, "loss": 2.583, "step": 63 }, { "epoch": 0.04815650865312265, "grad_norm": 41.94594955444336, "learning_rate": 9.402985074626867e-06, "loss": 3.5938, "step": 64 }, { "epoch": 0.04890895410082769, "grad_norm": 32.41459274291992, "learning_rate": 9.552238805970149e-06, "loss": 3.373, "step": 65 }, { "epoch": 0.04966139954853273, "grad_norm": 31.194244384765625, "learning_rate": 9.701492537313434e-06, "loss": 3.377, "step": 66 }, { "epoch": 0.05041384499623777, "grad_norm": 24.94992446899414, "learning_rate": 9.850746268656717e-06, "loss": 3.3174, "step": 67 }, { "epoch": 0.051166290443942816, "grad_norm": 31.37045669555664, "learning_rate": 1e-05, "loss": 3.0703, "step": 68 }, { "epoch": 0.05191873589164785, "grad_norm": 38.56293869018555, "learning_rate": 9.992076069730588e-06, "loss": 3.4922, "step": 69 }, { "epoch": 0.0526711813393529, "grad_norm": 21.66668701171875, "learning_rate": 9.984152139461173e-06, "loss": 2.7578, "step": 70 }, { "epoch": 0.05342362678705794, "grad_norm": 26.6785831451416, "learning_rate": 9.97622820919176e-06, "loss": 3.0439, "step": 71 }, { "epoch": 0.05417607223476298, "grad_norm": 66.90357208251953, "learning_rate": 9.968304278922346e-06, "loss": 3.4922, "step": 72 }, { "epoch": 0.05492851768246802, "grad_norm": 17.229740142822266, "learning_rate": 9.960380348652933e-06, "loss": 3.1445, "step": 73 }, { "epoch": 0.05568096313017306, "grad_norm": 34.178218841552734, "learning_rate": 9.95245641838352e-06, "loss": 2.6406, "step": 74 }, { "epoch": 0.056433408577878104, "grad_norm": 21.619211196899414, "learning_rate": 9.944532488114107e-06, "loss": 3.0439, "step": 75 }, { "epoch": 0.05718585402558315, "grad_norm": 35.69949722290039, "learning_rate": 9.936608557844692e-06, "loss": 3.2402, "step": 76 }, { "epoch": 0.057938299473288185, "grad_norm": 68.4375, "learning_rate": 9.928684627575277e-06, "loss": 3.5332, "step": 77 }, { "epoch": 0.05869074492099323, "grad_norm": 22.187849044799805, "learning_rate": 9.920760697305864e-06, "loss": 2.9551, "step": 78 }, { "epoch": 0.059443190368698266, "grad_norm": 22.072538375854492, "learning_rate": 9.912836767036451e-06, "loss": 2.7285, "step": 79 }, { "epoch": 0.06019563581640331, "grad_norm": 32.30579376220703, "learning_rate": 9.904912836767039e-06, "loss": 2.8945, "step": 80 }, { "epoch": 0.060948081264108354, "grad_norm": 24.991010665893555, "learning_rate": 9.896988906497624e-06, "loss": 3.1191, "step": 81 }, { "epoch": 0.06170052671181339, "grad_norm": 52.45732498168945, "learning_rate": 9.88906497622821e-06, "loss": 3.5137, "step": 82 }, { "epoch": 0.062452972159518436, "grad_norm": 26.008697509765625, "learning_rate": 9.881141045958796e-06, "loss": 3.2324, "step": 83 }, { "epoch": 0.06320541760722348, "grad_norm": 38.75912094116211, "learning_rate": 9.873217115689383e-06, "loss": 3.2129, "step": 84 }, { "epoch": 0.06395786305492852, "grad_norm": 31.209091186523438, "learning_rate": 9.86529318541997e-06, "loss": 3.1328, "step": 85 }, { "epoch": 0.06471030850263355, "grad_norm": 34.91722106933594, "learning_rate": 9.857369255150556e-06, "loss": 3.0449, "step": 86 }, { "epoch": 0.0654627539503386, "grad_norm": 26.631141662597656, "learning_rate": 9.849445324881141e-06, "loss": 3.332, "step": 87 }, { "epoch": 0.06621519939804364, "grad_norm": 18.19337272644043, "learning_rate": 9.841521394611728e-06, "loss": 3.0322, "step": 88 }, { "epoch": 0.06696764484574869, "grad_norm": 55.028099060058594, "learning_rate": 9.833597464342315e-06, "loss": 3.4453, "step": 89 }, { "epoch": 0.06772009029345373, "grad_norm": 35.84956741333008, "learning_rate": 9.825673534072902e-06, "loss": 2.8672, "step": 90 }, { "epoch": 0.06847253574115876, "grad_norm": 29.46920394897461, "learning_rate": 9.817749603803487e-06, "loss": 2.6416, "step": 91 }, { "epoch": 0.0692249811888638, "grad_norm": 42.81882095336914, "learning_rate": 9.809825673534073e-06, "loss": 2.8066, "step": 92 }, { "epoch": 0.06997742663656885, "grad_norm": 20.653282165527344, "learning_rate": 9.80190174326466e-06, "loss": 3.0195, "step": 93 }, { "epoch": 0.07072987208427389, "grad_norm": 37.75685501098633, "learning_rate": 9.793977812995247e-06, "loss": 2.8027, "step": 94 }, { "epoch": 0.07148231753197894, "grad_norm": 30.57415199279785, "learning_rate": 9.786053882725834e-06, "loss": 3.6602, "step": 95 }, { "epoch": 0.07223476297968397, "grad_norm": 31.511478424072266, "learning_rate": 9.77812995245642e-06, "loss": 3.248, "step": 96 }, { "epoch": 0.07298720842738901, "grad_norm": 53.98818588256836, "learning_rate": 9.770206022187005e-06, "loss": 3.4531, "step": 97 }, { "epoch": 0.07373965387509406, "grad_norm": 22.481534957885742, "learning_rate": 9.762282091917592e-06, "loss": 2.6758, "step": 98 }, { "epoch": 0.0744920993227991, "grad_norm": 22.52367401123047, "learning_rate": 9.754358161648179e-06, "loss": 3.2891, "step": 99 }, { "epoch": 0.07524454477050414, "grad_norm": 24.657718658447266, "learning_rate": 9.746434231378766e-06, "loss": 2.7432, "step": 100 }, { "epoch": 0.07599699021820917, "grad_norm": 27.113811492919922, "learning_rate": 9.738510301109351e-06, "loss": 3.5898, "step": 101 }, { "epoch": 0.07674943566591422, "grad_norm": 31.955333709716797, "learning_rate": 9.730586370839936e-06, "loss": 2.7695, "step": 102 }, { "epoch": 0.07750188111361926, "grad_norm": 32.23259735107422, "learning_rate": 9.722662440570524e-06, "loss": 3.043, "step": 103 }, { "epoch": 0.0782543265613243, "grad_norm": 31.208330154418945, "learning_rate": 9.71473851030111e-06, "loss": 2.8613, "step": 104 }, { "epoch": 0.07900677200902935, "grad_norm": 18.11272621154785, "learning_rate": 9.706814580031696e-06, "loss": 2.8672, "step": 105 }, { "epoch": 0.0797592174567344, "grad_norm": 32.13460159301758, "learning_rate": 9.698890649762283e-06, "loss": 3.2012, "step": 106 }, { "epoch": 0.08051166290443942, "grad_norm": 26.03815269470215, "learning_rate": 9.69096671949287e-06, "loss": 2.8633, "step": 107 }, { "epoch": 0.08126410835214447, "grad_norm": 30.48563575744629, "learning_rate": 9.683042789223455e-06, "loss": 3.1289, "step": 108 }, { "epoch": 0.08201655379984951, "grad_norm": 33.55179977416992, "learning_rate": 9.675118858954042e-06, "loss": 3.002, "step": 109 }, { "epoch": 0.08276899924755456, "grad_norm": 37.811912536621094, "learning_rate": 9.667194928684628e-06, "loss": 2.6934, "step": 110 }, { "epoch": 0.0835214446952596, "grad_norm": 24.619897842407227, "learning_rate": 9.659270998415215e-06, "loss": 3.1836, "step": 111 }, { "epoch": 0.08427389014296463, "grad_norm": 54.72816848754883, "learning_rate": 9.651347068145802e-06, "loss": 3.5566, "step": 112 }, { "epoch": 0.08502633559066967, "grad_norm": 36.67848205566406, "learning_rate": 9.643423137876387e-06, "loss": 2.627, "step": 113 }, { "epoch": 0.08577878103837472, "grad_norm": 22.27309799194336, "learning_rate": 9.635499207606974e-06, "loss": 3.2832, "step": 114 }, { "epoch": 0.08653122648607976, "grad_norm": 30.337501525878906, "learning_rate": 9.62757527733756e-06, "loss": 3.2109, "step": 115 }, { "epoch": 0.0872836719337848, "grad_norm": 34.67364501953125, "learning_rate": 9.619651347068147e-06, "loss": 2.9326, "step": 116 }, { "epoch": 0.08803611738148984, "grad_norm": 19.17691421508789, "learning_rate": 9.611727416798734e-06, "loss": 3.1348, "step": 117 }, { "epoch": 0.08878856282919488, "grad_norm": 29.645797729492188, "learning_rate": 9.603803486529319e-06, "loss": 3.3652, "step": 118 }, { "epoch": 0.08954100827689992, "grad_norm": 26.387907028198242, "learning_rate": 9.595879556259906e-06, "loss": 3.2578, "step": 119 }, { "epoch": 0.09029345372460497, "grad_norm": 20.296672821044922, "learning_rate": 9.587955625990491e-06, "loss": 3.0078, "step": 120 }, { "epoch": 0.09104589917231001, "grad_norm": 18.01280975341797, "learning_rate": 9.580031695721078e-06, "loss": 3.5566, "step": 121 }, { "epoch": 0.09179834462001504, "grad_norm": 39.96554183959961, "learning_rate": 9.572107765451665e-06, "loss": 3.1553, "step": 122 }, { "epoch": 0.09255079006772009, "grad_norm": 24.43425941467285, "learning_rate": 9.56418383518225e-06, "loss": 3.459, "step": 123 }, { "epoch": 0.09330323551542513, "grad_norm": 47.12384033203125, "learning_rate": 9.556259904912838e-06, "loss": 3.0674, "step": 124 }, { "epoch": 0.09405568096313018, "grad_norm": 34.72853469848633, "learning_rate": 9.548335974643423e-06, "loss": 3.0586, "step": 125 }, { "epoch": 0.09480812641083522, "grad_norm": 23.229869842529297, "learning_rate": 9.54041204437401e-06, "loss": 2.8223, "step": 126 }, { "epoch": 0.09556057185854025, "grad_norm": 20.17858123779297, "learning_rate": 9.532488114104597e-06, "loss": 2.8359, "step": 127 }, { "epoch": 0.0963130173062453, "grad_norm": 33.083961486816406, "learning_rate": 9.524564183835183e-06, "loss": 3.0605, "step": 128 }, { "epoch": 0.09706546275395034, "grad_norm": 50.06746292114258, "learning_rate": 9.51664025356577e-06, "loss": 2.5381, "step": 129 }, { "epoch": 0.09781790820165538, "grad_norm": 22.124317169189453, "learning_rate": 9.508716323296355e-06, "loss": 2.9277, "step": 130 }, { "epoch": 0.09857035364936043, "grad_norm": 62.62016677856445, "learning_rate": 9.500792393026942e-06, "loss": 2.6914, "step": 131 }, { "epoch": 0.09932279909706546, "grad_norm": 49.8092155456543, "learning_rate": 9.492868462757529e-06, "loss": 3.1816, "step": 132 }, { "epoch": 0.1000752445447705, "grad_norm": 26.982786178588867, "learning_rate": 9.484944532488114e-06, "loss": 2.7666, "step": 133 }, { "epoch": 0.10082768999247554, "grad_norm": 26.919538497924805, "learning_rate": 9.477020602218701e-06, "loss": 2.6143, "step": 134 }, { "epoch": 0.10158013544018059, "grad_norm": 32.252845764160156, "learning_rate": 9.469096671949287e-06, "loss": 2.8164, "step": 135 }, { "epoch": 0.10233258088788563, "grad_norm": 48.72047424316406, "learning_rate": 9.461172741679874e-06, "loss": 3.2793, "step": 136 }, { "epoch": 0.10308502633559068, "grad_norm": 70.15787506103516, "learning_rate": 9.45324881141046e-06, "loss": 2.9873, "step": 137 }, { "epoch": 0.1038374717832957, "grad_norm": 88.84703826904297, "learning_rate": 9.445324881141046e-06, "loss": 3.2109, "step": 138 }, { "epoch": 0.10458991723100075, "grad_norm": 74.57728576660156, "learning_rate": 9.437400950871633e-06, "loss": 3.5938, "step": 139 }, { "epoch": 0.1053423626787058, "grad_norm": 31.566608428955078, "learning_rate": 9.429477020602219e-06, "loss": 3.2354, "step": 140 }, { "epoch": 0.10609480812641084, "grad_norm": 21.33376121520996, "learning_rate": 9.421553090332806e-06, "loss": 2.5771, "step": 141 }, { "epoch": 0.10684725357411588, "grad_norm": 35.084815979003906, "learning_rate": 9.413629160063393e-06, "loss": 2.8975, "step": 142 }, { "epoch": 0.10759969902182091, "grad_norm": 55.037899017333984, "learning_rate": 9.405705229793978e-06, "loss": 3.3027, "step": 143 }, { "epoch": 0.10835214446952596, "grad_norm": 89.90687561035156, "learning_rate": 9.397781299524565e-06, "loss": 3.0615, "step": 144 }, { "epoch": 0.109104589917231, "grad_norm": 45.729373931884766, "learning_rate": 9.38985736925515e-06, "loss": 3.0762, "step": 145 }, { "epoch": 0.10985703536493605, "grad_norm": 29.83327865600586, "learning_rate": 9.381933438985737e-06, "loss": 2.8154, "step": 146 }, { "epoch": 0.11060948081264109, "grad_norm": 26.802101135253906, "learning_rate": 9.374009508716324e-06, "loss": 3.0469, "step": 147 }, { "epoch": 0.11136192626034612, "grad_norm": 49.10624313354492, "learning_rate": 9.366085578446912e-06, "loss": 3.0547, "step": 148 }, { "epoch": 0.11211437170805116, "grad_norm": 73.40638732910156, "learning_rate": 9.358161648177497e-06, "loss": 3.8574, "step": 149 }, { "epoch": 0.11286681715575621, "grad_norm": 18.41849136352539, "learning_rate": 9.350237717908082e-06, "loss": 2.5781, "step": 150 }, { "epoch": 0.11361926260346125, "grad_norm": 36.31776428222656, "learning_rate": 9.34231378763867e-06, "loss": 2.9971, "step": 151 }, { "epoch": 0.1143717080511663, "grad_norm": 25.525556564331055, "learning_rate": 9.334389857369256e-06, "loss": 3.2266, "step": 152 }, { "epoch": 0.11512415349887133, "grad_norm": 29.12396812438965, "learning_rate": 9.326465927099843e-06, "loss": 3.3887, "step": 153 }, { "epoch": 0.11587659894657637, "grad_norm": 21.789138793945312, "learning_rate": 9.318541996830429e-06, "loss": 2.8047, "step": 154 }, { "epoch": 0.11662904439428141, "grad_norm": 26.248098373413086, "learning_rate": 9.310618066561014e-06, "loss": 3.2852, "step": 155 }, { "epoch": 0.11738148984198646, "grad_norm": 18.92224884033203, "learning_rate": 9.302694136291601e-06, "loss": 2.667, "step": 156 }, { "epoch": 0.1181339352896915, "grad_norm": 32.231597900390625, "learning_rate": 9.294770206022188e-06, "loss": 2.9531, "step": 157 }, { "epoch": 0.11888638073739653, "grad_norm": 33.85251235961914, "learning_rate": 9.286846275752775e-06, "loss": 3.0371, "step": 158 }, { "epoch": 0.11963882618510158, "grad_norm": 21.563522338867188, "learning_rate": 9.27892234548336e-06, "loss": 2.8262, "step": 159 }, { "epoch": 0.12039127163280662, "grad_norm": 18.7381649017334, "learning_rate": 9.270998415213946e-06, "loss": 2.9717, "step": 160 }, { "epoch": 0.12114371708051166, "grad_norm": 22.355424880981445, "learning_rate": 9.263074484944533e-06, "loss": 2.8105, "step": 161 }, { "epoch": 0.12189616252821671, "grad_norm": 41.8394660949707, "learning_rate": 9.25515055467512e-06, "loss": 2.9395, "step": 162 }, { "epoch": 0.12264860797592174, "grad_norm": 31.401140213012695, "learning_rate": 9.247226624405707e-06, "loss": 3.1904, "step": 163 }, { "epoch": 0.12340105342362678, "grad_norm": 29.943819046020508, "learning_rate": 9.239302694136292e-06, "loss": 2.7578, "step": 164 }, { "epoch": 0.12415349887133183, "grad_norm": 26.2047119140625, "learning_rate": 9.231378763866878e-06, "loss": 3.0879, "step": 165 }, { "epoch": 0.12490594431903687, "grad_norm": 24.09654998779297, "learning_rate": 9.223454833597465e-06, "loss": 3.0625, "step": 166 }, { "epoch": 0.1256583897667419, "grad_norm": 20.292509078979492, "learning_rate": 9.215530903328052e-06, "loss": 2.6992, "step": 167 }, { "epoch": 0.12641083521444696, "grad_norm": 24.645313262939453, "learning_rate": 9.207606973058639e-06, "loss": 2.8486, "step": 168 }, { "epoch": 0.127163280662152, "grad_norm": 47.42299270629883, "learning_rate": 9.199683042789224e-06, "loss": 3.0645, "step": 169 }, { "epoch": 0.12791572610985705, "grad_norm": 31.85733413696289, "learning_rate": 9.19175911251981e-06, "loss": 3.2109, "step": 170 }, { "epoch": 0.12866817155756208, "grad_norm": 24.819351196289062, "learning_rate": 9.183835182250396e-06, "loss": 2.9268, "step": 171 }, { "epoch": 0.1294206170052671, "grad_norm": 32.172393798828125, "learning_rate": 9.175911251980984e-06, "loss": 2.7207, "step": 172 }, { "epoch": 0.13017306245297217, "grad_norm": 28.043930053710938, "learning_rate": 9.16798732171157e-06, "loss": 2.9805, "step": 173 }, { "epoch": 0.1309255079006772, "grad_norm": 85.42208862304688, "learning_rate": 9.160063391442156e-06, "loss": 3.0645, "step": 174 }, { "epoch": 0.13167795334838225, "grad_norm": 45.047210693359375, "learning_rate": 9.152139461172741e-06, "loss": 2.8105, "step": 175 }, { "epoch": 0.13243039879608728, "grad_norm": 24.070430755615234, "learning_rate": 9.144215530903328e-06, "loss": 3.1055, "step": 176 }, { "epoch": 0.13318284424379231, "grad_norm": 21.807907104492188, "learning_rate": 9.136291600633915e-06, "loss": 3.1523, "step": 177 }, { "epoch": 0.13393528969149737, "grad_norm": 28.564992904663086, "learning_rate": 9.128367670364502e-06, "loss": 2.9219, "step": 178 }, { "epoch": 0.1346877351392024, "grad_norm": 21.0492000579834, "learning_rate": 9.120443740095088e-06, "loss": 3.4619, "step": 179 }, { "epoch": 0.13544018058690746, "grad_norm": 60.33738708496094, "learning_rate": 9.112519809825675e-06, "loss": 2.9395, "step": 180 }, { "epoch": 0.1361926260346125, "grad_norm": 17.952964782714844, "learning_rate": 9.10459587955626e-06, "loss": 2.4043, "step": 181 }, { "epoch": 0.13694507148231752, "grad_norm": 35.40900802612305, "learning_rate": 9.096671949286847e-06, "loss": 2.709, "step": 182 }, { "epoch": 0.13769751693002258, "grad_norm": 18.852981567382812, "learning_rate": 9.088748019017434e-06, "loss": 2.8086, "step": 183 }, { "epoch": 0.1384499623777276, "grad_norm": 29.301170349121094, "learning_rate": 9.08082408874802e-06, "loss": 3.6641, "step": 184 }, { "epoch": 0.13920240782543267, "grad_norm": 46.67580032348633, "learning_rate": 9.072900158478607e-06, "loss": 2.8477, "step": 185 }, { "epoch": 0.1399548532731377, "grad_norm": 28.98253631591797, "learning_rate": 9.064976228209192e-06, "loss": 3.0342, "step": 186 }, { "epoch": 0.14070729872084273, "grad_norm": 25.457124710083008, "learning_rate": 9.057052297939779e-06, "loss": 2.6748, "step": 187 }, { "epoch": 0.14145974416854779, "grad_norm": 26.476932525634766, "learning_rate": 9.049128367670366e-06, "loss": 3.0654, "step": 188 }, { "epoch": 0.14221218961625282, "grad_norm": 22.27900505065918, "learning_rate": 9.041204437400951e-06, "loss": 3.1953, "step": 189 }, { "epoch": 0.14296463506395787, "grad_norm": 38.6421012878418, "learning_rate": 9.033280507131538e-06, "loss": 3.3906, "step": 190 }, { "epoch": 0.1437170805116629, "grad_norm": 46.56447982788086, "learning_rate": 9.025356576862124e-06, "loss": 2.8262, "step": 191 }, { "epoch": 0.14446952595936793, "grad_norm": 27.99505043029785, "learning_rate": 9.01743264659271e-06, "loss": 2.7803, "step": 192 }, { "epoch": 0.145221971407073, "grad_norm": 20.122390747070312, "learning_rate": 9.009508716323298e-06, "loss": 2.1504, "step": 193 }, { "epoch": 0.14597441685477802, "grad_norm": 34.87712478637695, "learning_rate": 9.001584786053883e-06, "loss": 2.6416, "step": 194 }, { "epoch": 0.14672686230248308, "grad_norm": 31.978273391723633, "learning_rate": 8.99366085578447e-06, "loss": 3.1445, "step": 195 }, { "epoch": 0.1474793077501881, "grad_norm": 25.95554542541504, "learning_rate": 8.985736925515056e-06, "loss": 2.918, "step": 196 }, { "epoch": 0.14823175319789314, "grad_norm": 63.560035705566406, "learning_rate": 8.977812995245643e-06, "loss": 2.5674, "step": 197 }, { "epoch": 0.1489841986455982, "grad_norm": 50.40849304199219, "learning_rate": 8.96988906497623e-06, "loss": 3.4824, "step": 198 }, { "epoch": 0.14973664409330323, "grad_norm": 36.795013427734375, "learning_rate": 8.961965134706815e-06, "loss": 3.3955, "step": 199 }, { "epoch": 0.1504890895410083, "grad_norm": 23.839466094970703, "learning_rate": 8.954041204437402e-06, "loss": 2.9502, "step": 200 }, { "epoch": 0.15124153498871332, "grad_norm": 33.121131896972656, "learning_rate": 8.946117274167987e-06, "loss": 2.749, "step": 201 }, { "epoch": 0.15199398043641835, "grad_norm": 16.309022903442383, "learning_rate": 8.938193343898574e-06, "loss": 2.5889, "step": 202 }, { "epoch": 0.1527464258841234, "grad_norm": 23.139263153076172, "learning_rate": 8.930269413629161e-06, "loss": 2.9033, "step": 203 }, { "epoch": 0.15349887133182843, "grad_norm": 26.247356414794922, "learning_rate": 8.922345483359747e-06, "loss": 2.7109, "step": 204 }, { "epoch": 0.1542513167795335, "grad_norm": 55.275264739990234, "learning_rate": 8.914421553090334e-06, "loss": 3.2109, "step": 205 }, { "epoch": 0.15500376222723852, "grad_norm": 37.078006744384766, "learning_rate": 8.90649762282092e-06, "loss": 3.3486, "step": 206 }, { "epoch": 0.15575620767494355, "grad_norm": 40.6878662109375, "learning_rate": 8.898573692551506e-06, "loss": 3.3584, "step": 207 }, { "epoch": 0.1565086531226486, "grad_norm": 19.50431251525879, "learning_rate": 8.890649762282093e-06, "loss": 3.2998, "step": 208 }, { "epoch": 0.15726109857035364, "grad_norm": 21.160484313964844, "learning_rate": 8.882725832012679e-06, "loss": 2.8994, "step": 209 }, { "epoch": 0.1580135440180587, "grad_norm": 28.999177932739258, "learning_rate": 8.874801901743266e-06, "loss": 2.4697, "step": 210 }, { "epoch": 0.15876598946576373, "grad_norm": 61.649654388427734, "learning_rate": 8.866877971473851e-06, "loss": 3.0391, "step": 211 }, { "epoch": 0.1595184349134688, "grad_norm": 43.92689514160156, "learning_rate": 8.858954041204438e-06, "loss": 3.0234, "step": 212 }, { "epoch": 0.16027088036117382, "grad_norm": 49.55857467651367, "learning_rate": 8.851030110935025e-06, "loss": 2.8916, "step": 213 }, { "epoch": 0.16102332580887885, "grad_norm": 31.43752670288086, "learning_rate": 8.84310618066561e-06, "loss": 3.0225, "step": 214 }, { "epoch": 0.1617757712565839, "grad_norm": 39.49406051635742, "learning_rate": 8.835182250396197e-06, "loss": 2.6123, "step": 215 }, { "epoch": 0.16252821670428894, "grad_norm": 39.66098403930664, "learning_rate": 8.827258320126783e-06, "loss": 3.0879, "step": 216 }, { "epoch": 0.163280662151994, "grad_norm": 23.214488983154297, "learning_rate": 8.81933438985737e-06, "loss": 2.9238, "step": 217 }, { "epoch": 0.16403310759969902, "grad_norm": 23.282730102539062, "learning_rate": 8.811410459587957e-06, "loss": 3.0547, "step": 218 }, { "epoch": 0.16478555304740405, "grad_norm": 27.417238235473633, "learning_rate": 8.803486529318542e-06, "loss": 2.6113, "step": 219 }, { "epoch": 0.1655379984951091, "grad_norm": 19.980924606323242, "learning_rate": 8.79556259904913e-06, "loss": 2.3564, "step": 220 }, { "epoch": 0.16629044394281414, "grad_norm": 18.440303802490234, "learning_rate": 8.787638668779716e-06, "loss": 2.3672, "step": 221 }, { "epoch": 0.1670428893905192, "grad_norm": 27.1029052734375, "learning_rate": 8.779714738510302e-06, "loss": 2.6631, "step": 222 }, { "epoch": 0.16779533483822423, "grad_norm": 25.787336349487305, "learning_rate": 8.771790808240889e-06, "loss": 2.6289, "step": 223 }, { "epoch": 0.16854778028592926, "grad_norm": 31.063365936279297, "learning_rate": 8.763866877971474e-06, "loss": 3.0146, "step": 224 }, { "epoch": 0.16930022573363432, "grad_norm": 39.28546905517578, "learning_rate": 8.755942947702061e-06, "loss": 3.3584, "step": 225 }, { "epoch": 0.17005267118133935, "grad_norm": 41.93635940551758, "learning_rate": 8.748019017432648e-06, "loss": 2.5762, "step": 226 }, { "epoch": 0.1708051166290444, "grad_norm": 26.295791625976562, "learning_rate": 8.740095087163233e-06, "loss": 3.0781, "step": 227 }, { "epoch": 0.17155756207674944, "grad_norm": 32.962379455566406, "learning_rate": 8.73217115689382e-06, "loss": 2.5918, "step": 228 }, { "epoch": 0.17231000752445447, "grad_norm": 29.288211822509766, "learning_rate": 8.724247226624406e-06, "loss": 3.043, "step": 229 }, { "epoch": 0.17306245297215953, "grad_norm": 23.087108612060547, "learning_rate": 8.716323296354993e-06, "loss": 2.9277, "step": 230 }, { "epoch": 0.17381489841986456, "grad_norm": 57.002716064453125, "learning_rate": 8.70839936608558e-06, "loss": 2.9668, "step": 231 }, { "epoch": 0.1745673438675696, "grad_norm": 21.86600685119629, "learning_rate": 8.700475435816165e-06, "loss": 2.627, "step": 232 }, { "epoch": 0.17531978931527464, "grad_norm": 19.007726669311523, "learning_rate": 8.692551505546752e-06, "loss": 2.8184, "step": 233 }, { "epoch": 0.17607223476297967, "grad_norm": 22.28285026550293, "learning_rate": 8.684627575277338e-06, "loss": 2.8965, "step": 234 }, { "epoch": 0.17682468021068473, "grad_norm": 43.95874786376953, "learning_rate": 8.676703645007925e-06, "loss": 2.7998, "step": 235 }, { "epoch": 0.17757712565838976, "grad_norm": 21.108388900756836, "learning_rate": 8.668779714738512e-06, "loss": 2.7344, "step": 236 }, { "epoch": 0.17832957110609482, "grad_norm": 21.255399703979492, "learning_rate": 8.660855784469097e-06, "loss": 3.0166, "step": 237 }, { "epoch": 0.17908201655379985, "grad_norm": 38.1331672668457, "learning_rate": 8.652931854199684e-06, "loss": 2.5342, "step": 238 }, { "epoch": 0.17983446200150488, "grad_norm": 27.367652893066406, "learning_rate": 8.64500792393027e-06, "loss": 3.0059, "step": 239 }, { "epoch": 0.18058690744920994, "grad_norm": 21.36908531188965, "learning_rate": 8.637083993660857e-06, "loss": 2.5986, "step": 240 }, { "epoch": 0.18133935289691497, "grad_norm": 24.965232849121094, "learning_rate": 8.629160063391444e-06, "loss": 2.7422, "step": 241 }, { "epoch": 0.18209179834462003, "grad_norm": 26.240676879882812, "learning_rate": 8.621236133122029e-06, "loss": 2.7207, "step": 242 }, { "epoch": 0.18284424379232506, "grad_norm": 25.53062629699707, "learning_rate": 8.613312202852616e-06, "loss": 2.9189, "step": 243 }, { "epoch": 0.1835966892400301, "grad_norm": 30.517423629760742, "learning_rate": 8.605388272583201e-06, "loss": 3.0566, "step": 244 }, { "epoch": 0.18434913468773514, "grad_norm": 25.685810089111328, "learning_rate": 8.597464342313788e-06, "loss": 2.3613, "step": 245 }, { "epoch": 0.18510158013544017, "grad_norm": 25.89740562438965, "learning_rate": 8.589540412044375e-06, "loss": 3.126, "step": 246 }, { "epoch": 0.18585402558314523, "grad_norm": 18.76670265197754, "learning_rate": 8.58161648177496e-06, "loss": 2.4902, "step": 247 }, { "epoch": 0.18660647103085026, "grad_norm": 27.958297729492188, "learning_rate": 8.573692551505548e-06, "loss": 2.7461, "step": 248 }, { "epoch": 0.1873589164785553, "grad_norm": 21.461612701416016, "learning_rate": 8.565768621236133e-06, "loss": 2.9189, "step": 249 }, { "epoch": 0.18811136192626035, "grad_norm": 23.18748664855957, "learning_rate": 8.55784469096672e-06, "loss": 2.3467, "step": 250 }, { "epoch": 0.18886380737396538, "grad_norm": 50.05548858642578, "learning_rate": 8.549920760697307e-06, "loss": 2.7305, "step": 251 }, { "epoch": 0.18961625282167044, "grad_norm": 38.1525764465332, "learning_rate": 8.541996830427893e-06, "loss": 3.5957, "step": 252 }, { "epoch": 0.19036869826937547, "grad_norm": 41.369293212890625, "learning_rate": 8.53407290015848e-06, "loss": 2.4688, "step": 253 }, { "epoch": 0.1911211437170805, "grad_norm": 30.51654815673828, "learning_rate": 8.526148969889065e-06, "loss": 2.9824, "step": 254 }, { "epoch": 0.19187358916478556, "grad_norm": 50.003944396972656, "learning_rate": 8.518225039619652e-06, "loss": 2.8887, "step": 255 }, { "epoch": 0.1926260346124906, "grad_norm": 22.59164810180664, "learning_rate": 8.510301109350239e-06, "loss": 2.4131, "step": 256 }, { "epoch": 0.19337848006019565, "grad_norm": 21.20647621154785, "learning_rate": 8.502377179080824e-06, "loss": 2.5166, "step": 257 }, { "epoch": 0.19413092550790068, "grad_norm": 20.6810302734375, "learning_rate": 8.494453248811411e-06, "loss": 2.5273, "step": 258 }, { "epoch": 0.1948833709556057, "grad_norm": 27.598730087280273, "learning_rate": 8.486529318541997e-06, "loss": 2.6357, "step": 259 }, { "epoch": 0.19563581640331076, "grad_norm": 28.025638580322266, "learning_rate": 8.478605388272584e-06, "loss": 3.1553, "step": 260 }, { "epoch": 0.1963882618510158, "grad_norm": 25.879112243652344, "learning_rate": 8.47068145800317e-06, "loss": 3.0566, "step": 261 }, { "epoch": 0.19714070729872085, "grad_norm": 37.4360466003418, "learning_rate": 8.462757527733758e-06, "loss": 3.5303, "step": 262 }, { "epoch": 0.19789315274642588, "grad_norm": 27.776281356811523, "learning_rate": 8.454833597464343e-06, "loss": 2.7139, "step": 263 }, { "epoch": 0.1986455981941309, "grad_norm": 30.19846534729004, "learning_rate": 8.446909667194929e-06, "loss": 2.8203, "step": 264 }, { "epoch": 0.19939804364183597, "grad_norm": 35.52974319458008, "learning_rate": 8.438985736925516e-06, "loss": 3.0059, "step": 265 }, { "epoch": 0.200150489089541, "grad_norm": 18.416040420532227, "learning_rate": 8.431061806656103e-06, "loss": 2.8809, "step": 266 }, { "epoch": 0.20090293453724606, "grad_norm": 24.434906005859375, "learning_rate": 8.42313787638669e-06, "loss": 2.9688, "step": 267 }, { "epoch": 0.2016553799849511, "grad_norm": 17.731821060180664, "learning_rate": 8.415213946117275e-06, "loss": 2.7617, "step": 268 }, { "epoch": 0.20240782543265612, "grad_norm": 49.564125061035156, "learning_rate": 8.40729001584786e-06, "loss": 3.2773, "step": 269 }, { "epoch": 0.20316027088036118, "grad_norm": 23.3712215423584, "learning_rate": 8.399366085578447e-06, "loss": 3.2969, "step": 270 }, { "epoch": 0.2039127163280662, "grad_norm": 18.392166137695312, "learning_rate": 8.391442155309034e-06, "loss": 2.6846, "step": 271 }, { "epoch": 0.20466516177577126, "grad_norm": 17.793779373168945, "learning_rate": 8.383518225039621e-06, "loss": 2.6895, "step": 272 }, { "epoch": 0.2054176072234763, "grad_norm": 27.635812759399414, "learning_rate": 8.375594294770207e-06, "loss": 2.751, "step": 273 }, { "epoch": 0.20617005267118135, "grad_norm": 52.76430130004883, "learning_rate": 8.367670364500792e-06, "loss": 2.8457, "step": 274 }, { "epoch": 0.20692249811888638, "grad_norm": 23.622615814208984, "learning_rate": 8.35974643423138e-06, "loss": 2.8574, "step": 275 }, { "epoch": 0.2076749435665914, "grad_norm": 33.52585983276367, "learning_rate": 8.351822503961966e-06, "loss": 2.9033, "step": 276 }, { "epoch": 0.20842738901429647, "grad_norm": 50.93476867675781, "learning_rate": 8.343898573692553e-06, "loss": 2.75, "step": 277 }, { "epoch": 0.2091798344620015, "grad_norm": 20.75809097290039, "learning_rate": 8.335974643423139e-06, "loss": 2.3896, "step": 278 }, { "epoch": 0.20993227990970656, "grad_norm": 25.250638961791992, "learning_rate": 8.328050713153724e-06, "loss": 2.7451, "step": 279 }, { "epoch": 0.2106847253574116, "grad_norm": 26.620756149291992, "learning_rate": 8.320126782884311e-06, "loss": 2.7588, "step": 280 }, { "epoch": 0.21143717080511662, "grad_norm": 21.96516990661621, "learning_rate": 8.312202852614898e-06, "loss": 2.9248, "step": 281 }, { "epoch": 0.21218961625282168, "grad_norm": 26.421092987060547, "learning_rate": 8.304278922345485e-06, "loss": 2.8486, "step": 282 }, { "epoch": 0.2129420617005267, "grad_norm": 20.29400062561035, "learning_rate": 8.29635499207607e-06, "loss": 2.9727, "step": 283 }, { "epoch": 0.21369450714823177, "grad_norm": 29.198728561401367, "learning_rate": 8.288431061806656e-06, "loss": 2.4951, "step": 284 }, { "epoch": 0.2144469525959368, "grad_norm": 23.257631301879883, "learning_rate": 8.280507131537243e-06, "loss": 2.9082, "step": 285 }, { "epoch": 0.21519939804364183, "grad_norm": 51.7125358581543, "learning_rate": 8.27258320126783e-06, "loss": 2.5234, "step": 286 }, { "epoch": 0.21595184349134688, "grad_norm": 24.36754035949707, "learning_rate": 8.264659270998417e-06, "loss": 2.6406, "step": 287 }, { "epoch": 0.21670428893905191, "grad_norm": 30.53868865966797, "learning_rate": 8.256735340729002e-06, "loss": 2.7607, "step": 288 }, { "epoch": 0.21745673438675697, "grad_norm": 27.01729393005371, "learning_rate": 8.24881141045959e-06, "loss": 2.8223, "step": 289 }, { "epoch": 0.218209179834462, "grad_norm": 24.644309997558594, "learning_rate": 8.240887480190175e-06, "loss": 2.75, "step": 290 }, { "epoch": 0.21896162528216703, "grad_norm": 45.585166931152344, "learning_rate": 8.232963549920762e-06, "loss": 2.4844, "step": 291 }, { "epoch": 0.2197140707298721, "grad_norm": 24.803770065307617, "learning_rate": 8.225039619651349e-06, "loss": 2.8115, "step": 292 }, { "epoch": 0.22046651617757712, "grad_norm": 29.119102478027344, "learning_rate": 8.217115689381934e-06, "loss": 3.543, "step": 293 }, { "epoch": 0.22121896162528218, "grad_norm": 35.074031829833984, "learning_rate": 8.209191759112521e-06, "loss": 3.1289, "step": 294 }, { "epoch": 0.2219714070729872, "grad_norm": 34.88431167602539, "learning_rate": 8.201267828843106e-06, "loss": 2.9209, "step": 295 }, { "epoch": 0.22272385252069224, "grad_norm": 36.36684799194336, "learning_rate": 8.193343898573693e-06, "loss": 2.8428, "step": 296 }, { "epoch": 0.2234762979683973, "grad_norm": 40.56970977783203, "learning_rate": 8.18541996830428e-06, "loss": 2.5723, "step": 297 }, { "epoch": 0.22422874341610233, "grad_norm": 32.04544448852539, "learning_rate": 8.177496038034866e-06, "loss": 3.0215, "step": 298 }, { "epoch": 0.22498118886380739, "grad_norm": 21.52565574645996, "learning_rate": 8.169572107765453e-06, "loss": 3.4941, "step": 299 }, { "epoch": 0.22573363431151242, "grad_norm": 17.581174850463867, "learning_rate": 8.161648177496038e-06, "loss": 2.7656, "step": 300 }, { "epoch": 0.22648607975921745, "grad_norm": 17.084651947021484, "learning_rate": 8.153724247226625e-06, "loss": 2.3311, "step": 301 }, { "epoch": 0.2272385252069225, "grad_norm": 20.111968994140625, "learning_rate": 8.145800316957212e-06, "loss": 2.8135, "step": 302 }, { "epoch": 0.22799097065462753, "grad_norm": 38.68403625488281, "learning_rate": 8.137876386687798e-06, "loss": 2.9668, "step": 303 }, { "epoch": 0.2287434161023326, "grad_norm": 63.05813980102539, "learning_rate": 8.129952456418385e-06, "loss": 3.1807, "step": 304 }, { "epoch": 0.22949586155003762, "grad_norm": 26.78461265563965, "learning_rate": 8.12202852614897e-06, "loss": 3.1641, "step": 305 }, { "epoch": 0.23024830699774265, "grad_norm": 22.460067749023438, "learning_rate": 8.114104595879557e-06, "loss": 2.8037, "step": 306 }, { "epoch": 0.2310007524454477, "grad_norm": 33.41761779785156, "learning_rate": 8.106180665610144e-06, "loss": 3.3008, "step": 307 }, { "epoch": 0.23175319789315274, "grad_norm": 30.717609405517578, "learning_rate": 8.09825673534073e-06, "loss": 2.7041, "step": 308 }, { "epoch": 0.2325056433408578, "grad_norm": 64.30577850341797, "learning_rate": 8.090332805071317e-06, "loss": 3.1162, "step": 309 }, { "epoch": 0.23325808878856283, "grad_norm": 35.22840118408203, "learning_rate": 8.082408874801902e-06, "loss": 2.6377, "step": 310 }, { "epoch": 0.23401053423626786, "grad_norm": 20.862028121948242, "learning_rate": 8.074484944532489e-06, "loss": 2.7969, "step": 311 }, { "epoch": 0.23476297968397292, "grad_norm": 26.325332641601562, "learning_rate": 8.066561014263076e-06, "loss": 2.6836, "step": 312 }, { "epoch": 0.23551542513167795, "grad_norm": 50.82712173461914, "learning_rate": 8.058637083993661e-06, "loss": 3.0352, "step": 313 }, { "epoch": 0.236267870579383, "grad_norm": 32.116920471191406, "learning_rate": 8.050713153724248e-06, "loss": 3.0156, "step": 314 }, { "epoch": 0.23702031602708803, "grad_norm": 18.274662017822266, "learning_rate": 8.042789223454834e-06, "loss": 2.791, "step": 315 }, { "epoch": 0.23777276147479307, "grad_norm": 30.45490837097168, "learning_rate": 8.03486529318542e-06, "loss": 2.8223, "step": 316 }, { "epoch": 0.23852520692249812, "grad_norm": 34.29349899291992, "learning_rate": 8.026941362916006e-06, "loss": 2.8623, "step": 317 }, { "epoch": 0.23927765237020315, "grad_norm": 23.390230178833008, "learning_rate": 8.019017432646593e-06, "loss": 2.8398, "step": 318 }, { "epoch": 0.2400300978179082, "grad_norm": 39.82279968261719, "learning_rate": 8.01109350237718e-06, "loss": 3.3867, "step": 319 }, { "epoch": 0.24078254326561324, "grad_norm": 16.220998764038086, "learning_rate": 8.003169572107765e-06, "loss": 3.0234, "step": 320 }, { "epoch": 0.24153498871331827, "grad_norm": 19.393198013305664, "learning_rate": 7.995245641838353e-06, "loss": 3.1367, "step": 321 }, { "epoch": 0.24228743416102333, "grad_norm": 20.260129928588867, "learning_rate": 7.987321711568938e-06, "loss": 2.5, "step": 322 }, { "epoch": 0.24303987960872836, "grad_norm": 26.83426284790039, "learning_rate": 7.979397781299525e-06, "loss": 2.7949, "step": 323 }, { "epoch": 0.24379232505643342, "grad_norm": 16.979801177978516, "learning_rate": 7.971473851030112e-06, "loss": 2.748, "step": 324 }, { "epoch": 0.24454477050413845, "grad_norm": 32.53853988647461, "learning_rate": 7.963549920760697e-06, "loss": 2.6592, "step": 325 }, { "epoch": 0.24529721595184348, "grad_norm": 35.54403305053711, "learning_rate": 7.955625990491284e-06, "loss": 2.7207, "step": 326 }, { "epoch": 0.24604966139954854, "grad_norm": 26.685930252075195, "learning_rate": 7.94770206022187e-06, "loss": 2.7061, "step": 327 }, { "epoch": 0.24680210684725357, "grad_norm": 25.83123207092285, "learning_rate": 7.939778129952457e-06, "loss": 3.1514, "step": 328 }, { "epoch": 0.24755455229495862, "grad_norm": 20.83842658996582, "learning_rate": 7.931854199683044e-06, "loss": 2.4766, "step": 329 }, { "epoch": 0.24830699774266365, "grad_norm": 25.924402236938477, "learning_rate": 7.92393026941363e-06, "loss": 2.8398, "step": 330 }, { "epoch": 0.24905944319036868, "grad_norm": 31.948139190673828, "learning_rate": 7.916006339144216e-06, "loss": 3.0576, "step": 331 }, { "epoch": 0.24981188863807374, "grad_norm": 22.187664031982422, "learning_rate": 7.908082408874802e-06, "loss": 3.3047, "step": 332 }, { "epoch": 0.2505643340857788, "grad_norm": 34.52567672729492, "learning_rate": 7.900158478605389e-06, "loss": 3.168, "step": 333 }, { "epoch": 0.2513167795334838, "grad_norm": 21.3934326171875, "learning_rate": 7.892234548335976e-06, "loss": 3.041, "step": 334 }, { "epoch": 0.2520692249811889, "grad_norm": 42.445343017578125, "learning_rate": 7.884310618066563e-06, "loss": 2.6445, "step": 335 }, { "epoch": 0.2528216704288939, "grad_norm": 36.86784744262695, "learning_rate": 7.876386687797148e-06, "loss": 2.7383, "step": 336 }, { "epoch": 0.25357411587659895, "grad_norm": 56.01811981201172, "learning_rate": 7.868462757527733e-06, "loss": 2.6211, "step": 337 }, { "epoch": 0.254326561324304, "grad_norm": 23.526220321655273, "learning_rate": 7.86053882725832e-06, "loss": 2.6914, "step": 338 }, { "epoch": 0.255079006772009, "grad_norm": 23.751649856567383, "learning_rate": 7.852614896988907e-06, "loss": 2.9531, "step": 339 }, { "epoch": 0.2558314522197141, "grad_norm": 16.780838012695312, "learning_rate": 7.844690966719494e-06, "loss": 2.6992, "step": 340 }, { "epoch": 0.2565838976674191, "grad_norm": 31.69736671447754, "learning_rate": 7.83676703645008e-06, "loss": 3.0254, "step": 341 }, { "epoch": 0.25733634311512416, "grad_norm": 26.73095703125, "learning_rate": 7.828843106180665e-06, "loss": 2.3555, "step": 342 }, { "epoch": 0.2580887885628292, "grad_norm": 19.62910270690918, "learning_rate": 7.820919175911252e-06, "loss": 2.6621, "step": 343 }, { "epoch": 0.2588412340105342, "grad_norm": 25.195018768310547, "learning_rate": 7.81299524564184e-06, "loss": 2.5732, "step": 344 }, { "epoch": 0.2595936794582393, "grad_norm": 28.618427276611328, "learning_rate": 7.805071315372426e-06, "loss": 2.8193, "step": 345 }, { "epoch": 0.26034612490594433, "grad_norm": 35.68719482421875, "learning_rate": 7.797147385103012e-06, "loss": 2.916, "step": 346 }, { "epoch": 0.26109857035364936, "grad_norm": 28.501699447631836, "learning_rate": 7.789223454833597e-06, "loss": 2.541, "step": 347 }, { "epoch": 0.2618510158013544, "grad_norm": 24.121973037719727, "learning_rate": 7.781299524564184e-06, "loss": 2.7383, "step": 348 }, { "epoch": 0.2626034612490594, "grad_norm": 27.293880462646484, "learning_rate": 7.773375594294771e-06, "loss": 2.8281, "step": 349 }, { "epoch": 0.2633559066967645, "grad_norm": 26.175559997558594, "learning_rate": 7.765451664025358e-06, "loss": 2.9297, "step": 350 }, { "epoch": 0.26410835214446954, "grad_norm": 46.64474868774414, "learning_rate": 7.757527733755943e-06, "loss": 2.8604, "step": 351 }, { "epoch": 0.26486079759217457, "grad_norm": 25.304244995117188, "learning_rate": 7.749603803486529e-06, "loss": 3.1445, "step": 352 }, { "epoch": 0.2656132430398796, "grad_norm": 24.73731803894043, "learning_rate": 7.741679873217116e-06, "loss": 2.4854, "step": 353 }, { "epoch": 0.26636568848758463, "grad_norm": 23.45802116394043, "learning_rate": 7.733755942947703e-06, "loss": 2.5186, "step": 354 }, { "epoch": 0.2671181339352897, "grad_norm": 56.9620361328125, "learning_rate": 7.72583201267829e-06, "loss": 2.8076, "step": 355 }, { "epoch": 0.26787057938299474, "grad_norm": 33.860206604003906, "learning_rate": 7.717908082408875e-06, "loss": 2.6973, "step": 356 }, { "epoch": 0.2686230248306998, "grad_norm": 45.37279510498047, "learning_rate": 7.70998415213946e-06, "loss": 2.8223, "step": 357 }, { "epoch": 0.2693754702784048, "grad_norm": 18.210763931274414, "learning_rate": 7.702060221870048e-06, "loss": 2.4668, "step": 358 }, { "epoch": 0.27012791572610984, "grad_norm": 25.83952522277832, "learning_rate": 7.694136291600635e-06, "loss": 2.7012, "step": 359 }, { "epoch": 0.2708803611738149, "grad_norm": 27.17302894592285, "learning_rate": 7.686212361331222e-06, "loss": 2.4834, "step": 360 }, { "epoch": 0.27163280662151995, "grad_norm": 20.181354522705078, "learning_rate": 7.678288431061807e-06, "loss": 3.4531, "step": 361 }, { "epoch": 0.272385252069225, "grad_norm": 40.30481719970703, "learning_rate": 7.670364500792394e-06, "loss": 2.9521, "step": 362 }, { "epoch": 0.27313769751693, "grad_norm": 53.8978271484375, "learning_rate": 7.66244057052298e-06, "loss": 2.3486, "step": 363 }, { "epoch": 0.27389014296463504, "grad_norm": 28.822010040283203, "learning_rate": 7.654516640253566e-06, "loss": 3.0176, "step": 364 }, { "epoch": 0.2746425884123401, "grad_norm": 30.344985961914062, "learning_rate": 7.646592709984154e-06, "loss": 2.6895, "step": 365 }, { "epoch": 0.27539503386004516, "grad_norm": 31.335912704467773, "learning_rate": 7.638668779714739e-06, "loss": 2.5693, "step": 366 }, { "epoch": 0.2761474793077502, "grad_norm": 29.214174270629883, "learning_rate": 7.630744849445326e-06, "loss": 3.4844, "step": 367 }, { "epoch": 0.2768999247554552, "grad_norm": 31.165367126464844, "learning_rate": 7.622820919175912e-06, "loss": 2.9199, "step": 368 }, { "epoch": 0.27765237020316025, "grad_norm": 31.91497230529785, "learning_rate": 7.614896988906498e-06, "loss": 3.2559, "step": 369 }, { "epoch": 0.27840481565086533, "grad_norm": 17.61781120300293, "learning_rate": 7.606973058637085e-06, "loss": 2.332, "step": 370 }, { "epoch": 0.27915726109857036, "grad_norm": 22.988460540771484, "learning_rate": 7.5990491283676715e-06, "loss": 2.3535, "step": 371 }, { "epoch": 0.2799097065462754, "grad_norm": 22.7020263671875, "learning_rate": 7.591125198098257e-06, "loss": 2.9844, "step": 372 }, { "epoch": 0.2806621519939804, "grad_norm": 21.630903244018555, "learning_rate": 7.583201267828844e-06, "loss": 2.5059, "step": 373 }, { "epoch": 0.28141459744168545, "grad_norm": 17.710723876953125, "learning_rate": 7.57527733755943e-06, "loss": 2.8945, "step": 374 }, { "epoch": 0.28216704288939054, "grad_norm": 19.4321231842041, "learning_rate": 7.567353407290017e-06, "loss": 2.8369, "step": 375 }, { "epoch": 0.28291948833709557, "grad_norm": 48.42848587036133, "learning_rate": 7.559429477020603e-06, "loss": 3.0225, "step": 376 }, { "epoch": 0.2836719337848006, "grad_norm": 34.190399169921875, "learning_rate": 7.551505546751189e-06, "loss": 2.4053, "step": 377 }, { "epoch": 0.28442437923250563, "grad_norm": 19.983348846435547, "learning_rate": 7.543581616481776e-06, "loss": 2.6807, "step": 378 }, { "epoch": 0.28517682468021066, "grad_norm": 19.783525466918945, "learning_rate": 7.535657686212362e-06, "loss": 2.5547, "step": 379 }, { "epoch": 0.28592927012791575, "grad_norm": 49.98888397216797, "learning_rate": 7.527733755942949e-06, "loss": 3.2402, "step": 380 }, { "epoch": 0.2866817155756208, "grad_norm": 38.5321044921875, "learning_rate": 7.519809825673535e-06, "loss": 2.9453, "step": 381 }, { "epoch": 0.2874341610233258, "grad_norm": 36.0396842956543, "learning_rate": 7.5118858954041205e-06, "loss": 2.6807, "step": 382 }, { "epoch": 0.28818660647103084, "grad_norm": 24.879011154174805, "learning_rate": 7.5039619651347075e-06, "loss": 2.4502, "step": 383 }, { "epoch": 0.28893905191873587, "grad_norm": 22.27727699279785, "learning_rate": 7.496038034865294e-06, "loss": 2.5293, "step": 384 }, { "epoch": 0.28969149736644095, "grad_norm": 45.97170639038086, "learning_rate": 7.488114104595881e-06, "loss": 2.7891, "step": 385 }, { "epoch": 0.290443942814146, "grad_norm": 51.14545822143555, "learning_rate": 7.480190174326466e-06, "loss": 3.1641, "step": 386 }, { "epoch": 0.291196388261851, "grad_norm": 58.7318000793457, "learning_rate": 7.472266244057052e-06, "loss": 2.4834, "step": 387 }, { "epoch": 0.29194883370955604, "grad_norm": 38.557979583740234, "learning_rate": 7.464342313787639e-06, "loss": 2.4551, "step": 388 }, { "epoch": 0.2927012791572611, "grad_norm": 30.150957107543945, "learning_rate": 7.4564183835182255e-06, "loss": 3.127, "step": 389 }, { "epoch": 0.29345372460496616, "grad_norm": 24.9825496673584, "learning_rate": 7.4484944532488126e-06, "loss": 2.9766, "step": 390 }, { "epoch": 0.2942061700526712, "grad_norm": 34.27793884277344, "learning_rate": 7.440570522979398e-06, "loss": 2.8467, "step": 391 }, { "epoch": 0.2949586155003762, "grad_norm": 40.562530517578125, "learning_rate": 7.432646592709984e-06, "loss": 2.7354, "step": 392 }, { "epoch": 0.29571106094808125, "grad_norm": 27.370962142944336, "learning_rate": 7.424722662440571e-06, "loss": 2.6748, "step": 393 }, { "epoch": 0.2964635063957863, "grad_norm": 23.176212310791016, "learning_rate": 7.416798732171157e-06, "loss": 2.3291, "step": 394 }, { "epoch": 0.29721595184349137, "grad_norm": 20.481536865234375, "learning_rate": 7.408874801901744e-06, "loss": 2.5684, "step": 395 }, { "epoch": 0.2979683972911964, "grad_norm": 18.736602783203125, "learning_rate": 7.40095087163233e-06, "loss": 2.4946, "step": 396 }, { "epoch": 0.2987208427389014, "grad_norm": 21.316904067993164, "learning_rate": 7.393026941362916e-06, "loss": 2.8203, "step": 397 }, { "epoch": 0.29947328818660646, "grad_norm": 23.690597534179688, "learning_rate": 7.385103011093503e-06, "loss": 3.0039, "step": 398 }, { "epoch": 0.3002257336343115, "grad_norm": 37.80546188354492, "learning_rate": 7.377179080824089e-06, "loss": 2.4521, "step": 399 }, { "epoch": 0.3009781790820166, "grad_norm": 34.161041259765625, "learning_rate": 7.369255150554676e-06, "loss": 2.6895, "step": 400 }, { "epoch": 0.3017306245297216, "grad_norm": 30.497211456298828, "learning_rate": 7.3613312202852615e-06, "loss": 3.082, "step": 401 }, { "epoch": 0.30248306997742663, "grad_norm": 18.200454711914062, "learning_rate": 7.3534072900158486e-06, "loss": 2.8096, "step": 402 }, { "epoch": 0.30323551542513166, "grad_norm": 21.7595157623291, "learning_rate": 7.345483359746435e-06, "loss": 2.9072, "step": 403 }, { "epoch": 0.3039879608728367, "grad_norm": 27.392026901245117, "learning_rate": 7.337559429477021e-06, "loss": 3.0117, "step": 404 }, { "epoch": 0.3047404063205418, "grad_norm": 26.6084041595459, "learning_rate": 7.329635499207608e-06, "loss": 2.3936, "step": 405 }, { "epoch": 0.3054928517682468, "grad_norm": 27.991764068603516, "learning_rate": 7.321711568938193e-06, "loss": 2.4004, "step": 406 }, { "epoch": 0.30624529721595184, "grad_norm": 14.712749481201172, "learning_rate": 7.31378763866878e-06, "loss": 2.458, "step": 407 }, { "epoch": 0.30699774266365687, "grad_norm": 23.980497360229492, "learning_rate": 7.305863708399367e-06, "loss": 3.0781, "step": 408 }, { "epoch": 0.3077501881113619, "grad_norm": 19.64727783203125, "learning_rate": 7.297939778129954e-06, "loss": 2.7021, "step": 409 }, { "epoch": 0.308502633559067, "grad_norm": 24.826862335205078, "learning_rate": 7.29001584786054e-06, "loss": 2.7246, "step": 410 }, { "epoch": 0.309255079006772, "grad_norm": 27.15070343017578, "learning_rate": 7.282091917591125e-06, "loss": 2.8262, "step": 411 }, { "epoch": 0.31000752445447705, "grad_norm": 19.45827293395996, "learning_rate": 7.274167987321712e-06, "loss": 2.8467, "step": 412 }, { "epoch": 0.3107599699021821, "grad_norm": 28.588729858398438, "learning_rate": 7.266244057052298e-06, "loss": 2.8027, "step": 413 }, { "epoch": 0.3115124153498871, "grad_norm": 20.850120544433594, "learning_rate": 7.2583201267828854e-06, "loss": 2.4756, "step": 414 }, { "epoch": 0.3122648607975922, "grad_norm": 40.70508575439453, "learning_rate": 7.250396196513472e-06, "loss": 2.9219, "step": 415 }, { "epoch": 0.3130173062452972, "grad_norm": 44.51210403442383, "learning_rate": 7.242472266244057e-06, "loss": 2.8672, "step": 416 }, { "epoch": 0.31376975169300225, "grad_norm": 25.947845458984375, "learning_rate": 7.234548335974644e-06, "loss": 2.8848, "step": 417 }, { "epoch": 0.3145221971407073, "grad_norm": 31.283601760864258, "learning_rate": 7.22662440570523e-06, "loss": 2.834, "step": 418 }, { "epoch": 0.3152746425884123, "grad_norm": 32.150917053222656, "learning_rate": 7.218700475435817e-06, "loss": 2.6348, "step": 419 }, { "epoch": 0.3160270880361174, "grad_norm": 25.346118927001953, "learning_rate": 7.2107765451664034e-06, "loss": 2.8711, "step": 420 }, { "epoch": 0.31677953348382243, "grad_norm": 33.96476364135742, "learning_rate": 7.202852614896989e-06, "loss": 3.2031, "step": 421 }, { "epoch": 0.31753197893152746, "grad_norm": 19.30603790283203, "learning_rate": 7.194928684627576e-06, "loss": 2.668, "step": 422 }, { "epoch": 0.3182844243792325, "grad_norm": 19.03653907775879, "learning_rate": 7.187004754358162e-06, "loss": 2.4395, "step": 423 }, { "epoch": 0.3190368698269376, "grad_norm": 26.007007598876953, "learning_rate": 7.179080824088749e-06, "loss": 2.6426, "step": 424 }, { "epoch": 0.3197893152746426, "grad_norm": 19.245302200317383, "learning_rate": 7.171156893819335e-06, "loss": 2.8105, "step": 425 }, { "epoch": 0.32054176072234764, "grad_norm": 19.74489402770996, "learning_rate": 7.163232963549921e-06, "loss": 2.9902, "step": 426 }, { "epoch": 0.32129420617005267, "grad_norm": 39.224300384521484, "learning_rate": 7.155309033280508e-06, "loss": 3.1826, "step": 427 }, { "epoch": 0.3220466516177577, "grad_norm": 19.44862174987793, "learning_rate": 7.147385103011094e-06, "loss": 2.4424, "step": 428 }, { "epoch": 0.3227990970654628, "grad_norm": 19.365638732910156, "learning_rate": 7.139461172741681e-06, "loss": 3.041, "step": 429 }, { "epoch": 0.3235515425131678, "grad_norm": 44.24783706665039, "learning_rate": 7.131537242472267e-06, "loss": 2.9551, "step": 430 }, { "epoch": 0.32430398796087284, "grad_norm": 44.232093811035156, "learning_rate": 7.123613312202852e-06, "loss": 2.8555, "step": 431 }, { "epoch": 0.32505643340857787, "grad_norm": 21.96976661682129, "learning_rate": 7.1156893819334394e-06, "loss": 2.7236, "step": 432 }, { "epoch": 0.3258088788562829, "grad_norm": 36.008201599121094, "learning_rate": 7.107765451664026e-06, "loss": 2.7588, "step": 433 }, { "epoch": 0.326561324303988, "grad_norm": 22.36960220336914, "learning_rate": 7.099841521394613e-06, "loss": 2.2725, "step": 434 }, { "epoch": 0.327313769751693, "grad_norm": 41.12551498413086, "learning_rate": 7.091917591125199e-06, "loss": 2.7461, "step": 435 }, { "epoch": 0.32806621519939805, "grad_norm": 42.819915771484375, "learning_rate": 7.083993660855785e-06, "loss": 2.998, "step": 436 }, { "epoch": 0.3288186606471031, "grad_norm": 82.13551330566406, "learning_rate": 7.076069730586371e-06, "loss": 3.2363, "step": 437 }, { "epoch": 0.3295711060948081, "grad_norm": 43.233760833740234, "learning_rate": 7.0681458003169574e-06, "loss": 2.2656, "step": 438 }, { "epoch": 0.3303235515425132, "grad_norm": 19.841270446777344, "learning_rate": 7.0602218700475445e-06, "loss": 2.7637, "step": 439 }, { "epoch": 0.3310759969902182, "grad_norm": 20.722280502319336, "learning_rate": 7.052297939778131e-06, "loss": 3.1152, "step": 440 }, { "epoch": 0.33182844243792325, "grad_norm": 56.44236755371094, "learning_rate": 7.044374009508717e-06, "loss": 2.8633, "step": 441 }, { "epoch": 0.3325808878856283, "grad_norm": 28.693634033203125, "learning_rate": 7.036450079239303e-06, "loss": 2.9644, "step": 442 }, { "epoch": 0.3333333333333333, "grad_norm": 20.19970703125, "learning_rate": 7.02852614896989e-06, "loss": 2.374, "step": 443 }, { "epoch": 0.3340857787810384, "grad_norm": 37.031002044677734, "learning_rate": 7.020602218700476e-06, "loss": 3.0312, "step": 444 }, { "epoch": 0.33483822422874343, "grad_norm": 25.726011276245117, "learning_rate": 7.0126782884310625e-06, "loss": 2.6094, "step": 445 }, { "epoch": 0.33559066967644846, "grad_norm": 19.664955139160156, "learning_rate": 7.004754358161649e-06, "loss": 2.498, "step": 446 }, { "epoch": 0.3363431151241535, "grad_norm": 35.93477249145508, "learning_rate": 6.996830427892235e-06, "loss": 2.8789, "step": 447 }, { "epoch": 0.3370955605718585, "grad_norm": 24.938539505004883, "learning_rate": 6.988906497622822e-06, "loss": 3.0039, "step": 448 }, { "epoch": 0.3378480060195636, "grad_norm": 18.138105392456055, "learning_rate": 6.980982567353408e-06, "loss": 2.3467, "step": 449 }, { "epoch": 0.33860045146726864, "grad_norm": 30.399091720581055, "learning_rate": 6.973058637083995e-06, "loss": 2.3643, "step": 450 }, { "epoch": 0.33935289691497367, "grad_norm": 28.36125373840332, "learning_rate": 6.9651347068145805e-06, "loss": 2.8232, "step": 451 }, { "epoch": 0.3401053423626787, "grad_norm": 24.042072296142578, "learning_rate": 6.957210776545167e-06, "loss": 2.9639, "step": 452 }, { "epoch": 0.34085778781038373, "grad_norm": 20.310359954833984, "learning_rate": 6.949286846275754e-06, "loss": 2.2578, "step": 453 }, { "epoch": 0.3416102332580888, "grad_norm": 31.458209991455078, "learning_rate": 6.94136291600634e-06, "loss": 2.7305, "step": 454 }, { "epoch": 0.34236267870579384, "grad_norm": 16.078969955444336, "learning_rate": 6.933438985736925e-06, "loss": 2.376, "step": 455 }, { "epoch": 0.3431151241534989, "grad_norm": 24.066688537597656, "learning_rate": 6.925515055467512e-06, "loss": 2.7109, "step": 456 }, { "epoch": 0.3438675696012039, "grad_norm": 25.2286376953125, "learning_rate": 6.9175911251980985e-06, "loss": 2.2617, "step": 457 }, { "epoch": 0.34462001504890893, "grad_norm": 18.005447387695312, "learning_rate": 6.9096671949286855e-06, "loss": 2.5898, "step": 458 }, { "epoch": 0.345372460496614, "grad_norm": 41.71696472167969, "learning_rate": 6.901743264659272e-06, "loss": 2.9941, "step": 459 }, { "epoch": 0.34612490594431905, "grad_norm": 20.321922302246094, "learning_rate": 6.893819334389857e-06, "loss": 2.3975, "step": 460 }, { "epoch": 0.3468773513920241, "grad_norm": 35.73655700683594, "learning_rate": 6.885895404120444e-06, "loss": 2.8447, "step": 461 }, { "epoch": 0.3476297968397291, "grad_norm": 26.43678092956543, "learning_rate": 6.87797147385103e-06, "loss": 2.4873, "step": 462 }, { "epoch": 0.34838224228743414, "grad_norm": 17.385337829589844, "learning_rate": 6.870047543581617e-06, "loss": 2.5557, "step": 463 }, { "epoch": 0.3491346877351392, "grad_norm": 19.804067611694336, "learning_rate": 6.8621236133122035e-06, "loss": 2.1064, "step": 464 }, { "epoch": 0.34988713318284426, "grad_norm": 40.8009033203125, "learning_rate": 6.854199683042789e-06, "loss": 2.8848, "step": 465 }, { "epoch": 0.3506395786305493, "grad_norm": 41.40995407104492, "learning_rate": 6.846275752773376e-06, "loss": 2.8926, "step": 466 }, { "epoch": 0.3513920240782543, "grad_norm": 26.25147819519043, "learning_rate": 6.838351822503962e-06, "loss": 2.6045, "step": 467 }, { "epoch": 0.35214446952595935, "grad_norm": 23.980907440185547, "learning_rate": 6.830427892234549e-06, "loss": 2.0391, "step": 468 }, { "epoch": 0.35289691497366443, "grad_norm": 23.586442947387695, "learning_rate": 6.822503961965135e-06, "loss": 2.6167, "step": 469 }, { "epoch": 0.35364936042136946, "grad_norm": 36.71504592895508, "learning_rate": 6.8145800316957216e-06, "loss": 3.2363, "step": 470 }, { "epoch": 0.3544018058690745, "grad_norm": 30.827280044555664, "learning_rate": 6.806656101426308e-06, "loss": 2.9033, "step": 471 }, { "epoch": 0.3551542513167795, "grad_norm": 27.950368881225586, "learning_rate": 6.798732171156894e-06, "loss": 2.4824, "step": 472 }, { "epoch": 0.35590669676448455, "grad_norm": 51.84375, "learning_rate": 6.790808240887481e-06, "loss": 2.6455, "step": 473 }, { "epoch": 0.35665914221218964, "grad_norm": 19.127580642700195, "learning_rate": 6.782884310618067e-06, "loss": 2.043, "step": 474 }, { "epoch": 0.35741158765989467, "grad_norm": 20.604124069213867, "learning_rate": 6.774960380348653e-06, "loss": 3.0059, "step": 475 }, { "epoch": 0.3581640331075997, "grad_norm": 20.379287719726562, "learning_rate": 6.7670364500792396e-06, "loss": 2.6055, "step": 476 }, { "epoch": 0.35891647855530473, "grad_norm": 37.65190124511719, "learning_rate": 6.759112519809827e-06, "loss": 2.3867, "step": 477 }, { "epoch": 0.35966892400300976, "grad_norm": 27.957950592041016, "learning_rate": 6.751188589540413e-06, "loss": 2.7119, "step": 478 }, { "epoch": 0.36042136945071485, "grad_norm": 20.14349937438965, "learning_rate": 6.743264659270999e-06, "loss": 2.6367, "step": 479 }, { "epoch": 0.3611738148984199, "grad_norm": 21.61663818359375, "learning_rate": 6.735340729001585e-06, "loss": 2.7178, "step": 480 }, { "epoch": 0.3619262603461249, "grad_norm": 34.000892639160156, "learning_rate": 6.727416798732171e-06, "loss": 3.0371, "step": 481 }, { "epoch": 0.36267870579382994, "grad_norm": 29.969745635986328, "learning_rate": 6.719492868462758e-06, "loss": 2.4268, "step": 482 }, { "epoch": 0.36343115124153497, "grad_norm": 17.91866111755371, "learning_rate": 6.711568938193345e-06, "loss": 2.4336, "step": 483 }, { "epoch": 0.36418359668924005, "grad_norm": 17.030073165893555, "learning_rate": 6.703645007923932e-06, "loss": 2.4556, "step": 484 }, { "epoch": 0.3649360421369451, "grad_norm": 35.608009338378906, "learning_rate": 6.695721077654517e-06, "loss": 2.8691, "step": 485 }, { "epoch": 0.3656884875846501, "grad_norm": 30.210041046142578, "learning_rate": 6.687797147385103e-06, "loss": 3.2031, "step": 486 }, { "epoch": 0.36644093303235514, "grad_norm": 39.14574432373047, "learning_rate": 6.67987321711569e-06, "loss": 2.6631, "step": 487 }, { "epoch": 0.3671933784800602, "grad_norm": 21.58750343322754, "learning_rate": 6.671949286846276e-06, "loss": 2.6328, "step": 488 }, { "epoch": 0.36794582392776526, "grad_norm": 23.24883270263672, "learning_rate": 6.6640253565768635e-06, "loss": 3.1035, "step": 489 }, { "epoch": 0.3686982693754703, "grad_norm": 29.510072708129883, "learning_rate": 6.656101426307449e-06, "loss": 3.4375, "step": 490 }, { "epoch": 0.3694507148231753, "grad_norm": 19.159259796142578, "learning_rate": 6.648177496038035e-06, "loss": 2.4209, "step": 491 }, { "epoch": 0.37020316027088035, "grad_norm": 33.7953987121582, "learning_rate": 6.640253565768622e-06, "loss": 2.4502, "step": 492 }, { "epoch": 0.3709556057185854, "grad_norm": 20.189437866210938, "learning_rate": 6.632329635499208e-06, "loss": 2.2578, "step": 493 }, { "epoch": 0.37170805116629047, "grad_norm": 24.063936233520508, "learning_rate": 6.624405705229795e-06, "loss": 2.75, "step": 494 }, { "epoch": 0.3724604966139955, "grad_norm": 28.4869384765625, "learning_rate": 6.616481774960381e-06, "loss": 2.5381, "step": 495 }, { "epoch": 0.3732129420617005, "grad_norm": 30.88617706298828, "learning_rate": 6.608557844690967e-06, "loss": 2.5, "step": 496 }, { "epoch": 0.37396538750940556, "grad_norm": 22.271657943725586, "learning_rate": 6.600633914421554e-06, "loss": 3.2891, "step": 497 }, { "epoch": 0.3747178329571106, "grad_norm": 27.613380432128906, "learning_rate": 6.59270998415214e-06, "loss": 2.8301, "step": 498 }, { "epoch": 0.37547027840481567, "grad_norm": 19.099576950073242, "learning_rate": 6.584786053882727e-06, "loss": 2.3486, "step": 499 }, { "epoch": 0.3762227238525207, "grad_norm": 30.90146827697754, "learning_rate": 6.5768621236133124e-06, "loss": 2.4004, "step": 500 }, { "epoch": 0.37697516930022573, "grad_norm": 45.43968200683594, "learning_rate": 6.568938193343899e-06, "loss": 2.4746, "step": 501 }, { "epoch": 0.37772761474793076, "grad_norm": 19.64021110534668, "learning_rate": 6.561014263074486e-06, "loss": 2.0723, "step": 502 }, { "epoch": 0.3784800601956358, "grad_norm": 20.16904067993164, "learning_rate": 6.553090332805072e-06, "loss": 2.3096, "step": 503 }, { "epoch": 0.3792325056433409, "grad_norm": 40.98538589477539, "learning_rate": 6.545166402535659e-06, "loss": 2.3213, "step": 504 }, { "epoch": 0.3799849510910459, "grad_norm": 24.2117862701416, "learning_rate": 6.537242472266244e-06, "loss": 2.7021, "step": 505 }, { "epoch": 0.38073739653875094, "grad_norm": 23.935213088989258, "learning_rate": 6.5293185419968304e-06, "loss": 2.293, "step": 506 }, { "epoch": 0.38148984198645597, "grad_norm": 34.53661346435547, "learning_rate": 6.5213946117274175e-06, "loss": 2.3984, "step": 507 }, { "epoch": 0.382242287434161, "grad_norm": 25.864501953125, "learning_rate": 6.513470681458004e-06, "loss": 2.2148, "step": 508 }, { "epoch": 0.3829947328818661, "grad_norm": 32.563419342041016, "learning_rate": 6.505546751188591e-06, "loss": 3.1729, "step": 509 }, { "epoch": 0.3837471783295711, "grad_norm": 22.276206970214844, "learning_rate": 6.497622820919176e-06, "loss": 2.6211, "step": 510 }, { "epoch": 0.38449962377727614, "grad_norm": 26.751834869384766, "learning_rate": 6.489698890649762e-06, "loss": 2.6377, "step": 511 }, { "epoch": 0.3852520692249812, "grad_norm": 20.746671676635742, "learning_rate": 6.481774960380349e-06, "loss": 2.4854, "step": 512 }, { "epoch": 0.3860045146726862, "grad_norm": 32.32805633544922, "learning_rate": 6.4738510301109355e-06, "loss": 3.1035, "step": 513 }, { "epoch": 0.3867569601203913, "grad_norm": 17.703580856323242, "learning_rate": 6.4659270998415225e-06, "loss": 2.3711, "step": 514 }, { "epoch": 0.3875094055680963, "grad_norm": 27.50429344177246, "learning_rate": 6.458003169572108e-06, "loss": 2.6309, "step": 515 }, { "epoch": 0.38826185101580135, "grad_norm": 21.104827880859375, "learning_rate": 6.450079239302695e-06, "loss": 2.6279, "step": 516 }, { "epoch": 0.3890142964635064, "grad_norm": 20.792558670043945, "learning_rate": 6.442155309033281e-06, "loss": 2.6025, "step": 517 }, { "epoch": 0.3897667419112114, "grad_norm": 35.51402282714844, "learning_rate": 6.434231378763868e-06, "loss": 2.499, "step": 518 }, { "epoch": 0.3905191873589165, "grad_norm": 17.71217155456543, "learning_rate": 6.426307448494454e-06, "loss": 2.1094, "step": 519 }, { "epoch": 0.3912716328066215, "grad_norm": 20.250795364379883, "learning_rate": 6.41838351822504e-06, "loss": 2.8779, "step": 520 }, { "epoch": 0.39202407825432656, "grad_norm": 24.222923278808594, "learning_rate": 6.410459587955627e-06, "loss": 2.5967, "step": 521 }, { "epoch": 0.3927765237020316, "grad_norm": 61.864662170410156, "learning_rate": 6.402535657686213e-06, "loss": 3.1523, "step": 522 }, { "epoch": 0.3935289691497366, "grad_norm": 39.048553466796875, "learning_rate": 6.3946117274168e-06, "loss": 2.7773, "step": 523 }, { "epoch": 0.3942814145974417, "grad_norm": 17.10451316833496, "learning_rate": 6.386687797147385e-06, "loss": 2.4375, "step": 524 }, { "epoch": 0.39503386004514673, "grad_norm": 47.01410675048828, "learning_rate": 6.3787638668779715e-06, "loss": 2.5957, "step": 525 }, { "epoch": 0.39578630549285176, "grad_norm": 29.18340301513672, "learning_rate": 6.3708399366085585e-06, "loss": 2.6543, "step": 526 }, { "epoch": 0.3965387509405568, "grad_norm": 20.794097900390625, "learning_rate": 6.362916006339145e-06, "loss": 2.6143, "step": 527 }, { "epoch": 0.3972911963882618, "grad_norm": 24.712759017944336, "learning_rate": 6.354992076069732e-06, "loss": 2.1914, "step": 528 }, { "epoch": 0.3980436418359669, "grad_norm": 35.4837646484375, "learning_rate": 6.347068145800317e-06, "loss": 2.5078, "step": 529 }, { "epoch": 0.39879608728367194, "grad_norm": 45.86260986328125, "learning_rate": 6.339144215530903e-06, "loss": 3.543, "step": 530 }, { "epoch": 0.39954853273137697, "grad_norm": 59.333621978759766, "learning_rate": 6.33122028526149e-06, "loss": 2.9795, "step": 531 }, { "epoch": 0.400300978179082, "grad_norm": 24.066104888916016, "learning_rate": 6.3232963549920765e-06, "loss": 2.627, "step": 532 }, { "epoch": 0.40105342362678703, "grad_norm": 26.95193099975586, "learning_rate": 6.3153724247226636e-06, "loss": 2.5371, "step": 533 }, { "epoch": 0.4018058690744921, "grad_norm": 23.84280776977539, "learning_rate": 6.307448494453249e-06, "loss": 1.8809, "step": 534 }, { "epoch": 0.40255831452219715, "grad_norm": 22.4331111907959, "learning_rate": 6.299524564183835e-06, "loss": 2.3213, "step": 535 }, { "epoch": 0.4033107599699022, "grad_norm": 22.308320999145508, "learning_rate": 6.291600633914422e-06, "loss": 2.2129, "step": 536 }, { "epoch": 0.4040632054176072, "grad_norm": 26.568504333496094, "learning_rate": 6.283676703645008e-06, "loss": 2.3184, "step": 537 }, { "epoch": 0.40481565086531224, "grad_norm": 66.00670623779297, "learning_rate": 6.275752773375595e-06, "loss": 3.3457, "step": 538 }, { "epoch": 0.4055680963130173, "grad_norm": 43.05216979980469, "learning_rate": 6.267828843106181e-06, "loss": 2.3125, "step": 539 }, { "epoch": 0.40632054176072235, "grad_norm": 31.19127655029297, "learning_rate": 6.259904912836767e-06, "loss": 2.623, "step": 540 }, { "epoch": 0.4070729872084274, "grad_norm": 27.80292320251465, "learning_rate": 6.251980982567354e-06, "loss": 2.7129, "step": 541 }, { "epoch": 0.4078254326561324, "grad_norm": 22.0871524810791, "learning_rate": 6.24405705229794e-06, "loss": 2.5322, "step": 542 }, { "epoch": 0.40857787810383744, "grad_norm": 46.346588134765625, "learning_rate": 6.236133122028527e-06, "loss": 2.9668, "step": 543 }, { "epoch": 0.40933032355154253, "grad_norm": 55.41552734375, "learning_rate": 6.2282091917591125e-06, "loss": 2.542, "step": 544 }, { "epoch": 0.41008276899924756, "grad_norm": 21.744953155517578, "learning_rate": 6.220285261489699e-06, "loss": 2.4756, "step": 545 }, { "epoch": 0.4108352144469526, "grad_norm": 54.682456970214844, "learning_rate": 6.212361331220286e-06, "loss": 2.7295, "step": 546 }, { "epoch": 0.4115876598946576, "grad_norm": 27.935394287109375, "learning_rate": 6.204437400950872e-06, "loss": 2.6572, "step": 547 }, { "epoch": 0.4123401053423627, "grad_norm": 41.32936477661133, "learning_rate": 6.196513470681459e-06, "loss": 2.1665, "step": 548 }, { "epoch": 0.41309255079006774, "grad_norm": 16.199787139892578, "learning_rate": 6.188589540412044e-06, "loss": 2.2158, "step": 549 }, { "epoch": 0.41384499623777277, "grad_norm": 31.044275283813477, "learning_rate": 6.180665610142631e-06, "loss": 2.4707, "step": 550 }, { "epoch": 0.4145974416854778, "grad_norm": 24.700780868530273, "learning_rate": 6.172741679873218e-06, "loss": 2.5107, "step": 551 }, { "epoch": 0.4153498871331828, "grad_norm": 43.95857238769531, "learning_rate": 6.164817749603804e-06, "loss": 2.6475, "step": 552 }, { "epoch": 0.4161023325808879, "grad_norm": 38.148746490478516, "learning_rate": 6.156893819334391e-06, "loss": 2.2852, "step": 553 }, { "epoch": 0.41685477802859294, "grad_norm": 35.26376724243164, "learning_rate": 6.148969889064976e-06, "loss": 2.335, "step": 554 }, { "epoch": 0.417607223476298, "grad_norm": 21.494600296020508, "learning_rate": 6.141045958795563e-06, "loss": 2.7549, "step": 555 }, { "epoch": 0.418359668924003, "grad_norm": 18.99728012084961, "learning_rate": 6.133122028526149e-06, "loss": 2.2207, "step": 556 }, { "epoch": 0.41911211437170803, "grad_norm": 21.479780197143555, "learning_rate": 6.1251980982567364e-06, "loss": 2.292, "step": 557 }, { "epoch": 0.4198645598194131, "grad_norm": 41.83938980102539, "learning_rate": 6.117274167987323e-06, "loss": 3.2812, "step": 558 }, { "epoch": 0.42061700526711815, "grad_norm": 27.518131256103516, "learning_rate": 6.109350237717908e-06, "loss": 2.2949, "step": 559 }, { "epoch": 0.4213694507148232, "grad_norm": 24.70359992980957, "learning_rate": 6.101426307448495e-06, "loss": 2.5342, "step": 560 }, { "epoch": 0.4221218961625282, "grad_norm": 29.205961227416992, "learning_rate": 6.093502377179081e-06, "loss": 2.0991, "step": 561 }, { "epoch": 0.42287434161023324, "grad_norm": 53.45262908935547, "learning_rate": 6.085578446909668e-06, "loss": 2.9941, "step": 562 }, { "epoch": 0.4236267870579383, "grad_norm": 22.154132843017578, "learning_rate": 6.0776545166402544e-06, "loss": 2.748, "step": 563 }, { "epoch": 0.42437923250564336, "grad_norm": 26.759519577026367, "learning_rate": 6.06973058637084e-06, "loss": 2.833, "step": 564 }, { "epoch": 0.4251316779533484, "grad_norm": 38.618797302246094, "learning_rate": 6.061806656101427e-06, "loss": 2.9258, "step": 565 }, { "epoch": 0.4258841234010534, "grad_norm": 25.398147583007812, "learning_rate": 6.053882725832013e-06, "loss": 2.251, "step": 566 }, { "epoch": 0.42663656884875845, "grad_norm": 21.547807693481445, "learning_rate": 6.0459587955626e-06, "loss": 2.4678, "step": 567 }, { "epoch": 0.42738901429646353, "grad_norm": 30.388839721679688, "learning_rate": 6.038034865293186e-06, "loss": 2.3594, "step": 568 }, { "epoch": 0.42814145974416856, "grad_norm": 23.49654197692871, "learning_rate": 6.030110935023772e-06, "loss": 2.5439, "step": 569 }, { "epoch": 0.4288939051918736, "grad_norm": 20.27910614013672, "learning_rate": 6.022187004754359e-06, "loss": 2.6602, "step": 570 }, { "epoch": 0.4296463506395786, "grad_norm": 24.437910079956055, "learning_rate": 6.014263074484945e-06, "loss": 3.2051, "step": 571 }, { "epoch": 0.43039879608728365, "grad_norm": 34.834957122802734, "learning_rate": 6.006339144215532e-06, "loss": 2.5488, "step": 572 }, { "epoch": 0.43115124153498874, "grad_norm": 27.473241806030273, "learning_rate": 5.998415213946118e-06, "loss": 2.6602, "step": 573 }, { "epoch": 0.43190368698269377, "grad_norm": 25.978803634643555, "learning_rate": 5.990491283676703e-06, "loss": 2.6504, "step": 574 }, { "epoch": 0.4326561324303988, "grad_norm": 33.283329010009766, "learning_rate": 5.9825673534072905e-06, "loss": 2.7822, "step": 575 }, { "epoch": 0.43340857787810383, "grad_norm": 26.3125, "learning_rate": 5.974643423137877e-06, "loss": 2.6143, "step": 576 }, { "epoch": 0.43416102332580886, "grad_norm": 17.16096305847168, "learning_rate": 5.966719492868464e-06, "loss": 2.2725, "step": 577 }, { "epoch": 0.43491346877351394, "grad_norm": 16.176515579223633, "learning_rate": 5.95879556259905e-06, "loss": 1.9639, "step": 578 }, { "epoch": 0.435665914221219, "grad_norm": 16.54779815673828, "learning_rate": 5.950871632329635e-06, "loss": 2.4492, "step": 579 }, { "epoch": 0.436418359668924, "grad_norm": 43.56422805786133, "learning_rate": 5.942947702060222e-06, "loss": 3.1377, "step": 580 }, { "epoch": 0.43717080511662904, "grad_norm": 52.872222900390625, "learning_rate": 5.9350237717908085e-06, "loss": 2.4629, "step": 581 }, { "epoch": 0.43792325056433407, "grad_norm": 22.31889533996582, "learning_rate": 5.9270998415213955e-06, "loss": 2.6084, "step": 582 }, { "epoch": 0.43867569601203915, "grad_norm": 19.257272720336914, "learning_rate": 5.919175911251982e-06, "loss": 2.3779, "step": 583 }, { "epoch": 0.4394281414597442, "grad_norm": 42.08190155029297, "learning_rate": 5.911251980982568e-06, "loss": 2.5972, "step": 584 }, { "epoch": 0.4401805869074492, "grad_norm": 30.580303192138672, "learning_rate": 5.903328050713154e-06, "loss": 2.2529, "step": 585 }, { "epoch": 0.44093303235515424, "grad_norm": 23.7304744720459, "learning_rate": 5.89540412044374e-06, "loss": 2.4678, "step": 586 }, { "epoch": 0.44168547780285927, "grad_norm": 40.74665451049805, "learning_rate": 5.887480190174327e-06, "loss": 2.4082, "step": 587 }, { "epoch": 0.44243792325056436, "grad_norm": 40.44230270385742, "learning_rate": 5.8795562599049135e-06, "loss": 2.8574, "step": 588 }, { "epoch": 0.4431903686982694, "grad_norm": 33.095977783203125, "learning_rate": 5.8716323296355e-06, "loss": 3.1426, "step": 589 }, { "epoch": 0.4439428141459744, "grad_norm": 42.75359344482422, "learning_rate": 5.863708399366086e-06, "loss": 2.3594, "step": 590 }, { "epoch": 0.44469525959367945, "grad_norm": 49.90977478027344, "learning_rate": 5.855784469096673e-06, "loss": 2.4434, "step": 591 }, { "epoch": 0.4454477050413845, "grad_norm": 24.432693481445312, "learning_rate": 5.847860538827259e-06, "loss": 2.3477, "step": 592 }, { "epoch": 0.44620015048908956, "grad_norm": 19.295387268066406, "learning_rate": 5.839936608557845e-06, "loss": 2.3105, "step": 593 }, { "epoch": 0.4469525959367946, "grad_norm": 27.823911666870117, "learning_rate": 5.8320126782884315e-06, "loss": 3.0078, "step": 594 }, { "epoch": 0.4477050413844996, "grad_norm": 43.58839797973633, "learning_rate": 5.824088748019018e-06, "loss": 2.6113, "step": 595 }, { "epoch": 0.44845748683220465, "grad_norm": 26.71452522277832, "learning_rate": 5.816164817749605e-06, "loss": 2.5693, "step": 596 }, { "epoch": 0.4492099322799097, "grad_norm": 18.590194702148438, "learning_rate": 5.808240887480191e-06, "loss": 2.3506, "step": 597 }, { "epoch": 0.44996237772761477, "grad_norm": 24.772584915161133, "learning_rate": 5.800316957210776e-06, "loss": 2.7256, "step": 598 }, { "epoch": 0.4507148231753198, "grad_norm": 31.634029388427734, "learning_rate": 5.792393026941363e-06, "loss": 2.8867, "step": 599 }, { "epoch": 0.45146726862302483, "grad_norm": 19.37485694885254, "learning_rate": 5.7844690966719495e-06, "loss": 2.9492, "step": 600 }, { "epoch": 0.45221971407072986, "grad_norm": 16.228641510009766, "learning_rate": 5.7765451664025366e-06, "loss": 2.4717, "step": 601 }, { "epoch": 0.4529721595184349, "grad_norm": 19.831296920776367, "learning_rate": 5.768621236133123e-06, "loss": 1.9902, "step": 602 }, { "epoch": 0.45372460496614, "grad_norm": 19.382762908935547, "learning_rate": 5.760697305863708e-06, "loss": 2.8203, "step": 603 }, { "epoch": 0.454477050413845, "grad_norm": 21.581256866455078, "learning_rate": 5.752773375594295e-06, "loss": 2.4434, "step": 604 }, { "epoch": 0.45522949586155004, "grad_norm": 26.005481719970703, "learning_rate": 5.744849445324881e-06, "loss": 2.6973, "step": 605 }, { "epoch": 0.45598194130925507, "grad_norm": 18.75592803955078, "learning_rate": 5.736925515055468e-06, "loss": 2.292, "step": 606 }, { "epoch": 0.4567343867569601, "grad_norm": 20.964447021484375, "learning_rate": 5.7290015847860546e-06, "loss": 2.6367, "step": 607 }, { "epoch": 0.4574868322046652, "grad_norm": 23.393482208251953, "learning_rate": 5.72107765451664e-06, "loss": 2.4834, "step": 608 }, { "epoch": 0.4582392776523702, "grad_norm": 25.4880428314209, "learning_rate": 5.713153724247227e-06, "loss": 2.3193, "step": 609 }, { "epoch": 0.45899172310007524, "grad_norm": 21.29010772705078, "learning_rate": 5.705229793977813e-06, "loss": 3.0508, "step": 610 }, { "epoch": 0.4597441685477803, "grad_norm": 32.27946853637695, "learning_rate": 5.6973058637084e-06, "loss": 2.3809, "step": 611 }, { "epoch": 0.4604966139954853, "grad_norm": 22.592269897460938, "learning_rate": 5.689381933438986e-06, "loss": 2.6006, "step": 612 }, { "epoch": 0.4612490594431904, "grad_norm": 17.350631713867188, "learning_rate": 5.681458003169572e-06, "loss": 2.3916, "step": 613 }, { "epoch": 0.4620015048908954, "grad_norm": 30.66547203063965, "learning_rate": 5.673534072900159e-06, "loss": 3.041, "step": 614 }, { "epoch": 0.46275395033860045, "grad_norm": 16.543447494506836, "learning_rate": 5.665610142630745e-06, "loss": 2.4219, "step": 615 }, { "epoch": 0.4635063957863055, "grad_norm": 17.638381958007812, "learning_rate": 5.657686212361332e-06, "loss": 2.2246, "step": 616 }, { "epoch": 0.4642588412340105, "grad_norm": 18.163257598876953, "learning_rate": 5.649762282091918e-06, "loss": 2.7139, "step": 617 }, { "epoch": 0.4650112866817156, "grad_norm": 29.288700103759766, "learning_rate": 5.6418383518225035e-06, "loss": 2.5703, "step": 618 }, { "epoch": 0.4657637321294206, "grad_norm": 34.10881042480469, "learning_rate": 5.6339144215530906e-06, "loss": 2.9375, "step": 619 }, { "epoch": 0.46651617757712566, "grad_norm": 16.555177688598633, "learning_rate": 5.625990491283677e-06, "loss": 2.2275, "step": 620 }, { "epoch": 0.4672686230248307, "grad_norm": 19.65677261352539, "learning_rate": 5.618066561014264e-06, "loss": 2.2822, "step": 621 }, { "epoch": 0.4680210684725357, "grad_norm": 21.970407485961914, "learning_rate": 5.61014263074485e-06, "loss": 2.793, "step": 622 }, { "epoch": 0.4687735139202408, "grad_norm": 21.59258460998535, "learning_rate": 5.602218700475436e-06, "loss": 2.166, "step": 623 }, { "epoch": 0.46952595936794583, "grad_norm": 22.609561920166016, "learning_rate": 5.594294770206022e-06, "loss": 2.4883, "step": 624 }, { "epoch": 0.47027840481565086, "grad_norm": 34.6531867980957, "learning_rate": 5.586370839936609e-06, "loss": 2.5146, "step": 625 }, { "epoch": 0.4710308502633559, "grad_norm": 26.691869735717773, "learning_rate": 5.578446909667196e-06, "loss": 2.7393, "step": 626 }, { "epoch": 0.4717832957110609, "grad_norm": 28.01468276977539, "learning_rate": 5.570522979397782e-06, "loss": 2.9395, "step": 627 }, { "epoch": 0.472535741158766, "grad_norm": 25.104557037353516, "learning_rate": 5.562599049128368e-06, "loss": 1.9766, "step": 628 }, { "epoch": 0.47328818660647104, "grad_norm": 19.037647247314453, "learning_rate": 5.554675118858954e-06, "loss": 2.0107, "step": 629 }, { "epoch": 0.47404063205417607, "grad_norm": 19.321691513061523, "learning_rate": 5.546751188589541e-06, "loss": 1.8467, "step": 630 }, { "epoch": 0.4747930775018811, "grad_norm": 36.88700866699219, "learning_rate": 5.5388272583201274e-06, "loss": 1.9414, "step": 631 }, { "epoch": 0.47554552294958613, "grad_norm": 28.881858825683594, "learning_rate": 5.5309033280507145e-06, "loss": 2.6367, "step": 632 }, { "epoch": 0.4762979683972912, "grad_norm": 32.7785758972168, "learning_rate": 5.5229793977813e-06, "loss": 2.2744, "step": 633 }, { "epoch": 0.47705041384499625, "grad_norm": 33.875144958496094, "learning_rate": 5.515055467511886e-06, "loss": 3.0371, "step": 634 }, { "epoch": 0.4778028592927013, "grad_norm": 26.45054817199707, "learning_rate": 5.507131537242473e-06, "loss": 2.4355, "step": 635 }, { "epoch": 0.4785553047404063, "grad_norm": 21.72730827331543, "learning_rate": 5.499207606973059e-06, "loss": 2.2876, "step": 636 }, { "epoch": 0.47930775018811134, "grad_norm": 42.81020736694336, "learning_rate": 5.491283676703646e-06, "loss": 3.3457, "step": 637 }, { "epoch": 0.4800601956358164, "grad_norm": 37.48054504394531, "learning_rate": 5.483359746434232e-06, "loss": 2.5371, "step": 638 }, { "epoch": 0.48081264108352145, "grad_norm": 38.4602165222168, "learning_rate": 5.475435816164818e-06, "loss": 2.4478, "step": 639 }, { "epoch": 0.4815650865312265, "grad_norm": 31.48103141784668, "learning_rate": 5.467511885895405e-06, "loss": 2.6201, "step": 640 }, { "epoch": 0.4823175319789315, "grad_norm": 43.95348358154297, "learning_rate": 5.459587955625991e-06, "loss": 2.6621, "step": 641 }, { "epoch": 0.48306997742663654, "grad_norm": 25.062053680419922, "learning_rate": 5.451664025356578e-06, "loss": 2.6729, "step": 642 }, { "epoch": 0.48382242287434163, "grad_norm": 41.68134689331055, "learning_rate": 5.4437400950871634e-06, "loss": 2.5586, "step": 643 }, { "epoch": 0.48457486832204666, "grad_norm": 28.32135581970215, "learning_rate": 5.43581616481775e-06, "loss": 2.6699, "step": 644 }, { "epoch": 0.4853273137697517, "grad_norm": 45.01399612426758, "learning_rate": 5.427892234548337e-06, "loss": 2.5332, "step": 645 }, { "epoch": 0.4860797592174567, "grad_norm": 34.67613220214844, "learning_rate": 5.419968304278923e-06, "loss": 2.3018, "step": 646 }, { "epoch": 0.48683220466516175, "grad_norm": 19.990108489990234, "learning_rate": 5.41204437400951e-06, "loss": 2.1064, "step": 647 }, { "epoch": 0.48758465011286684, "grad_norm": 20.448556900024414, "learning_rate": 5.404120443740095e-06, "loss": 2.4502, "step": 648 }, { "epoch": 0.48833709556057187, "grad_norm": 16.833580017089844, "learning_rate": 5.3961965134706814e-06, "loss": 2.0293, "step": 649 }, { "epoch": 0.4890895410082769, "grad_norm": 31.61375617980957, "learning_rate": 5.3882725832012685e-06, "loss": 2.4951, "step": 650 }, { "epoch": 0.4898419864559819, "grad_norm": 37.58226013183594, "learning_rate": 5.380348652931855e-06, "loss": 2.8955, "step": 651 }, { "epoch": 0.49059443190368696, "grad_norm": 61.47941589355469, "learning_rate": 5.372424722662442e-06, "loss": 3.2871, "step": 652 }, { "epoch": 0.49134687735139204, "grad_norm": 28.28410530090332, "learning_rate": 5.364500792393027e-06, "loss": 2.3145, "step": 653 }, { "epoch": 0.49209932279909707, "grad_norm": 51.59183883666992, "learning_rate": 5.356576862123613e-06, "loss": 2.7842, "step": 654 }, { "epoch": 0.4928517682468021, "grad_norm": 19.695018768310547, "learning_rate": 5.3486529318542e-06, "loss": 2.2412, "step": 655 }, { "epoch": 0.49360421369450713, "grad_norm": 22.1850528717041, "learning_rate": 5.3407290015847865e-06, "loss": 2.7324, "step": 656 }, { "epoch": 0.49435665914221216, "grad_norm": 22.114850997924805, "learning_rate": 5.3328050713153735e-06, "loss": 2.0723, "step": 657 }, { "epoch": 0.49510910458991725, "grad_norm": 38.758094787597656, "learning_rate": 5.324881141045959e-06, "loss": 2.9834, "step": 658 }, { "epoch": 0.4958615500376223, "grad_norm": 23.552003860473633, "learning_rate": 5.316957210776545e-06, "loss": 2.6865, "step": 659 }, { "epoch": 0.4966139954853273, "grad_norm": 25.014806747436523, "learning_rate": 5.309033280507132e-06, "loss": 2.7842, "step": 660 }, { "epoch": 0.49736644093303234, "grad_norm": 24.715150833129883, "learning_rate": 5.301109350237718e-06, "loss": 1.8623, "step": 661 }, { "epoch": 0.49811888638073737, "grad_norm": 21.476627349853516, "learning_rate": 5.293185419968305e-06, "loss": 2.1055, "step": 662 }, { "epoch": 0.49887133182844245, "grad_norm": 20.212873458862305, "learning_rate": 5.285261489698891e-06, "loss": 2.1934, "step": 663 }, { "epoch": 0.4996237772761475, "grad_norm": 21.110328674316406, "learning_rate": 5.277337559429478e-06, "loss": 2.7734, "step": 664 }, { "epoch": 0.5003762227238525, "grad_norm": 23.664304733276367, "learning_rate": 5.269413629160064e-06, "loss": 2.3623, "step": 665 }, { "epoch": 0.5011286681715575, "grad_norm": 24.913185119628906, "learning_rate": 5.26148969889065e-06, "loss": 2.5967, "step": 666 }, { "epoch": 0.5018811136192626, "grad_norm": 23.190227508544922, "learning_rate": 5.253565768621236e-06, "loss": 2.6738, "step": 667 }, { "epoch": 0.5026335590669676, "grad_norm": 21.779712677001953, "learning_rate": 5.2456418383518225e-06, "loss": 2.6104, "step": 668 }, { "epoch": 0.5033860045146726, "grad_norm": 26.490779876708984, "learning_rate": 5.2377179080824095e-06, "loss": 2.2998, "step": 669 }, { "epoch": 0.5041384499623778, "grad_norm": 16.361080169677734, "learning_rate": 5.229793977812996e-06, "loss": 2.2891, "step": 670 }, { "epoch": 0.5048908954100828, "grad_norm": 26.185226440429688, "learning_rate": 5.221870047543583e-06, "loss": 2.3906, "step": 671 }, { "epoch": 0.5056433408577878, "grad_norm": 21.47825050354004, "learning_rate": 5.213946117274168e-06, "loss": 2.6611, "step": 672 }, { "epoch": 0.5063957863054929, "grad_norm": 37.27735900878906, "learning_rate": 5.206022187004754e-06, "loss": 2.7812, "step": 673 }, { "epoch": 0.5071482317531979, "grad_norm": 18.173118591308594, "learning_rate": 5.198098256735341e-06, "loss": 1.9253, "step": 674 }, { "epoch": 0.5079006772009029, "grad_norm": 35.55131912231445, "learning_rate": 5.1901743264659275e-06, "loss": 2.1836, "step": 675 }, { "epoch": 0.508653122648608, "grad_norm": 17.132335662841797, "learning_rate": 5.182250396196515e-06, "loss": 2.2422, "step": 676 }, { "epoch": 0.509405568096313, "grad_norm": 17.84613800048828, "learning_rate": 5.1743264659271e-06, "loss": 2.3105, "step": 677 }, { "epoch": 0.510158013544018, "grad_norm": 29.677824020385742, "learning_rate": 5.166402535657686e-06, "loss": 2.6875, "step": 678 }, { "epoch": 0.510910458991723, "grad_norm": 34.693511962890625, "learning_rate": 5.158478605388273e-06, "loss": 2.5439, "step": 679 }, { "epoch": 0.5116629044394282, "grad_norm": 22.879697799682617, "learning_rate": 5.150554675118859e-06, "loss": 2.4609, "step": 680 }, { "epoch": 0.5124153498871332, "grad_norm": 21.615089416503906, "learning_rate": 5.142630744849446e-06, "loss": 2.5283, "step": 681 }, { "epoch": 0.5131677953348383, "grad_norm": 31.141887664794922, "learning_rate": 5.134706814580032e-06, "loss": 2.4053, "step": 682 }, { "epoch": 0.5139202407825433, "grad_norm": 36.289127349853516, "learning_rate": 5.126782884310618e-06, "loss": 2.4961, "step": 683 }, { "epoch": 0.5146726862302483, "grad_norm": 25.587385177612305, "learning_rate": 5.118858954041205e-06, "loss": 2.7744, "step": 684 }, { "epoch": 0.5154251316779533, "grad_norm": 30.78696060180664, "learning_rate": 5.110935023771791e-06, "loss": 2.2627, "step": 685 }, { "epoch": 0.5161775771256584, "grad_norm": 28.154029846191406, "learning_rate": 5.103011093502378e-06, "loss": 2.627, "step": 686 }, { "epoch": 0.5169300225733634, "grad_norm": 23.078495025634766, "learning_rate": 5.0950871632329636e-06, "loss": 2.251, "step": 687 }, { "epoch": 0.5176824680210684, "grad_norm": 18.885520935058594, "learning_rate": 5.08716323296355e-06, "loss": 2.1416, "step": 688 }, { "epoch": 0.5184349134687735, "grad_norm": 21.535594940185547, "learning_rate": 5.079239302694137e-06, "loss": 2.5967, "step": 689 }, { "epoch": 0.5191873589164786, "grad_norm": 23.832807540893555, "learning_rate": 5.071315372424723e-06, "loss": 2.6113, "step": 690 }, { "epoch": 0.5199398043641836, "grad_norm": 25.892114639282227, "learning_rate": 5.06339144215531e-06, "loss": 2.1582, "step": 691 }, { "epoch": 0.5206922498118887, "grad_norm": 40.396018981933594, "learning_rate": 5.055467511885895e-06, "loss": 2.5918, "step": 692 }, { "epoch": 0.5214446952595937, "grad_norm": 30.396682739257812, "learning_rate": 5.0475435816164816e-06, "loss": 2.5098, "step": 693 }, { "epoch": 0.5221971407072987, "grad_norm": 28.615299224853516, "learning_rate": 5.039619651347069e-06, "loss": 2.6514, "step": 694 }, { "epoch": 0.5229495861550038, "grad_norm": 16.894996643066406, "learning_rate": 5.031695721077655e-06, "loss": 2.249, "step": 695 }, { "epoch": 0.5237020316027088, "grad_norm": 27.297014236450195, "learning_rate": 5.023771790808242e-06, "loss": 2.582, "step": 696 }, { "epoch": 0.5244544770504138, "grad_norm": 26.86957550048828, "learning_rate": 5.015847860538827e-06, "loss": 2.1602, "step": 697 }, { "epoch": 0.5252069224981188, "grad_norm": 22.478004455566406, "learning_rate": 5.007923930269414e-06, "loss": 2.208, "step": 698 }, { "epoch": 0.5259593679458239, "grad_norm": 16.2978515625, "learning_rate": 5e-06, "loss": 2.1294, "step": 699 }, { "epoch": 0.526711813393529, "grad_norm": 26.94550895690918, "learning_rate": 4.992076069730587e-06, "loss": 2.9023, "step": 700 }, { "epoch": 0.527464258841234, "grad_norm": 21.26068687438965, "learning_rate": 4.984152139461173e-06, "loss": 1.9463, "step": 701 }, { "epoch": 0.5282167042889391, "grad_norm": 17.80802345275879, "learning_rate": 4.97622820919176e-06, "loss": 2.2588, "step": 702 }, { "epoch": 0.5289691497366441, "grad_norm": 22.475358963012695, "learning_rate": 4.968304278922346e-06, "loss": 2.25, "step": 703 }, { "epoch": 0.5297215951843491, "grad_norm": 29.277667999267578, "learning_rate": 4.960380348652932e-06, "loss": 2.5811, "step": 704 }, { "epoch": 0.5304740406320542, "grad_norm": 45.544376373291016, "learning_rate": 4.952456418383519e-06, "loss": 2.7852, "step": 705 }, { "epoch": 0.5312264860797592, "grad_norm": 22.807954788208008, "learning_rate": 4.944532488114105e-06, "loss": 2.4385, "step": 706 }, { "epoch": 0.5319789315274642, "grad_norm": 24.76763153076172, "learning_rate": 4.936608557844692e-06, "loss": 3.041, "step": 707 }, { "epoch": 0.5327313769751693, "grad_norm": 32.09806823730469, "learning_rate": 4.928684627575278e-06, "loss": 2.3145, "step": 708 }, { "epoch": 0.5334838224228743, "grad_norm": 23.560874938964844, "learning_rate": 4.920760697305864e-06, "loss": 3.0161, "step": 709 }, { "epoch": 0.5342362678705794, "grad_norm": 20.40456199645996, "learning_rate": 4.912836767036451e-06, "loss": 2.251, "step": 710 }, { "epoch": 0.5349887133182845, "grad_norm": 29.588829040527344, "learning_rate": 4.904912836767036e-06, "loss": 2.5146, "step": 711 }, { "epoch": 0.5357411587659895, "grad_norm": 29.529502868652344, "learning_rate": 4.8969889064976235e-06, "loss": 3.1211, "step": 712 }, { "epoch": 0.5364936042136945, "grad_norm": 17.787813186645508, "learning_rate": 4.88906497622821e-06, "loss": 2.3545, "step": 713 }, { "epoch": 0.5372460496613995, "grad_norm": 21.947816848754883, "learning_rate": 4.881141045958796e-06, "loss": 2.375, "step": 714 }, { "epoch": 0.5379984951091046, "grad_norm": 22.692014694213867, "learning_rate": 4.873217115689383e-06, "loss": 2.8008, "step": 715 }, { "epoch": 0.5387509405568096, "grad_norm": 19.035554885864258, "learning_rate": 4.865293185419968e-06, "loss": 2.7129, "step": 716 }, { "epoch": 0.5395033860045146, "grad_norm": 15.194079399108887, "learning_rate": 4.857369255150555e-06, "loss": 2.3203, "step": 717 }, { "epoch": 0.5402558314522197, "grad_norm": 18.011316299438477, "learning_rate": 4.8494453248811415e-06, "loss": 2.3613, "step": 718 }, { "epoch": 0.5410082768999247, "grad_norm": 21.924701690673828, "learning_rate": 4.841521394611728e-06, "loss": 2.4072, "step": 719 }, { "epoch": 0.5417607223476298, "grad_norm": 40.228458404541016, "learning_rate": 4.833597464342314e-06, "loss": 2.6533, "step": 720 }, { "epoch": 0.5425131677953349, "grad_norm": 16.44277572631836, "learning_rate": 4.825673534072901e-06, "loss": 1.8018, "step": 721 }, { "epoch": 0.5432656132430399, "grad_norm": 25.804616928100586, "learning_rate": 4.817749603803487e-06, "loss": 2.4727, "step": 722 }, { "epoch": 0.5440180586907449, "grad_norm": 26.694299697875977, "learning_rate": 4.809825673534073e-06, "loss": 3.1221, "step": 723 }, { "epoch": 0.54477050413845, "grad_norm": 26.506877899169922, "learning_rate": 4.8019017432646595e-06, "loss": 2.3232, "step": 724 }, { "epoch": 0.545522949586155, "grad_norm": 24.158933639526367, "learning_rate": 4.793977812995246e-06, "loss": 2.7012, "step": 725 }, { "epoch": 0.54627539503386, "grad_norm": 25.846845626831055, "learning_rate": 4.786053882725833e-06, "loss": 2.5986, "step": 726 }, { "epoch": 0.547027840481565, "grad_norm": 24.62186622619629, "learning_rate": 4.778129952456419e-06, "loss": 2.5259, "step": 727 }, { "epoch": 0.5477802859292701, "grad_norm": 26.85883331298828, "learning_rate": 4.770206022187005e-06, "loss": 3.0781, "step": 728 }, { "epoch": 0.5485327313769752, "grad_norm": 27.05722427368164, "learning_rate": 4.762282091917591e-06, "loss": 2.2344, "step": 729 }, { "epoch": 0.5492851768246803, "grad_norm": 30.71999740600586, "learning_rate": 4.7543581616481775e-06, "loss": 2.6318, "step": 730 }, { "epoch": 0.5500376222723853, "grad_norm": 19.75069236755371, "learning_rate": 4.7464342313787645e-06, "loss": 2.0889, "step": 731 }, { "epoch": 0.5507900677200903, "grad_norm": 20.5186710357666, "learning_rate": 4.738510301109351e-06, "loss": 2.2495, "step": 732 }, { "epoch": 0.5515425131677953, "grad_norm": 30.734027862548828, "learning_rate": 4.730586370839937e-06, "loss": 2.5781, "step": 733 }, { "epoch": 0.5522949586155004, "grad_norm": 27.587772369384766, "learning_rate": 4.722662440570523e-06, "loss": 2.085, "step": 734 }, { "epoch": 0.5530474040632054, "grad_norm": 21.96542739868164, "learning_rate": 4.714738510301109e-06, "loss": 2.6475, "step": 735 }, { "epoch": 0.5537998495109104, "grad_norm": 34.89696502685547, "learning_rate": 4.706814580031696e-06, "loss": 2.2959, "step": 736 }, { "epoch": 0.5545522949586155, "grad_norm": 23.43917465209961, "learning_rate": 4.6988906497622825e-06, "loss": 2.3931, "step": 737 }, { "epoch": 0.5553047404063205, "grad_norm": 33.7728385925293, "learning_rate": 4.690966719492869e-06, "loss": 2.75, "step": 738 }, { "epoch": 0.5560571858540256, "grad_norm": 25.656301498413086, "learning_rate": 4.683042789223456e-06, "loss": 2.7324, "step": 739 }, { "epoch": 0.5568096313017307, "grad_norm": 23.849233627319336, "learning_rate": 4.675118858954041e-06, "loss": 2.7832, "step": 740 }, { "epoch": 0.5575620767494357, "grad_norm": 24.854156494140625, "learning_rate": 4.667194928684628e-06, "loss": 2.8164, "step": 741 }, { "epoch": 0.5583145221971407, "grad_norm": 20.215167999267578, "learning_rate": 4.659270998415214e-06, "loss": 2.4922, "step": 742 }, { "epoch": 0.5590669676448458, "grad_norm": 21.410198211669922, "learning_rate": 4.6513470681458005e-06, "loss": 2.6812, "step": 743 }, { "epoch": 0.5598194130925508, "grad_norm": 37.980003356933594, "learning_rate": 4.6434231378763876e-06, "loss": 2.6025, "step": 744 }, { "epoch": 0.5605718585402558, "grad_norm": 27.364931106567383, "learning_rate": 4.635499207606973e-06, "loss": 2.1377, "step": 745 }, { "epoch": 0.5613243039879608, "grad_norm": 36.03047180175781, "learning_rate": 4.62757527733756e-06, "loss": 2.4512, "step": 746 }, { "epoch": 0.5620767494356659, "grad_norm": 20.39134407043457, "learning_rate": 4.619651347068146e-06, "loss": 2.71, "step": 747 }, { "epoch": 0.5628291948833709, "grad_norm": 16.574569702148438, "learning_rate": 4.611727416798732e-06, "loss": 2.2793, "step": 748 }, { "epoch": 0.563581640331076, "grad_norm": 27.37680435180664, "learning_rate": 4.603803486529319e-06, "loss": 2.752, "step": 749 }, { "epoch": 0.5643340857787811, "grad_norm": 19.884410858154297, "learning_rate": 4.595879556259905e-06, "loss": 2.3926, "step": 750 }, { "epoch": 0.5650865312264861, "grad_norm": 19.763639450073242, "learning_rate": 4.587955625990492e-06, "loss": 2.833, "step": 751 }, { "epoch": 0.5658389766741911, "grad_norm": 19.394676208496094, "learning_rate": 4.580031695721078e-06, "loss": 2.3857, "step": 752 }, { "epoch": 0.5665914221218962, "grad_norm": 18.420915603637695, "learning_rate": 4.572107765451664e-06, "loss": 2.376, "step": 753 }, { "epoch": 0.5673438675696012, "grad_norm": 25.283594131469727, "learning_rate": 4.564183835182251e-06, "loss": 2.5928, "step": 754 }, { "epoch": 0.5680963130173062, "grad_norm": 21.629606246948242, "learning_rate": 4.556259904912837e-06, "loss": 2.5918, "step": 755 }, { "epoch": 0.5688487584650113, "grad_norm": 24.263954162597656, "learning_rate": 4.5483359746434236e-06, "loss": 2.2422, "step": 756 }, { "epoch": 0.5696012039127163, "grad_norm": 29.89535903930664, "learning_rate": 4.54041204437401e-06, "loss": 2.3711, "step": 757 }, { "epoch": 0.5703536493604213, "grad_norm": 21.822158813476562, "learning_rate": 4.532488114104596e-06, "loss": 2.6113, "step": 758 }, { "epoch": 0.5711060948081265, "grad_norm": 21.718902587890625, "learning_rate": 4.524564183835183e-06, "loss": 2.46, "step": 759 }, { "epoch": 0.5718585402558315, "grad_norm": 21.7495059967041, "learning_rate": 4.516640253565769e-06, "loss": 2.0186, "step": 760 }, { "epoch": 0.5726109857035365, "grad_norm": 21.98088836669922, "learning_rate": 4.508716323296355e-06, "loss": 2.7881, "step": 761 }, { "epoch": 0.5733634311512416, "grad_norm": 43.60248947143555, "learning_rate": 4.500792393026942e-06, "loss": 2.8164, "step": 762 }, { "epoch": 0.5741158765989466, "grad_norm": 20.0263614654541, "learning_rate": 4.492868462757528e-06, "loss": 1.9448, "step": 763 }, { "epoch": 0.5748683220466516, "grad_norm": 21.096193313598633, "learning_rate": 4.484944532488115e-06, "loss": 2.1396, "step": 764 }, { "epoch": 0.5756207674943566, "grad_norm": 19.693714141845703, "learning_rate": 4.477020602218701e-06, "loss": 2.2139, "step": 765 }, { "epoch": 0.5763732129420617, "grad_norm": 19.59661293029785, "learning_rate": 4.469096671949287e-06, "loss": 2.5938, "step": 766 }, { "epoch": 0.5771256583897667, "grad_norm": 17.570878982543945, "learning_rate": 4.461172741679873e-06, "loss": 2.0215, "step": 767 }, { "epoch": 0.5778781038374717, "grad_norm": 29.355201721191406, "learning_rate": 4.45324881141046e-06, "loss": 2.2734, "step": 768 }, { "epoch": 0.5786305492851769, "grad_norm": 36.004547119140625, "learning_rate": 4.445324881141047e-06, "loss": 3.3457, "step": 769 }, { "epoch": 0.5793829947328819, "grad_norm": 21.928863525390625, "learning_rate": 4.437400950871633e-06, "loss": 2.2832, "step": 770 }, { "epoch": 0.5801354401805869, "grad_norm": 24.527210235595703, "learning_rate": 4.429477020602219e-06, "loss": 2.8252, "step": 771 }, { "epoch": 0.580887885628292, "grad_norm": 30.435258865356445, "learning_rate": 4.421553090332805e-06, "loss": 2.1875, "step": 772 }, { "epoch": 0.581640331075997, "grad_norm": 21.116355895996094, "learning_rate": 4.413629160063391e-06, "loss": 2.2197, "step": 773 }, { "epoch": 0.582392776523702, "grad_norm": 22.062442779541016, "learning_rate": 4.4057052297939784e-06, "loss": 2.8711, "step": 774 }, { "epoch": 0.5831452219714071, "grad_norm": 25.258756637573242, "learning_rate": 4.397781299524565e-06, "loss": 2.0986, "step": 775 }, { "epoch": 0.5838976674191121, "grad_norm": 31.283647537231445, "learning_rate": 4.389857369255151e-06, "loss": 2.1328, "step": 776 }, { "epoch": 0.5846501128668171, "grad_norm": 20.281776428222656, "learning_rate": 4.381933438985737e-06, "loss": 2.167, "step": 777 }, { "epoch": 0.5854025583145221, "grad_norm": 27.260391235351562, "learning_rate": 4.374009508716324e-06, "loss": 2.0811, "step": 778 }, { "epoch": 0.5861550037622273, "grad_norm": 37.832515716552734, "learning_rate": 4.36608557844691e-06, "loss": 2.9639, "step": 779 }, { "epoch": 0.5869074492099323, "grad_norm": 22.24700164794922, "learning_rate": 4.3581616481774964e-06, "loss": 2.3242, "step": 780 }, { "epoch": 0.5876598946576373, "grad_norm": 32.77638626098633, "learning_rate": 4.350237717908083e-06, "loss": 1.8867, "step": 781 }, { "epoch": 0.5884123401053424, "grad_norm": 22.048324584960938, "learning_rate": 4.342313787638669e-06, "loss": 1.8193, "step": 782 }, { "epoch": 0.5891647855530474, "grad_norm": 21.58878517150879, "learning_rate": 4.334389857369256e-06, "loss": 3.0986, "step": 783 }, { "epoch": 0.5899172310007524, "grad_norm": 18.198331832885742, "learning_rate": 4.326465927099842e-06, "loss": 1.9155, "step": 784 }, { "epoch": 0.5906696764484575, "grad_norm": 31.84845733642578, "learning_rate": 4.318541996830428e-06, "loss": 1.9473, "step": 785 }, { "epoch": 0.5914221218961625, "grad_norm": 24.339372634887695, "learning_rate": 4.3106180665610144e-06, "loss": 2.5181, "step": 786 }, { "epoch": 0.5921745673438675, "grad_norm": 26.267627716064453, "learning_rate": 4.302694136291601e-06, "loss": 2.3174, "step": 787 }, { "epoch": 0.5929270127915726, "grad_norm": 28.163755416870117, "learning_rate": 4.294770206022188e-06, "loss": 2.3691, "step": 788 }, { "epoch": 0.5936794582392777, "grad_norm": 23.921737670898438, "learning_rate": 4.286846275752774e-06, "loss": 1.9277, "step": 789 }, { "epoch": 0.5944319036869827, "grad_norm": 30.403043746948242, "learning_rate": 4.27892234548336e-06, "loss": 2.7305, "step": 790 }, { "epoch": 0.5951843491346878, "grad_norm": 21.035133361816406, "learning_rate": 4.270998415213946e-06, "loss": 1.9492, "step": 791 }, { "epoch": 0.5959367945823928, "grad_norm": 25.801963806152344, "learning_rate": 4.2630744849445325e-06, "loss": 2.6982, "step": 792 }, { "epoch": 0.5966892400300978, "grad_norm": 28.979450225830078, "learning_rate": 4.2551505546751195e-06, "loss": 2.3574, "step": 793 }, { "epoch": 0.5974416854778029, "grad_norm": 34.52888107299805, "learning_rate": 4.247226624405706e-06, "loss": 2.5796, "step": 794 }, { "epoch": 0.5981941309255079, "grad_norm": 24.638835906982422, "learning_rate": 4.239302694136292e-06, "loss": 1.9937, "step": 795 }, { "epoch": 0.5989465763732129, "grad_norm": 32.92772674560547, "learning_rate": 4.231378763866879e-06, "loss": 2.0918, "step": 796 }, { "epoch": 0.5996990218209179, "grad_norm": 34.20918273925781, "learning_rate": 4.223454833597464e-06, "loss": 2.6455, "step": 797 }, { "epoch": 0.600451467268623, "grad_norm": 21.07083511352539, "learning_rate": 4.215530903328051e-06, "loss": 2.2432, "step": 798 }, { "epoch": 0.6012039127163281, "grad_norm": 22.572980880737305, "learning_rate": 4.2076069730586375e-06, "loss": 2.1455, "step": 799 }, { "epoch": 0.6019563581640331, "grad_norm": 19.050878524780273, "learning_rate": 4.199683042789224e-06, "loss": 1.998, "step": 800 }, { "epoch": 0.6027088036117382, "grad_norm": 17.435394287109375, "learning_rate": 4.191759112519811e-06, "loss": 2.0913, "step": 801 }, { "epoch": 0.6034612490594432, "grad_norm": 24.223140716552734, "learning_rate": 4.183835182250396e-06, "loss": 2.2686, "step": 802 }, { "epoch": 0.6042136945071482, "grad_norm": 22.403162002563477, "learning_rate": 4.175911251980983e-06, "loss": 2.7402, "step": 803 }, { "epoch": 0.6049661399548533, "grad_norm": 24.95384979248047, "learning_rate": 4.167987321711569e-06, "loss": 2.5898, "step": 804 }, { "epoch": 0.6057185854025583, "grad_norm": 54.6262321472168, "learning_rate": 4.1600633914421555e-06, "loss": 2.5508, "step": 805 }, { "epoch": 0.6064710308502633, "grad_norm": 31.041257858276367, "learning_rate": 4.1521394611727425e-06, "loss": 2.2725, "step": 806 }, { "epoch": 0.6072234762979684, "grad_norm": 25.92288589477539, "learning_rate": 4.144215530903328e-06, "loss": 2.2832, "step": 807 }, { "epoch": 0.6079759217456734, "grad_norm": 30.848102569580078, "learning_rate": 4.136291600633915e-06, "loss": 2.2432, "step": 808 }, { "epoch": 0.6087283671933785, "grad_norm": 25.898963928222656, "learning_rate": 4.128367670364501e-06, "loss": 2.5078, "step": 809 }, { "epoch": 0.6094808126410836, "grad_norm": 19.408071517944336, "learning_rate": 4.120443740095087e-06, "loss": 2.458, "step": 810 }, { "epoch": 0.6102332580887886, "grad_norm": 24.330867767333984, "learning_rate": 4.112519809825674e-06, "loss": 2.3916, "step": 811 }, { "epoch": 0.6109857035364936, "grad_norm": 38.193077087402344, "learning_rate": 4.1045958795562605e-06, "loss": 2.1689, "step": 812 }, { "epoch": 0.6117381489841986, "grad_norm": 19.677576065063477, "learning_rate": 4.096671949286847e-06, "loss": 2.0405, "step": 813 }, { "epoch": 0.6124905944319037, "grad_norm": 25.960607528686523, "learning_rate": 4.088748019017433e-06, "loss": 2.8359, "step": 814 }, { "epoch": 0.6132430398796087, "grad_norm": 26.78199005126953, "learning_rate": 4.080824088748019e-06, "loss": 2.7383, "step": 815 }, { "epoch": 0.6139954853273137, "grad_norm": 37.22509002685547, "learning_rate": 4.072900158478606e-06, "loss": 2.3555, "step": 816 }, { "epoch": 0.6147479307750188, "grad_norm": 21.195919036865234, "learning_rate": 4.064976228209192e-06, "loss": 2.0752, "step": 817 }, { "epoch": 0.6155003762227238, "grad_norm": 24.543123245239258, "learning_rate": 4.0570522979397786e-06, "loss": 2.3223, "step": 818 }, { "epoch": 0.6162528216704289, "grad_norm": 23.601247787475586, "learning_rate": 4.049128367670365e-06, "loss": 2.4287, "step": 819 }, { "epoch": 0.617005267118134, "grad_norm": 23.073978424072266, "learning_rate": 4.041204437400951e-06, "loss": 2.0996, "step": 820 }, { "epoch": 0.617757712565839, "grad_norm": 23.573352813720703, "learning_rate": 4.033280507131538e-06, "loss": 2.46, "step": 821 }, { "epoch": 0.618510158013544, "grad_norm": 31.260934829711914, "learning_rate": 4.025356576862124e-06, "loss": 2.915, "step": 822 }, { "epoch": 0.6192626034612491, "grad_norm": 24.224809646606445, "learning_rate": 4.01743264659271e-06, "loss": 3.0488, "step": 823 }, { "epoch": 0.6200150489089541, "grad_norm": 27.658130645751953, "learning_rate": 4.0095087163232966e-06, "loss": 2.1646, "step": 824 }, { "epoch": 0.6207674943566591, "grad_norm": 30.932138442993164, "learning_rate": 4.001584786053883e-06, "loss": 2.7983, "step": 825 }, { "epoch": 0.6215199398043642, "grad_norm": 25.3236141204834, "learning_rate": 3.993660855784469e-06, "loss": 2.0117, "step": 826 }, { "epoch": 0.6222723852520692, "grad_norm": 24.713939666748047, "learning_rate": 3.985736925515056e-06, "loss": 1.8252, "step": 827 }, { "epoch": 0.6230248306997742, "grad_norm": 22.71609878540039, "learning_rate": 3.977812995245642e-06, "loss": 2.4561, "step": 828 }, { "epoch": 0.6237772761474794, "grad_norm": 19.888782501220703, "learning_rate": 3.969889064976228e-06, "loss": 2.2373, "step": 829 }, { "epoch": 0.6245297215951844, "grad_norm": 27.303646087646484, "learning_rate": 3.961965134706815e-06, "loss": 2.0645, "step": 830 }, { "epoch": 0.6252821670428894, "grad_norm": 18.7181396484375, "learning_rate": 3.954041204437401e-06, "loss": 2.4932, "step": 831 }, { "epoch": 0.6260346124905944, "grad_norm": 15.501985549926758, "learning_rate": 3.946117274167988e-06, "loss": 2.0273, "step": 832 }, { "epoch": 0.6267870579382995, "grad_norm": 28.724773406982422, "learning_rate": 3.938193343898574e-06, "loss": 2.4346, "step": 833 }, { "epoch": 0.6275395033860045, "grad_norm": 22.180727005004883, "learning_rate": 3.93026941362916e-06, "loss": 2.6465, "step": 834 }, { "epoch": 0.6282919488337095, "grad_norm": 18.778850555419922, "learning_rate": 3.922345483359747e-06, "loss": 2.0625, "step": 835 }, { "epoch": 0.6290443942814146, "grad_norm": 21.990373611450195, "learning_rate": 3.9144215530903326e-06, "loss": 2.1973, "step": 836 }, { "epoch": 0.6297968397291196, "grad_norm": 31.995012283325195, "learning_rate": 3.90649762282092e-06, "loss": 2.3857, "step": 837 }, { "epoch": 0.6305492851768246, "grad_norm": 28.826034545898438, "learning_rate": 3.898573692551506e-06, "loss": 2.0898, "step": 838 }, { "epoch": 0.6313017306245298, "grad_norm": 30.95318603515625, "learning_rate": 3.890649762282092e-06, "loss": 2.5283, "step": 839 }, { "epoch": 0.6320541760722348, "grad_norm": 33.76778030395508, "learning_rate": 3.882725832012679e-06, "loss": 2.0483, "step": 840 }, { "epoch": 0.6328066215199398, "grad_norm": 27.856613159179688, "learning_rate": 3.874801901743264e-06, "loss": 2.2412, "step": 841 }, { "epoch": 0.6335590669676449, "grad_norm": 18.836238861083984, "learning_rate": 3.866877971473851e-06, "loss": 2.1514, "step": 842 }, { "epoch": 0.6343115124153499, "grad_norm": 26.430967330932617, "learning_rate": 3.858954041204438e-06, "loss": 2.147, "step": 843 }, { "epoch": 0.6350639578630549, "grad_norm": 33.526512145996094, "learning_rate": 3.851030110935024e-06, "loss": 1.9834, "step": 844 }, { "epoch": 0.63581640331076, "grad_norm": 18.545738220214844, "learning_rate": 3.843106180665611e-06, "loss": 2.1514, "step": 845 }, { "epoch": 0.636568848758465, "grad_norm": 30.445545196533203, "learning_rate": 3.835182250396197e-06, "loss": 2.6758, "step": 846 }, { "epoch": 0.63732129420617, "grad_norm": 38.89741516113281, "learning_rate": 3.827258320126783e-06, "loss": 2.3945, "step": 847 }, { "epoch": 0.6380737396538751, "grad_norm": 40.87432861328125, "learning_rate": 3.8193343898573694e-06, "loss": 2.9551, "step": 848 }, { "epoch": 0.6388261851015802, "grad_norm": 19.067197799682617, "learning_rate": 3.811410459587956e-06, "loss": 2.2969, "step": 849 }, { "epoch": 0.6395786305492852, "grad_norm": 27.969614028930664, "learning_rate": 3.8034865293185427e-06, "loss": 2.373, "step": 850 }, { "epoch": 0.6403310759969902, "grad_norm": 23.686763763427734, "learning_rate": 3.7955625990491284e-06, "loss": 3.1475, "step": 851 }, { "epoch": 0.6410835214446953, "grad_norm": 29.188365936279297, "learning_rate": 3.787638668779715e-06, "loss": 2.2734, "step": 852 }, { "epoch": 0.6418359668924003, "grad_norm": 20.16975975036621, "learning_rate": 3.7797147385103017e-06, "loss": 2.4541, "step": 853 }, { "epoch": 0.6425884123401053, "grad_norm": 21.829917907714844, "learning_rate": 3.771790808240888e-06, "loss": 2.2627, "step": 854 }, { "epoch": 0.6433408577878104, "grad_norm": 27.086030960083008, "learning_rate": 3.7638668779714745e-06, "loss": 1.8145, "step": 855 }, { "epoch": 0.6440933032355154, "grad_norm": 23.82771873474121, "learning_rate": 3.7559429477020602e-06, "loss": 1.876, "step": 856 }, { "epoch": 0.6448457486832204, "grad_norm": 20.215105056762695, "learning_rate": 3.748019017432647e-06, "loss": 2.2539, "step": 857 }, { "epoch": 0.6455981941309256, "grad_norm": 21.309492111206055, "learning_rate": 3.740095087163233e-06, "loss": 2.3628, "step": 858 }, { "epoch": 0.6463506395786306, "grad_norm": 35.519500732421875, "learning_rate": 3.7321711568938197e-06, "loss": 3.0176, "step": 859 }, { "epoch": 0.6471030850263356, "grad_norm": 35.95414352416992, "learning_rate": 3.7242472266244063e-06, "loss": 2.5986, "step": 860 }, { "epoch": 0.6478555304740407, "grad_norm": 21.86758804321289, "learning_rate": 3.716323296354992e-06, "loss": 2.4932, "step": 861 }, { "epoch": 0.6486079759217457, "grad_norm": 17.709407806396484, "learning_rate": 3.7083993660855787e-06, "loss": 2.165, "step": 862 }, { "epoch": 0.6493604213694507, "grad_norm": 32.432437896728516, "learning_rate": 3.700475435816165e-06, "loss": 2.3838, "step": 863 }, { "epoch": 0.6501128668171557, "grad_norm": 31.911968231201172, "learning_rate": 3.6925515055467515e-06, "loss": 2.2124, "step": 864 }, { "epoch": 0.6508653122648608, "grad_norm": 19.1397762298584, "learning_rate": 3.684627575277338e-06, "loss": 2.5645, "step": 865 }, { "epoch": 0.6516177577125658, "grad_norm": 17.174745559692383, "learning_rate": 3.6767036450079243e-06, "loss": 2.0762, "step": 866 }, { "epoch": 0.6523702031602708, "grad_norm": 26.017173767089844, "learning_rate": 3.6687797147385105e-06, "loss": 2.2939, "step": 867 }, { "epoch": 0.653122648607976, "grad_norm": 17.96502113342285, "learning_rate": 3.6608557844690967e-06, "loss": 2.7549, "step": 868 }, { "epoch": 0.653875094055681, "grad_norm": 19.559343338012695, "learning_rate": 3.6529318541996833e-06, "loss": 2.3613, "step": 869 }, { "epoch": 0.654627539503386, "grad_norm": 18.72821617126465, "learning_rate": 3.64500792393027e-06, "loss": 2.2822, "step": 870 }, { "epoch": 0.6553799849510911, "grad_norm": 18.58492660522461, "learning_rate": 3.637083993660856e-06, "loss": 2.0332, "step": 871 }, { "epoch": 0.6561324303987961, "grad_norm": 25.23973274230957, "learning_rate": 3.6291600633914427e-06, "loss": 2.7051, "step": 872 }, { "epoch": 0.6568848758465011, "grad_norm": 26.061168670654297, "learning_rate": 3.6212361331220285e-06, "loss": 2.126, "step": 873 }, { "epoch": 0.6576373212942062, "grad_norm": 30.92963409423828, "learning_rate": 3.613312202852615e-06, "loss": 2.2412, "step": 874 }, { "epoch": 0.6583897667419112, "grad_norm": 16.77997589111328, "learning_rate": 3.6053882725832017e-06, "loss": 1.4937, "step": 875 }, { "epoch": 0.6591422121896162, "grad_norm": 20.67428207397461, "learning_rate": 3.597464342313788e-06, "loss": 2.3301, "step": 876 }, { "epoch": 0.6598946576373212, "grad_norm": 22.45784568786621, "learning_rate": 3.5895404120443745e-06, "loss": 2.0977, "step": 877 }, { "epoch": 0.6606471030850264, "grad_norm": 30.148887634277344, "learning_rate": 3.5816164817749603e-06, "loss": 2.873, "step": 878 }, { "epoch": 0.6613995485327314, "grad_norm": 21.913610458374023, "learning_rate": 3.573692551505547e-06, "loss": 2.583, "step": 879 }, { "epoch": 0.6621519939804364, "grad_norm": 24.19639015197754, "learning_rate": 3.5657686212361335e-06, "loss": 2.502, "step": 880 }, { "epoch": 0.6629044394281415, "grad_norm": 32.243167877197266, "learning_rate": 3.5578446909667197e-06, "loss": 2.2207, "step": 881 }, { "epoch": 0.6636568848758465, "grad_norm": 25.025768280029297, "learning_rate": 3.5499207606973063e-06, "loss": 2.3164, "step": 882 }, { "epoch": 0.6644093303235515, "grad_norm": 36.78255844116211, "learning_rate": 3.5419968304278925e-06, "loss": 2.5381, "step": 883 }, { "epoch": 0.6651617757712566, "grad_norm": 25.785430908203125, "learning_rate": 3.5340729001584787e-06, "loss": 2.5068, "step": 884 }, { "epoch": 0.6659142212189616, "grad_norm": 24.93991470336914, "learning_rate": 3.5261489698890653e-06, "loss": 2.9014, "step": 885 }, { "epoch": 0.6666666666666666, "grad_norm": 33.389732360839844, "learning_rate": 3.5182250396196515e-06, "loss": 2.4287, "step": 886 }, { "epoch": 0.6674191121143717, "grad_norm": 23.581132888793945, "learning_rate": 3.510301109350238e-06, "loss": 2.1855, "step": 887 }, { "epoch": 0.6681715575620768, "grad_norm": 26.595279693603516, "learning_rate": 3.5023771790808243e-06, "loss": 1.9658, "step": 888 }, { "epoch": 0.6689240030097818, "grad_norm": 19.963623046875, "learning_rate": 3.494453248811411e-06, "loss": 2.2393, "step": 889 }, { "epoch": 0.6696764484574869, "grad_norm": 24.39027976989746, "learning_rate": 3.4865293185419976e-06, "loss": 2.4365, "step": 890 }, { "epoch": 0.6704288939051919, "grad_norm": 19.47262191772461, "learning_rate": 3.4786053882725833e-06, "loss": 1.9961, "step": 891 }, { "epoch": 0.6711813393528969, "grad_norm": 30.582433700561523, "learning_rate": 3.47068145800317e-06, "loss": 2.3809, "step": 892 }, { "epoch": 0.671933784800602, "grad_norm": 17.60356903076172, "learning_rate": 3.462757527733756e-06, "loss": 2.0938, "step": 893 }, { "epoch": 0.672686230248307, "grad_norm": 25.00141143798828, "learning_rate": 3.4548335974643428e-06, "loss": 2.1436, "step": 894 }, { "epoch": 0.673438675696012, "grad_norm": 20.50116729736328, "learning_rate": 3.4469096671949285e-06, "loss": 2.2646, "step": 895 }, { "epoch": 0.674191121143717, "grad_norm": 22.395421981811523, "learning_rate": 3.438985736925515e-06, "loss": 2.4668, "step": 896 }, { "epoch": 0.6749435665914221, "grad_norm": 23.272846221923828, "learning_rate": 3.4310618066561018e-06, "loss": 2.5137, "step": 897 }, { "epoch": 0.6756960120391272, "grad_norm": 18.45476722717285, "learning_rate": 3.423137876386688e-06, "loss": 2.1172, "step": 898 }, { "epoch": 0.6764484574868322, "grad_norm": 20.40255355834961, "learning_rate": 3.4152139461172746e-06, "loss": 2.5439, "step": 899 }, { "epoch": 0.6772009029345373, "grad_norm": 20.203140258789062, "learning_rate": 3.4072900158478608e-06, "loss": 1.998, "step": 900 }, { "epoch": 0.6779533483822423, "grad_norm": 36.98493576049805, "learning_rate": 3.399366085578447e-06, "loss": 2.4912, "step": 901 }, { "epoch": 0.6787057938299473, "grad_norm": 25.70787811279297, "learning_rate": 3.3914421553090336e-06, "loss": 2.9082, "step": 902 }, { "epoch": 0.6794582392776524, "grad_norm": 21.873355865478516, "learning_rate": 3.3835182250396198e-06, "loss": 2.3164, "step": 903 }, { "epoch": 0.6802106847253574, "grad_norm": 20.708681106567383, "learning_rate": 3.3755942947702064e-06, "loss": 2.626, "step": 904 }, { "epoch": 0.6809631301730624, "grad_norm": 24.416845321655273, "learning_rate": 3.3676703645007926e-06, "loss": 2.1973, "step": 905 }, { "epoch": 0.6817155756207675, "grad_norm": 18.85466957092285, "learning_rate": 3.359746434231379e-06, "loss": 1.7402, "step": 906 }, { "epoch": 0.6824680210684725, "grad_norm": 33.928104400634766, "learning_rate": 3.351822503961966e-06, "loss": 3.1211, "step": 907 }, { "epoch": 0.6832204665161776, "grad_norm": 19.205385208129883, "learning_rate": 3.3438985736925516e-06, "loss": 2.2646, "step": 908 }, { "epoch": 0.6839729119638827, "grad_norm": 23.29824447631836, "learning_rate": 3.335974643423138e-06, "loss": 2.5674, "step": 909 }, { "epoch": 0.6847253574115877, "grad_norm": 51.69862365722656, "learning_rate": 3.3280507131537244e-06, "loss": 2.8838, "step": 910 }, { "epoch": 0.6854778028592927, "grad_norm": 31.652620315551758, "learning_rate": 3.320126782884311e-06, "loss": 1.8994, "step": 911 }, { "epoch": 0.6862302483069977, "grad_norm": 34.18386459350586, "learning_rate": 3.3122028526148976e-06, "loss": 2.2725, "step": 912 }, { "epoch": 0.6869826937547028, "grad_norm": 25.95589256286621, "learning_rate": 3.3042789223454834e-06, "loss": 1.8262, "step": 913 }, { "epoch": 0.6877351392024078, "grad_norm": 25.984094619750977, "learning_rate": 3.29635499207607e-06, "loss": 2.5732, "step": 914 }, { "epoch": 0.6884875846501128, "grad_norm": 20.5795841217041, "learning_rate": 3.2884310618066562e-06, "loss": 2.209, "step": 915 }, { "epoch": 0.6892400300978179, "grad_norm": 24.327163696289062, "learning_rate": 3.280507131537243e-06, "loss": 2.834, "step": 916 }, { "epoch": 0.6899924755455229, "grad_norm": 23.493921279907227, "learning_rate": 3.2725832012678294e-06, "loss": 2.3008, "step": 917 }, { "epoch": 0.690744920993228, "grad_norm": 22.002779006958008, "learning_rate": 3.2646592709984152e-06, "loss": 2.5127, "step": 918 }, { "epoch": 0.6914973664409331, "grad_norm": 20.9849853515625, "learning_rate": 3.256735340729002e-06, "loss": 2.3838, "step": 919 }, { "epoch": 0.6922498118886381, "grad_norm": 23.12964630126953, "learning_rate": 3.248811410459588e-06, "loss": 2.4521, "step": 920 }, { "epoch": 0.6930022573363431, "grad_norm": 18.361614227294922, "learning_rate": 3.2408874801901746e-06, "loss": 1.9404, "step": 921 }, { "epoch": 0.6937547027840482, "grad_norm": 25.768903732299805, "learning_rate": 3.2329635499207613e-06, "loss": 2.582, "step": 922 }, { "epoch": 0.6945071482317532, "grad_norm": 18.827003479003906, "learning_rate": 3.2250396196513475e-06, "loss": 2.0742, "step": 923 }, { "epoch": 0.6952595936794582, "grad_norm": 22.65555763244629, "learning_rate": 3.217115689381934e-06, "loss": 2.5566, "step": 924 }, { "epoch": 0.6960120391271633, "grad_norm": 18.19965171813965, "learning_rate": 3.20919175911252e-06, "loss": 2.0063, "step": 925 }, { "epoch": 0.6967644845748683, "grad_norm": 31.940683364868164, "learning_rate": 3.2012678288431065e-06, "loss": 2.1514, "step": 926 }, { "epoch": 0.6975169300225733, "grad_norm": 23.10796546936035, "learning_rate": 3.1933438985736926e-06, "loss": 2.3877, "step": 927 }, { "epoch": 0.6982693754702785, "grad_norm": 23.25333023071289, "learning_rate": 3.1854199683042793e-06, "loss": 2.3535, "step": 928 }, { "epoch": 0.6990218209179835, "grad_norm": 20.867780685424805, "learning_rate": 3.177496038034866e-06, "loss": 2.2783, "step": 929 }, { "epoch": 0.6997742663656885, "grad_norm": 17.495346069335938, "learning_rate": 3.1695721077654516e-06, "loss": 1.9824, "step": 930 }, { "epoch": 0.7005267118133935, "grad_norm": 19.127239227294922, "learning_rate": 3.1616481774960383e-06, "loss": 2.0283, "step": 931 }, { "epoch": 0.7012791572610986, "grad_norm": 25.830289840698242, "learning_rate": 3.1537242472266245e-06, "loss": 3.0107, "step": 932 }, { "epoch": 0.7020316027088036, "grad_norm": 21.201894760131836, "learning_rate": 3.145800316957211e-06, "loss": 2.0293, "step": 933 }, { "epoch": 0.7027840481565086, "grad_norm": 29.03411102294922, "learning_rate": 3.1378763866877977e-06, "loss": 2.5771, "step": 934 }, { "epoch": 0.7035364936042137, "grad_norm": 20.404911041259766, "learning_rate": 3.1299524564183835e-06, "loss": 2.0645, "step": 935 }, { "epoch": 0.7042889390519187, "grad_norm": 29.42574691772461, "learning_rate": 3.12202852614897e-06, "loss": 2.1699, "step": 936 }, { "epoch": 0.7050413844996237, "grad_norm": 20.876218795776367, "learning_rate": 3.1141045958795563e-06, "loss": 1.6353, "step": 937 }, { "epoch": 0.7057938299473289, "grad_norm": 37.073631286621094, "learning_rate": 3.106180665610143e-06, "loss": 2.0352, "step": 938 }, { "epoch": 0.7065462753950339, "grad_norm": 25.04046630859375, "learning_rate": 3.0982567353407295e-06, "loss": 2.1602, "step": 939 }, { "epoch": 0.7072987208427389, "grad_norm": 27.084985733032227, "learning_rate": 3.0903328050713157e-06, "loss": 2.5342, "step": 940 }, { "epoch": 0.708051166290444, "grad_norm": 27.979019165039062, "learning_rate": 3.082408874801902e-06, "loss": 2.0264, "step": 941 }, { "epoch": 0.708803611738149, "grad_norm": 24.28731346130371, "learning_rate": 3.074484944532488e-06, "loss": 2.541, "step": 942 }, { "epoch": 0.709556057185854, "grad_norm": 26.896106719970703, "learning_rate": 3.0665610142630747e-06, "loss": 1.5684, "step": 943 }, { "epoch": 0.710308502633559, "grad_norm": 38.26861572265625, "learning_rate": 3.0586370839936613e-06, "loss": 2.4727, "step": 944 }, { "epoch": 0.7110609480812641, "grad_norm": 34.82157516479492, "learning_rate": 3.0507131537242475e-06, "loss": 2.6074, "step": 945 }, { "epoch": 0.7118133935289691, "grad_norm": 22.82600975036621, "learning_rate": 3.042789223454834e-06, "loss": 2.4297, "step": 946 }, { "epoch": 0.7125658389766741, "grad_norm": 22.42252540588379, "learning_rate": 3.03486529318542e-06, "loss": 2.4795, "step": 947 }, { "epoch": 0.7133182844243793, "grad_norm": 25.8705997467041, "learning_rate": 3.0269413629160065e-06, "loss": 3.0166, "step": 948 }, { "epoch": 0.7140707298720843, "grad_norm": 23.13258171081543, "learning_rate": 3.019017432646593e-06, "loss": 2.0781, "step": 949 }, { "epoch": 0.7148231753197893, "grad_norm": 19.896459579467773, "learning_rate": 3.0110935023771793e-06, "loss": 2.2402, "step": 950 }, { "epoch": 0.7155756207674944, "grad_norm": 20.398405075073242, "learning_rate": 3.003169572107766e-06, "loss": 2.0166, "step": 951 }, { "epoch": 0.7163280662151994, "grad_norm": 25.631702423095703, "learning_rate": 2.9952456418383517e-06, "loss": 2.0332, "step": 952 }, { "epoch": 0.7170805116629044, "grad_norm": 19.276575088500977, "learning_rate": 2.9873217115689383e-06, "loss": 2.5947, "step": 953 }, { "epoch": 0.7178329571106095, "grad_norm": 22.23748207092285, "learning_rate": 2.979397781299525e-06, "loss": 2.0342, "step": 954 }, { "epoch": 0.7185854025583145, "grad_norm": 35.471561431884766, "learning_rate": 2.971473851030111e-06, "loss": 2.542, "step": 955 }, { "epoch": 0.7193378480060195, "grad_norm": 20.142765045166016, "learning_rate": 2.9635499207606977e-06, "loss": 2.168, "step": 956 }, { "epoch": 0.7200902934537246, "grad_norm": 25.604598999023438, "learning_rate": 2.955625990491284e-06, "loss": 2.084, "step": 957 }, { "epoch": 0.7208427389014297, "grad_norm": 24.70061492919922, "learning_rate": 2.94770206022187e-06, "loss": 2.3701, "step": 958 }, { "epoch": 0.7215951843491347, "grad_norm": 19.22873306274414, "learning_rate": 2.9397781299524568e-06, "loss": 2.5967, "step": 959 }, { "epoch": 0.7223476297968398, "grad_norm": 19.727336883544922, "learning_rate": 2.931854199683043e-06, "loss": 2.0752, "step": 960 }, { "epoch": 0.7231000752445448, "grad_norm": 25.627744674682617, "learning_rate": 2.9239302694136296e-06, "loss": 2.4873, "step": 961 }, { "epoch": 0.7238525206922498, "grad_norm": 23.095905303955078, "learning_rate": 2.9160063391442158e-06, "loss": 2.3765, "step": 962 }, { "epoch": 0.7246049661399548, "grad_norm": 21.401283264160156, "learning_rate": 2.9080824088748024e-06, "loss": 2.4014, "step": 963 }, { "epoch": 0.7253574115876599, "grad_norm": 23.942296981811523, "learning_rate": 2.900158478605388e-06, "loss": 2.5908, "step": 964 }, { "epoch": 0.7261098570353649, "grad_norm": 18.88973617553711, "learning_rate": 2.8922345483359748e-06, "loss": 2.042, "step": 965 }, { "epoch": 0.7268623024830699, "grad_norm": 21.262531280517578, "learning_rate": 2.8843106180665614e-06, "loss": 2.4756, "step": 966 }, { "epoch": 0.7276147479307751, "grad_norm": 18.916324615478516, "learning_rate": 2.8763866877971476e-06, "loss": 2.6748, "step": 967 }, { "epoch": 0.7283671933784801, "grad_norm": 21.240524291992188, "learning_rate": 2.868462757527734e-06, "loss": 2.4922, "step": 968 }, { "epoch": 0.7291196388261851, "grad_norm": 21.823871612548828, "learning_rate": 2.86053882725832e-06, "loss": 2.3828, "step": 969 }, { "epoch": 0.7298720842738902, "grad_norm": 38.113922119140625, "learning_rate": 2.8526148969889066e-06, "loss": 2.752, "step": 970 }, { "epoch": 0.7306245297215952, "grad_norm": 21.07139015197754, "learning_rate": 2.844690966719493e-06, "loss": 2.3223, "step": 971 }, { "epoch": 0.7313769751693002, "grad_norm": 18.12544822692871, "learning_rate": 2.8367670364500794e-06, "loss": 2.3076, "step": 972 }, { "epoch": 0.7321294206170053, "grad_norm": 25.153772354125977, "learning_rate": 2.828843106180666e-06, "loss": 2.5049, "step": 973 }, { "epoch": 0.7328818660647103, "grad_norm": 24.048927307128906, "learning_rate": 2.8209191759112518e-06, "loss": 2.7891, "step": 974 }, { "epoch": 0.7336343115124153, "grad_norm": 23.4043025970459, "learning_rate": 2.8129952456418384e-06, "loss": 2.7061, "step": 975 }, { "epoch": 0.7343867569601203, "grad_norm": 24.055015563964844, "learning_rate": 2.805071315372425e-06, "loss": 2.3691, "step": 976 }, { "epoch": 0.7351392024078255, "grad_norm": 30.86815643310547, "learning_rate": 2.797147385103011e-06, "loss": 2.4199, "step": 977 }, { "epoch": 0.7358916478555305, "grad_norm": 19.904773712158203, "learning_rate": 2.789223454833598e-06, "loss": 2.4951, "step": 978 }, { "epoch": 0.7366440933032355, "grad_norm": 18.34711265563965, "learning_rate": 2.781299524564184e-06, "loss": 2.5, "step": 979 }, { "epoch": 0.7373965387509406, "grad_norm": 17.771268844604492, "learning_rate": 2.7733755942947706e-06, "loss": 1.8232, "step": 980 }, { "epoch": 0.7381489841986456, "grad_norm": 20.759653091430664, "learning_rate": 2.7654516640253572e-06, "loss": 2.1533, "step": 981 }, { "epoch": 0.7389014296463506, "grad_norm": 27.191268920898438, "learning_rate": 2.757527733755943e-06, "loss": 2.2979, "step": 982 }, { "epoch": 0.7396538750940557, "grad_norm": 23.227584838867188, "learning_rate": 2.7496038034865296e-06, "loss": 2.0762, "step": 983 }, { "epoch": 0.7404063205417607, "grad_norm": 21.38162612915039, "learning_rate": 2.741679873217116e-06, "loss": 2.709, "step": 984 }, { "epoch": 0.7411587659894657, "grad_norm": 17.612733840942383, "learning_rate": 2.7337559429477024e-06, "loss": 2.3223, "step": 985 }, { "epoch": 0.7419112114371708, "grad_norm": 26.95534896850586, "learning_rate": 2.725832012678289e-06, "loss": 2.3506, "step": 986 }, { "epoch": 0.7426636568848759, "grad_norm": 46.48407745361328, "learning_rate": 2.717908082408875e-06, "loss": 2.0186, "step": 987 }, { "epoch": 0.7434161023325809, "grad_norm": 24.3732967376709, "learning_rate": 2.7099841521394614e-06, "loss": 2.2715, "step": 988 }, { "epoch": 0.744168547780286, "grad_norm": 18.800397872924805, "learning_rate": 2.7020602218700476e-06, "loss": 2.3564, "step": 989 }, { "epoch": 0.744920993227991, "grad_norm": 28.8432674407959, "learning_rate": 2.6941362916006342e-06, "loss": 2.8242, "step": 990 }, { "epoch": 0.745673438675696, "grad_norm": 23.0736026763916, "learning_rate": 2.686212361331221e-06, "loss": 2.1826, "step": 991 }, { "epoch": 0.746425884123401, "grad_norm": 27.80742073059082, "learning_rate": 2.6782884310618066e-06, "loss": 2.2583, "step": 992 }, { "epoch": 0.7471783295711061, "grad_norm": 21.215221405029297, "learning_rate": 2.6703645007923932e-06, "loss": 2.6289, "step": 993 }, { "epoch": 0.7479307750188111, "grad_norm": 25.544788360595703, "learning_rate": 2.6624405705229794e-06, "loss": 2.4414, "step": 994 }, { "epoch": 0.7486832204665161, "grad_norm": 27.689598083496094, "learning_rate": 2.654516640253566e-06, "loss": 2.5352, "step": 995 }, { "epoch": 0.7494356659142212, "grad_norm": 21.94173240661621, "learning_rate": 2.6465927099841527e-06, "loss": 2.2261, "step": 996 }, { "epoch": 0.7501881113619263, "grad_norm": 20.872621536254883, "learning_rate": 2.638668779714739e-06, "loss": 1.9814, "step": 997 }, { "epoch": 0.7509405568096313, "grad_norm": 21.449371337890625, "learning_rate": 2.630744849445325e-06, "loss": 2.4668, "step": 998 }, { "epoch": 0.7516930022573364, "grad_norm": 18.553009033203125, "learning_rate": 2.6228209191759112e-06, "loss": 2.1934, "step": 999 }, { "epoch": 0.7524454477050414, "grad_norm": 31.450166702270508, "learning_rate": 2.614896988906498e-06, "loss": 2.2305, "step": 1000 }, { "epoch": 0.7531978931527464, "grad_norm": 30.71107292175293, "learning_rate": 2.606973058637084e-06, "loss": 2.3838, "step": 1001 }, { "epoch": 0.7539503386004515, "grad_norm": 19.482921600341797, "learning_rate": 2.5990491283676707e-06, "loss": 2.0806, "step": 1002 }, { "epoch": 0.7547027840481565, "grad_norm": 27.4390926361084, "learning_rate": 2.5911251980982573e-06, "loss": 2.5801, "step": 1003 }, { "epoch": 0.7554552294958615, "grad_norm": 39.73894119262695, "learning_rate": 2.583201267828843e-06, "loss": 2.5068, "step": 1004 }, { "epoch": 0.7562076749435666, "grad_norm": 35.52018356323242, "learning_rate": 2.5752773375594297e-06, "loss": 2.6641, "step": 1005 }, { "epoch": 0.7569601203912716, "grad_norm": 23.1878719329834, "learning_rate": 2.567353407290016e-06, "loss": 2.2603, "step": 1006 }, { "epoch": 0.7577125658389767, "grad_norm": 21.26180648803711, "learning_rate": 2.5594294770206025e-06, "loss": 2.0269, "step": 1007 }, { "epoch": 0.7584650112866818, "grad_norm": 18.922929763793945, "learning_rate": 2.551505546751189e-06, "loss": 1.9541, "step": 1008 }, { "epoch": 0.7592174567343868, "grad_norm": 24.16057586669922, "learning_rate": 2.543581616481775e-06, "loss": 3.0117, "step": 1009 }, { "epoch": 0.7599699021820918, "grad_norm": 19.66029930114746, "learning_rate": 2.5356576862123615e-06, "loss": 2.0205, "step": 1010 }, { "epoch": 0.7607223476297968, "grad_norm": 24.95394515991211, "learning_rate": 2.5277337559429477e-06, "loss": 2.6768, "step": 1011 }, { "epoch": 0.7614747930775019, "grad_norm": 30.851152420043945, "learning_rate": 2.5198098256735343e-06, "loss": 1.8813, "step": 1012 }, { "epoch": 0.7622272385252069, "grad_norm": 23.487869262695312, "learning_rate": 2.511885895404121e-06, "loss": 1.9902, "step": 1013 }, { "epoch": 0.7629796839729119, "grad_norm": 20.58772087097168, "learning_rate": 2.503961965134707e-06, "loss": 2.7119, "step": 1014 }, { "epoch": 0.763732129420617, "grad_norm": 19.332704544067383, "learning_rate": 2.4960380348652933e-06, "loss": 2.4258, "step": 1015 }, { "epoch": 0.764484574868322, "grad_norm": 31.803115844726562, "learning_rate": 2.48811410459588e-06, "loss": 2.707, "step": 1016 }, { "epoch": 0.7652370203160271, "grad_norm": 28.797060012817383, "learning_rate": 2.480190174326466e-06, "loss": 1.7207, "step": 1017 }, { "epoch": 0.7659894657637322, "grad_norm": 30.22066307067871, "learning_rate": 2.4722662440570523e-06, "loss": 2.2822, "step": 1018 }, { "epoch": 0.7667419112114372, "grad_norm": 15.914982795715332, "learning_rate": 2.464342313787639e-06, "loss": 2.2627, "step": 1019 }, { "epoch": 0.7674943566591422, "grad_norm": 24.08812713623047, "learning_rate": 2.4564183835182255e-06, "loss": 2.1035, "step": 1020 }, { "epoch": 0.7682468021068473, "grad_norm": 35.42763137817383, "learning_rate": 2.4484944532488117e-06, "loss": 2.832, "step": 1021 }, { "epoch": 0.7689992475545523, "grad_norm": 19.57898712158203, "learning_rate": 2.440570522979398e-06, "loss": 2.3027, "step": 1022 }, { "epoch": 0.7697516930022573, "grad_norm": 24.63896942138672, "learning_rate": 2.432646592709984e-06, "loss": 2.1934, "step": 1023 }, { "epoch": 0.7705041384499624, "grad_norm": 18.475221633911133, "learning_rate": 2.4247226624405707e-06, "loss": 1.8838, "step": 1024 }, { "epoch": 0.7712565838976674, "grad_norm": 20.874549865722656, "learning_rate": 2.416798732171157e-06, "loss": 2.0713, "step": 1025 }, { "epoch": 0.7720090293453724, "grad_norm": 25.372650146484375, "learning_rate": 2.4088748019017435e-06, "loss": 2.2949, "step": 1026 }, { "epoch": 0.7727614747930776, "grad_norm": 19.395593643188477, "learning_rate": 2.4009508716323297e-06, "loss": 2.252, "step": 1027 }, { "epoch": 0.7735139202407826, "grad_norm": 25.478004455566406, "learning_rate": 2.3930269413629164e-06, "loss": 2.3438, "step": 1028 }, { "epoch": 0.7742663656884876, "grad_norm": 22.913898468017578, "learning_rate": 2.3851030110935025e-06, "loss": 2.7061, "step": 1029 }, { "epoch": 0.7750188111361926, "grad_norm": 17.623754501342773, "learning_rate": 2.3771790808240887e-06, "loss": 1.833, "step": 1030 }, { "epoch": 0.7757712565838977, "grad_norm": 36.00138854980469, "learning_rate": 2.3692551505546754e-06, "loss": 1.9961, "step": 1031 }, { "epoch": 0.7765237020316027, "grad_norm": 22.17951202392578, "learning_rate": 2.3613312202852615e-06, "loss": 2.291, "step": 1032 }, { "epoch": 0.7772761474793077, "grad_norm": 27.934125900268555, "learning_rate": 2.353407290015848e-06, "loss": 2.0273, "step": 1033 }, { "epoch": 0.7780285929270128, "grad_norm": 25.72597885131836, "learning_rate": 2.3454833597464344e-06, "loss": 2.3418, "step": 1034 }, { "epoch": 0.7787810383747178, "grad_norm": 20.993690490722656, "learning_rate": 2.3375594294770205e-06, "loss": 2.2773, "step": 1035 }, { "epoch": 0.7795334838224228, "grad_norm": 19.845046997070312, "learning_rate": 2.329635499207607e-06, "loss": 2.4473, "step": 1036 }, { "epoch": 0.780285929270128, "grad_norm": 19.89436149597168, "learning_rate": 2.3217115689381938e-06, "loss": 2.186, "step": 1037 }, { "epoch": 0.781038374717833, "grad_norm": 21.185094833374023, "learning_rate": 2.31378763866878e-06, "loss": 1.8525, "step": 1038 }, { "epoch": 0.781790820165538, "grad_norm": 19.601686477661133, "learning_rate": 2.305863708399366e-06, "loss": 2.0459, "step": 1039 }, { "epoch": 0.782543265613243, "grad_norm": 28.16640853881836, "learning_rate": 2.2979397781299524e-06, "loss": 1.9453, "step": 1040 }, { "epoch": 0.7832957110609481, "grad_norm": 20.31542205810547, "learning_rate": 2.290015847860539e-06, "loss": 2.0532, "step": 1041 }, { "epoch": 0.7840481565086531, "grad_norm": 43.06874465942383, "learning_rate": 2.2820919175911256e-06, "loss": 2.6689, "step": 1042 }, { "epoch": 0.7848006019563581, "grad_norm": 24.530195236206055, "learning_rate": 2.2741679873217118e-06, "loss": 2.4707, "step": 1043 }, { "epoch": 0.7855530474040632, "grad_norm": 20.09838104248047, "learning_rate": 2.266244057052298e-06, "loss": 1.9854, "step": 1044 }, { "epoch": 0.7863054928517682, "grad_norm": 26.703359603881836, "learning_rate": 2.2583201267828846e-06, "loss": 2.6191, "step": 1045 }, { "epoch": 0.7870579382994732, "grad_norm": 20.001371383666992, "learning_rate": 2.250396196513471e-06, "loss": 2.1367, "step": 1046 }, { "epoch": 0.7878103837471784, "grad_norm": 21.10999870300293, "learning_rate": 2.2424722662440574e-06, "loss": 2.0635, "step": 1047 }, { "epoch": 0.7885628291948834, "grad_norm": 30.246315002441406, "learning_rate": 2.2345483359746436e-06, "loss": 2.8379, "step": 1048 }, { "epoch": 0.7893152746425884, "grad_norm": 19.928213119506836, "learning_rate": 2.22662440570523e-06, "loss": 2.2905, "step": 1049 }, { "epoch": 0.7900677200902935, "grad_norm": 34.11455535888672, "learning_rate": 2.2187004754358164e-06, "loss": 1.7715, "step": 1050 }, { "epoch": 0.7908201655379985, "grad_norm": 21.632551193237305, "learning_rate": 2.2107765451664026e-06, "loss": 2.2568, "step": 1051 }, { "epoch": 0.7915726109857035, "grad_norm": 37.97250747680664, "learning_rate": 2.2028526148969892e-06, "loss": 2.3799, "step": 1052 }, { "epoch": 0.7923250564334086, "grad_norm": 27.17977523803711, "learning_rate": 2.1949286846275754e-06, "loss": 2.9277, "step": 1053 }, { "epoch": 0.7930775018811136, "grad_norm": 20.97044563293457, "learning_rate": 2.187004754358162e-06, "loss": 2.209, "step": 1054 }, { "epoch": 0.7938299473288186, "grad_norm": 25.320844650268555, "learning_rate": 2.1790808240887482e-06, "loss": 2.5098, "step": 1055 }, { "epoch": 0.7945823927765236, "grad_norm": 21.632658004760742, "learning_rate": 2.1711568938193344e-06, "loss": 2.4844, "step": 1056 }, { "epoch": 0.7953348382242288, "grad_norm": 17.359174728393555, "learning_rate": 2.163232963549921e-06, "loss": 2.0874, "step": 1057 }, { "epoch": 0.7960872836719338, "grad_norm": 32.31191635131836, "learning_rate": 2.1553090332805072e-06, "loss": 2.207, "step": 1058 }, { "epoch": 0.7968397291196389, "grad_norm": 26.761028289794922, "learning_rate": 2.147385103011094e-06, "loss": 2.7266, "step": 1059 }, { "epoch": 0.7975921745673439, "grad_norm": 22.91716194152832, "learning_rate": 2.13946117274168e-06, "loss": 2.0654, "step": 1060 }, { "epoch": 0.7983446200150489, "grad_norm": 29.451723098754883, "learning_rate": 2.1315372424722662e-06, "loss": 1.999, "step": 1061 }, { "epoch": 0.7990970654627539, "grad_norm": 20.911706924438477, "learning_rate": 2.123613312202853e-06, "loss": 2.0498, "step": 1062 }, { "epoch": 0.799849510910459, "grad_norm": 23.506391525268555, "learning_rate": 2.1156893819334395e-06, "loss": 2.2852, "step": 1063 }, { "epoch": 0.800601956358164, "grad_norm": 16.55670738220215, "learning_rate": 2.1077654516640257e-06, "loss": 1.9072, "step": 1064 }, { "epoch": 0.801354401805869, "grad_norm": 22.04123878479004, "learning_rate": 2.099841521394612e-06, "loss": 2.6279, "step": 1065 }, { "epoch": 0.8021068472535741, "grad_norm": 23.356552124023438, "learning_rate": 2.091917591125198e-06, "loss": 2.0391, "step": 1066 }, { "epoch": 0.8028592927012792, "grad_norm": 21.152170181274414, "learning_rate": 2.0839936608557847e-06, "loss": 2.3486, "step": 1067 }, { "epoch": 0.8036117381489842, "grad_norm": 41.95413589477539, "learning_rate": 2.0760697305863713e-06, "loss": 2.5762, "step": 1068 }, { "epoch": 0.8043641835966893, "grad_norm": 22.67193031311035, "learning_rate": 2.0681458003169575e-06, "loss": 2.9912, "step": 1069 }, { "epoch": 0.8051166290443943, "grad_norm": 22.507898330688477, "learning_rate": 2.0602218700475437e-06, "loss": 2.1948, "step": 1070 }, { "epoch": 0.8058690744920993, "grad_norm": 38.1120719909668, "learning_rate": 2.0522979397781303e-06, "loss": 2.4033, "step": 1071 }, { "epoch": 0.8066215199398044, "grad_norm": 27.03844451904297, "learning_rate": 2.0443740095087165e-06, "loss": 2.5049, "step": 1072 }, { "epoch": 0.8073739653875094, "grad_norm": 25.915897369384766, "learning_rate": 2.036450079239303e-06, "loss": 2.4033, "step": 1073 }, { "epoch": 0.8081264108352144, "grad_norm": 20.6876277923584, "learning_rate": 2.0285261489698893e-06, "loss": 2.2363, "step": 1074 }, { "epoch": 0.8088788562829194, "grad_norm": 23.289859771728516, "learning_rate": 2.0206022187004755e-06, "loss": 2.2559, "step": 1075 }, { "epoch": 0.8096313017306245, "grad_norm": 19.997568130493164, "learning_rate": 2.012678288431062e-06, "loss": 2.0186, "step": 1076 }, { "epoch": 0.8103837471783296, "grad_norm": 17.515745162963867, "learning_rate": 2.0047543581616483e-06, "loss": 2.3486, "step": 1077 }, { "epoch": 0.8111361926260346, "grad_norm": 21.844892501831055, "learning_rate": 1.9968304278922345e-06, "loss": 2.249, "step": 1078 }, { "epoch": 0.8118886380737397, "grad_norm": 30.721351623535156, "learning_rate": 1.988906497622821e-06, "loss": 2.7441, "step": 1079 }, { "epoch": 0.8126410835214447, "grad_norm": 26.29536247253418, "learning_rate": 1.9809825673534077e-06, "loss": 2.1123, "step": 1080 }, { "epoch": 0.8133935289691497, "grad_norm": 25.76918601989746, "learning_rate": 1.973058637083994e-06, "loss": 2.167, "step": 1081 }, { "epoch": 0.8141459744168548, "grad_norm": 36.153045654296875, "learning_rate": 1.96513470681458e-06, "loss": 2.9219, "step": 1082 }, { "epoch": 0.8148984198645598, "grad_norm": 19.623029708862305, "learning_rate": 1.9572107765451663e-06, "loss": 1.9341, "step": 1083 }, { "epoch": 0.8156508653122648, "grad_norm": 28.89622688293457, "learning_rate": 1.949286846275753e-06, "loss": 2.748, "step": 1084 }, { "epoch": 0.8164033107599699, "grad_norm": 17.98982048034668, "learning_rate": 1.9413629160063395e-06, "loss": 1.7686, "step": 1085 }, { "epoch": 0.8171557562076749, "grad_norm": 25.77059555053711, "learning_rate": 1.9334389857369257e-06, "loss": 1.8994, "step": 1086 }, { "epoch": 0.81790820165538, "grad_norm": 26.73072052001953, "learning_rate": 1.925515055467512e-06, "loss": 2.0908, "step": 1087 }, { "epoch": 0.8186606471030851, "grad_norm": 31.21881866455078, "learning_rate": 1.9175911251980985e-06, "loss": 2.2412, "step": 1088 }, { "epoch": 0.8194130925507901, "grad_norm": 20.786651611328125, "learning_rate": 1.9096671949286847e-06, "loss": 2.2842, "step": 1089 }, { "epoch": 0.8201655379984951, "grad_norm": 24.010360717773438, "learning_rate": 1.9017432646592713e-06, "loss": 2.0205, "step": 1090 }, { "epoch": 0.8209179834462002, "grad_norm": 20.723363876342773, "learning_rate": 1.8938193343898575e-06, "loss": 2.041, "step": 1091 }, { "epoch": 0.8216704288939052, "grad_norm": 25.89142608642578, "learning_rate": 1.885895404120444e-06, "loss": 1.8486, "step": 1092 }, { "epoch": 0.8224228743416102, "grad_norm": 20.13483428955078, "learning_rate": 1.8779714738510301e-06, "loss": 1.9575, "step": 1093 }, { "epoch": 0.8231753197893152, "grad_norm": 29.47180938720703, "learning_rate": 1.8700475435816165e-06, "loss": 2.2529, "step": 1094 }, { "epoch": 0.8239277652370203, "grad_norm": 21.102237701416016, "learning_rate": 1.8621236133122031e-06, "loss": 2.2578, "step": 1095 }, { "epoch": 0.8246802106847254, "grad_norm": 23.39295768737793, "learning_rate": 1.8541996830427893e-06, "loss": 2.8486, "step": 1096 }, { "epoch": 0.8254326561324304, "grad_norm": 22.92432403564453, "learning_rate": 1.8462757527733757e-06, "loss": 2.0596, "step": 1097 }, { "epoch": 0.8261851015801355, "grad_norm": 26.11507797241211, "learning_rate": 1.8383518225039621e-06, "loss": 2.1094, "step": 1098 }, { "epoch": 0.8269375470278405, "grad_norm": 22.83144760131836, "learning_rate": 1.8304278922345483e-06, "loss": 2.252, "step": 1099 }, { "epoch": 0.8276899924755455, "grad_norm": 23.018451690673828, "learning_rate": 1.822503961965135e-06, "loss": 2.3848, "step": 1100 }, { "epoch": 0.8284424379232506, "grad_norm": 22.875572204589844, "learning_rate": 1.8145800316957214e-06, "loss": 2.7886, "step": 1101 }, { "epoch": 0.8291948833709556, "grad_norm": 23.032974243164062, "learning_rate": 1.8066561014263076e-06, "loss": 2.2539, "step": 1102 }, { "epoch": 0.8299473288186606, "grad_norm": 18.94373893737793, "learning_rate": 1.798732171156894e-06, "loss": 1.9492, "step": 1103 }, { "epoch": 0.8306997742663657, "grad_norm": 24.989089965820312, "learning_rate": 1.7908082408874801e-06, "loss": 2.1313, "step": 1104 }, { "epoch": 0.8314522197140707, "grad_norm": 17.287385940551758, "learning_rate": 1.7828843106180668e-06, "loss": 1.7773, "step": 1105 }, { "epoch": 0.8322046651617758, "grad_norm": 39.13855743408203, "learning_rate": 1.7749603803486532e-06, "loss": 2.3174, "step": 1106 }, { "epoch": 0.8329571106094809, "grad_norm": 27.369783401489258, "learning_rate": 1.7670364500792394e-06, "loss": 2.6826, "step": 1107 }, { "epoch": 0.8337095560571859, "grad_norm": 29.145048141479492, "learning_rate": 1.7591125198098258e-06, "loss": 2.6401, "step": 1108 }, { "epoch": 0.8344620015048909, "grad_norm": 19.961219787597656, "learning_rate": 1.7511885895404122e-06, "loss": 1.9082, "step": 1109 }, { "epoch": 0.835214446952596, "grad_norm": 35.9928092956543, "learning_rate": 1.7432646592709988e-06, "loss": 2.5713, "step": 1110 }, { "epoch": 0.835966892400301, "grad_norm": 20.073556900024414, "learning_rate": 1.735340729001585e-06, "loss": 2.1055, "step": 1111 }, { "epoch": 0.836719337848006, "grad_norm": 22.781147003173828, "learning_rate": 1.7274167987321714e-06, "loss": 2.4678, "step": 1112 }, { "epoch": 0.837471783295711, "grad_norm": 18.387203216552734, "learning_rate": 1.7194928684627576e-06, "loss": 2.2944, "step": 1113 }, { "epoch": 0.8382242287434161, "grad_norm": 26.837833404541016, "learning_rate": 1.711568938193344e-06, "loss": 2.8203, "step": 1114 }, { "epoch": 0.8389766741911211, "grad_norm": 22.23270034790039, "learning_rate": 1.7036450079239304e-06, "loss": 1.7646, "step": 1115 }, { "epoch": 0.8397291196388262, "grad_norm": 23.629858016967773, "learning_rate": 1.6957210776545168e-06, "loss": 1.7197, "step": 1116 }, { "epoch": 0.8404815650865313, "grad_norm": 21.33991813659668, "learning_rate": 1.6877971473851032e-06, "loss": 2.0195, "step": 1117 }, { "epoch": 0.8412340105342363, "grad_norm": 21.652618408203125, "learning_rate": 1.6798732171156896e-06, "loss": 1.9268, "step": 1118 }, { "epoch": 0.8419864559819413, "grad_norm": 29.22657012939453, "learning_rate": 1.6719492868462758e-06, "loss": 2.5576, "step": 1119 }, { "epoch": 0.8427389014296464, "grad_norm": 26.373394012451172, "learning_rate": 1.6640253565768622e-06, "loss": 2.292, "step": 1120 }, { "epoch": 0.8434913468773514, "grad_norm": 21.04275131225586, "learning_rate": 1.6561014263074488e-06, "loss": 2.2852, "step": 1121 }, { "epoch": 0.8442437923250564, "grad_norm": 24.09245491027832, "learning_rate": 1.648177496038035e-06, "loss": 2.3447, "step": 1122 }, { "epoch": 0.8449962377727614, "grad_norm": 29.287586212158203, "learning_rate": 1.6402535657686214e-06, "loss": 2.23, "step": 1123 }, { "epoch": 0.8457486832204665, "grad_norm": 19.69169807434082, "learning_rate": 1.6323296354992076e-06, "loss": 2.1064, "step": 1124 }, { "epoch": 0.8465011286681715, "grad_norm": 19.223487854003906, "learning_rate": 1.624405705229794e-06, "loss": 2.291, "step": 1125 }, { "epoch": 0.8472535741158767, "grad_norm": 21.353008270263672, "learning_rate": 1.6164817749603806e-06, "loss": 1.6641, "step": 1126 }, { "epoch": 0.8480060195635817, "grad_norm": 28.44927978515625, "learning_rate": 1.608557844690967e-06, "loss": 2.8379, "step": 1127 }, { "epoch": 0.8487584650112867, "grad_norm": 20.611202239990234, "learning_rate": 1.6006339144215532e-06, "loss": 2.1348, "step": 1128 }, { "epoch": 0.8495109104589917, "grad_norm": 20.636144638061523, "learning_rate": 1.5927099841521396e-06, "loss": 1.8086, "step": 1129 }, { "epoch": 0.8502633559066968, "grad_norm": 29.253482818603516, "learning_rate": 1.5847860538827258e-06, "loss": 2.395, "step": 1130 }, { "epoch": 0.8510158013544018, "grad_norm": 27.642684936523438, "learning_rate": 1.5768621236133122e-06, "loss": 1.9863, "step": 1131 }, { "epoch": 0.8517682468021068, "grad_norm": 38.00718688964844, "learning_rate": 1.5689381933438988e-06, "loss": 2.6221, "step": 1132 }, { "epoch": 0.8525206922498119, "grad_norm": 31.242435455322266, "learning_rate": 1.561014263074485e-06, "loss": 2.2764, "step": 1133 }, { "epoch": 0.8532731376975169, "grad_norm": 37.08576202392578, "learning_rate": 1.5530903328050714e-06, "loss": 2.2881, "step": 1134 }, { "epoch": 0.8540255831452219, "grad_norm": 25.496156692504883, "learning_rate": 1.5451664025356578e-06, "loss": 2.1523, "step": 1135 }, { "epoch": 0.8547780285929271, "grad_norm": 24.68577766418457, "learning_rate": 1.537242472266244e-06, "loss": 2.0137, "step": 1136 }, { "epoch": 0.8555304740406321, "grad_norm": 19.2375431060791, "learning_rate": 1.5293185419968307e-06, "loss": 1.9766, "step": 1137 }, { "epoch": 0.8562829194883371, "grad_norm": 23.542686462402344, "learning_rate": 1.521394611727417e-06, "loss": 2.2139, "step": 1138 }, { "epoch": 0.8570353649360422, "grad_norm": 20.412443161010742, "learning_rate": 1.5134706814580033e-06, "loss": 2.1104, "step": 1139 }, { "epoch": 0.8577878103837472, "grad_norm": 28.579959869384766, "learning_rate": 1.5055467511885897e-06, "loss": 2.4404, "step": 1140 }, { "epoch": 0.8585402558314522, "grad_norm": 17.60219383239746, "learning_rate": 1.4976228209191759e-06, "loss": 2.1152, "step": 1141 }, { "epoch": 0.8592927012791572, "grad_norm": 23.072465896606445, "learning_rate": 1.4896988906497625e-06, "loss": 2.292, "step": 1142 }, { "epoch": 0.8600451467268623, "grad_norm": 18.52507972717285, "learning_rate": 1.4817749603803489e-06, "loss": 2.2676, "step": 1143 }, { "epoch": 0.8607975921745673, "grad_norm": 20.507993698120117, "learning_rate": 1.473851030110935e-06, "loss": 1.5454, "step": 1144 }, { "epoch": 0.8615500376222723, "grad_norm": 21.937292098999023, "learning_rate": 1.4659270998415215e-06, "loss": 2.5098, "step": 1145 }, { "epoch": 0.8623024830699775, "grad_norm": 30.636445999145508, "learning_rate": 1.4580031695721079e-06, "loss": 2.6172, "step": 1146 }, { "epoch": 0.8630549285176825, "grad_norm": 22.22297477722168, "learning_rate": 1.450079239302694e-06, "loss": 1.6914, "step": 1147 }, { "epoch": 0.8638073739653875, "grad_norm": 22.2825984954834, "learning_rate": 1.4421553090332807e-06, "loss": 2.167, "step": 1148 }, { "epoch": 0.8645598194130926, "grad_norm": 18.126550674438477, "learning_rate": 1.434231378763867e-06, "loss": 2.0254, "step": 1149 }, { "epoch": 0.8653122648607976, "grad_norm": 19.636568069458008, "learning_rate": 1.4263074484944533e-06, "loss": 1.687, "step": 1150 }, { "epoch": 0.8660647103085026, "grad_norm": 20.647727966308594, "learning_rate": 1.4183835182250397e-06, "loss": 2.0898, "step": 1151 }, { "epoch": 0.8668171557562077, "grad_norm": 22.94881248474121, "learning_rate": 1.4104595879556259e-06, "loss": 2.3364, "step": 1152 }, { "epoch": 0.8675696012039127, "grad_norm": 25.914772033691406, "learning_rate": 1.4025356576862125e-06, "loss": 1.8203, "step": 1153 }, { "epoch": 0.8683220466516177, "grad_norm": 27.735950469970703, "learning_rate": 1.394611727416799e-06, "loss": 2.1641, "step": 1154 }, { "epoch": 0.8690744920993227, "grad_norm": 23.504257202148438, "learning_rate": 1.3866877971473853e-06, "loss": 2.0322, "step": 1155 }, { "epoch": 0.8698269375470279, "grad_norm": 19.194591522216797, "learning_rate": 1.3787638668779715e-06, "loss": 1.8711, "step": 1156 }, { "epoch": 0.8705793829947329, "grad_norm": 22.447521209716797, "learning_rate": 1.370839936608558e-06, "loss": 2.062, "step": 1157 }, { "epoch": 0.871331828442438, "grad_norm": 21.444046020507812, "learning_rate": 1.3629160063391445e-06, "loss": 2.0732, "step": 1158 }, { "epoch": 0.872084273890143, "grad_norm": 32.98313522338867, "learning_rate": 1.3549920760697307e-06, "loss": 2.6113, "step": 1159 }, { "epoch": 0.872836719337848, "grad_norm": 30.398639678955078, "learning_rate": 1.3470681458003171e-06, "loss": 2.2158, "step": 1160 }, { "epoch": 0.873589164785553, "grad_norm": 24.345083236694336, "learning_rate": 1.3391442155309033e-06, "loss": 2.1172, "step": 1161 }, { "epoch": 0.8743416102332581, "grad_norm": 29.294410705566406, "learning_rate": 1.3312202852614897e-06, "loss": 2.8584, "step": 1162 }, { "epoch": 0.8750940556809631, "grad_norm": 27.93486785888672, "learning_rate": 1.3232963549920763e-06, "loss": 1.9658, "step": 1163 }, { "epoch": 0.8758465011286681, "grad_norm": 24.938095092773438, "learning_rate": 1.3153724247226625e-06, "loss": 2.3027, "step": 1164 }, { "epoch": 0.8765989465763732, "grad_norm": 22.570066452026367, "learning_rate": 1.307448494453249e-06, "loss": 2.3174, "step": 1165 }, { "epoch": 0.8773513920240783, "grad_norm": 19.348644256591797, "learning_rate": 1.2995245641838353e-06, "loss": 1.8423, "step": 1166 }, { "epoch": 0.8781038374717833, "grad_norm": 23.712268829345703, "learning_rate": 1.2916006339144215e-06, "loss": 2.5195, "step": 1167 }, { "epoch": 0.8788562829194884, "grad_norm": 21.44355583190918, "learning_rate": 1.283676703645008e-06, "loss": 1.875, "step": 1168 }, { "epoch": 0.8796087283671934, "grad_norm": 24.918310165405273, "learning_rate": 1.2757527733755946e-06, "loss": 2.2021, "step": 1169 }, { "epoch": 0.8803611738148984, "grad_norm": 21.855985641479492, "learning_rate": 1.2678288431061807e-06, "loss": 2.022, "step": 1170 }, { "epoch": 0.8811136192626035, "grad_norm": 21.790687561035156, "learning_rate": 1.2599049128367671e-06, "loss": 2.29, "step": 1171 }, { "epoch": 0.8818660647103085, "grad_norm": 20.67224884033203, "learning_rate": 1.2519809825673536e-06, "loss": 1.9756, "step": 1172 }, { "epoch": 0.8826185101580135, "grad_norm": 23.416410446166992, "learning_rate": 1.24405705229794e-06, "loss": 2.1387, "step": 1173 }, { "epoch": 0.8833709556057185, "grad_norm": 29.197629928588867, "learning_rate": 1.2361331220285262e-06, "loss": 2.4033, "step": 1174 }, { "epoch": 0.8841234010534236, "grad_norm": 22.050270080566406, "learning_rate": 1.2282091917591128e-06, "loss": 1.6289, "step": 1175 }, { "epoch": 0.8848758465011287, "grad_norm": 16.857227325439453, "learning_rate": 1.220285261489699e-06, "loss": 1.8237, "step": 1176 }, { "epoch": 0.8856282919488337, "grad_norm": 28.409175872802734, "learning_rate": 1.2123613312202854e-06, "loss": 2.082, "step": 1177 }, { "epoch": 0.8863807373965388, "grad_norm": 22.208608627319336, "learning_rate": 1.2044374009508718e-06, "loss": 2.5762, "step": 1178 }, { "epoch": 0.8871331828442438, "grad_norm": 15.535444259643555, "learning_rate": 1.1965134706814582e-06, "loss": 1.9907, "step": 1179 }, { "epoch": 0.8878856282919488, "grad_norm": 27.926977157592773, "learning_rate": 1.1885895404120444e-06, "loss": 1.917, "step": 1180 }, { "epoch": 0.8886380737396539, "grad_norm": 32.624481201171875, "learning_rate": 1.1806656101426308e-06, "loss": 1.686, "step": 1181 }, { "epoch": 0.8893905191873589, "grad_norm": 19.761180877685547, "learning_rate": 1.1727416798732172e-06, "loss": 2.1934, "step": 1182 }, { "epoch": 0.8901429646350639, "grad_norm": 28.41836929321289, "learning_rate": 1.1648177496038036e-06, "loss": 2.5391, "step": 1183 }, { "epoch": 0.890895410082769, "grad_norm": 23.069141387939453, "learning_rate": 1.15689381933439e-06, "loss": 2.126, "step": 1184 }, { "epoch": 0.891647855530474, "grad_norm": 31.388771057128906, "learning_rate": 1.1489698890649762e-06, "loss": 2.4609, "step": 1185 }, { "epoch": 0.8924003009781791, "grad_norm": 18.07545280456543, "learning_rate": 1.1410459587955628e-06, "loss": 1.8765, "step": 1186 }, { "epoch": 0.8931527464258842, "grad_norm": 21.415142059326172, "learning_rate": 1.133122028526149e-06, "loss": 1.7529, "step": 1187 }, { "epoch": 0.8939051918735892, "grad_norm": 17.008604049682617, "learning_rate": 1.1251980982567354e-06, "loss": 1.4946, "step": 1188 }, { "epoch": 0.8946576373212942, "grad_norm": 23.807188034057617, "learning_rate": 1.1172741679873218e-06, "loss": 2.0234, "step": 1189 }, { "epoch": 0.8954100827689992, "grad_norm": 18.214134216308594, "learning_rate": 1.1093502377179082e-06, "loss": 1.8613, "step": 1190 }, { "epoch": 0.8961625282167043, "grad_norm": 24.6344051361084, "learning_rate": 1.1014263074484946e-06, "loss": 2.5996, "step": 1191 }, { "epoch": 0.8969149736644093, "grad_norm": 17.742876052856445, "learning_rate": 1.093502377179081e-06, "loss": 1.7769, "step": 1192 }, { "epoch": 0.8976674191121143, "grad_norm": 23.508689880371094, "learning_rate": 1.0855784469096672e-06, "loss": 2.0527, "step": 1193 }, { "epoch": 0.8984198645598194, "grad_norm": 27.96549415588379, "learning_rate": 1.0776545166402536e-06, "loss": 2.5054, "step": 1194 }, { "epoch": 0.8991723100075244, "grad_norm": 23.357175827026367, "learning_rate": 1.06973058637084e-06, "loss": 1.9805, "step": 1195 }, { "epoch": 0.8999247554552295, "grad_norm": 26.538541793823242, "learning_rate": 1.0618066561014264e-06, "loss": 2.2412, "step": 1196 }, { "epoch": 0.9006772009029346, "grad_norm": 20.19118881225586, "learning_rate": 1.0538827258320128e-06, "loss": 1.9414, "step": 1197 }, { "epoch": 0.9014296463506396, "grad_norm": 24.33431625366211, "learning_rate": 1.045958795562599e-06, "loss": 2.0566, "step": 1198 }, { "epoch": 0.9021820917983446, "grad_norm": 22.80762481689453, "learning_rate": 1.0380348652931856e-06, "loss": 2.4951, "step": 1199 }, { "epoch": 0.9029345372460497, "grad_norm": 25.68887710571289, "learning_rate": 1.0301109350237718e-06, "loss": 2.457, "step": 1200 }, { "epoch": 0.9036869826937547, "grad_norm": 32.62685012817383, "learning_rate": 1.0221870047543582e-06, "loss": 2.1187, "step": 1201 }, { "epoch": 0.9044394281414597, "grad_norm": 21.595977783203125, "learning_rate": 1.0142630744849446e-06, "loss": 2.019, "step": 1202 }, { "epoch": 0.9051918735891648, "grad_norm": 22.366758346557617, "learning_rate": 1.006339144215531e-06, "loss": 2.5293, "step": 1203 }, { "epoch": 0.9059443190368698, "grad_norm": 22.554767608642578, "learning_rate": 9.984152139461172e-07, "loss": 2.3076, "step": 1204 }, { "epoch": 0.9066967644845748, "grad_norm": 23.065105438232422, "learning_rate": 9.904912836767039e-07, "loss": 2.1777, "step": 1205 }, { "epoch": 0.90744920993228, "grad_norm": 20.888399124145508, "learning_rate": 9.8256735340729e-07, "loss": 2.0293, "step": 1206 }, { "epoch": 0.908201655379985, "grad_norm": 23.42367935180664, "learning_rate": 9.746434231378764e-07, "loss": 1.9868, "step": 1207 }, { "epoch": 0.90895410082769, "grad_norm": 28.955568313598633, "learning_rate": 9.667194928684629e-07, "loss": 2.3193, "step": 1208 }, { "epoch": 0.909706546275395, "grad_norm": 24.234487533569336, "learning_rate": 9.587955625990493e-07, "loss": 2.29, "step": 1209 }, { "epoch": 0.9104589917231001, "grad_norm": 28.754636764526367, "learning_rate": 9.508716323296357e-07, "loss": 1.8267, "step": 1210 }, { "epoch": 0.9112114371708051, "grad_norm": 39.35755920410156, "learning_rate": 9.42947702060222e-07, "loss": 2.5771, "step": 1211 }, { "epoch": 0.9119638826185101, "grad_norm": 32.07710647583008, "learning_rate": 9.350237717908083e-07, "loss": 2.2168, "step": 1212 }, { "epoch": 0.9127163280662152, "grad_norm": 20.59889030456543, "learning_rate": 9.270998415213947e-07, "loss": 1.8052, "step": 1213 }, { "epoch": 0.9134687735139202, "grad_norm": 28.78321647644043, "learning_rate": 9.191759112519811e-07, "loss": 2.0625, "step": 1214 }, { "epoch": 0.9142212189616253, "grad_norm": 28.022022247314453, "learning_rate": 9.112519809825675e-07, "loss": 2.3125, "step": 1215 }, { "epoch": 0.9149736644093304, "grad_norm": 25.90964698791504, "learning_rate": 9.033280507131538e-07, "loss": 2.3125, "step": 1216 }, { "epoch": 0.9157261098570354, "grad_norm": 23.057811737060547, "learning_rate": 8.954041204437401e-07, "loss": 1.9546, "step": 1217 }, { "epoch": 0.9164785553047404, "grad_norm": 29.27845001220703, "learning_rate": 8.874801901743266e-07, "loss": 2.0498, "step": 1218 }, { "epoch": 0.9172310007524455, "grad_norm": 18.934534072875977, "learning_rate": 8.795562599049129e-07, "loss": 1.6738, "step": 1219 }, { "epoch": 0.9179834462001505, "grad_norm": 24.765535354614258, "learning_rate": 8.716323296354994e-07, "loss": 1.8662, "step": 1220 }, { "epoch": 0.9187358916478555, "grad_norm": 24.855249404907227, "learning_rate": 8.637083993660857e-07, "loss": 2.2725, "step": 1221 }, { "epoch": 0.9194883370955605, "grad_norm": 25.016971588134766, "learning_rate": 8.55784469096672e-07, "loss": 2.1836, "step": 1222 }, { "epoch": 0.9202407825432656, "grad_norm": 21.017953872680664, "learning_rate": 8.478605388272584e-07, "loss": 1.5679, "step": 1223 }, { "epoch": 0.9209932279909706, "grad_norm": 22.584678649902344, "learning_rate": 8.399366085578448e-07, "loss": 2.208, "step": 1224 }, { "epoch": 0.9217456734386757, "grad_norm": 28.47764778137207, "learning_rate": 8.320126782884311e-07, "loss": 2.3271, "step": 1225 }, { "epoch": 0.9224981188863808, "grad_norm": 19.13991928100586, "learning_rate": 8.240887480190175e-07, "loss": 2.0029, "step": 1226 }, { "epoch": 0.9232505643340858, "grad_norm": 22.010602951049805, "learning_rate": 8.161648177496038e-07, "loss": 2.271, "step": 1227 }, { "epoch": 0.9240030097817908, "grad_norm": 20.220909118652344, "learning_rate": 8.082408874801903e-07, "loss": 1.8848, "step": 1228 }, { "epoch": 0.9247554552294959, "grad_norm": 22.18108558654785, "learning_rate": 8.003169572107766e-07, "loss": 1.9492, "step": 1229 }, { "epoch": 0.9255079006772009, "grad_norm": 23.985708236694336, "learning_rate": 7.923930269413629e-07, "loss": 2.5166, "step": 1230 }, { "epoch": 0.9262603461249059, "grad_norm": 22.24486541748047, "learning_rate": 7.844690966719494e-07, "loss": 2.1641, "step": 1231 }, { "epoch": 0.927012791572611, "grad_norm": 24.414819717407227, "learning_rate": 7.765451664025357e-07, "loss": 1.9678, "step": 1232 }, { "epoch": 0.927765237020316, "grad_norm": 24.730697631835938, "learning_rate": 7.68621236133122e-07, "loss": 2.6123, "step": 1233 }, { "epoch": 0.928517682468021, "grad_norm": 27.271892547607422, "learning_rate": 7.606973058637085e-07, "loss": 1.9854, "step": 1234 }, { "epoch": 0.9292701279157262, "grad_norm": 29.186485290527344, "learning_rate": 7.527733755942948e-07, "loss": 2.0908, "step": 1235 }, { "epoch": 0.9300225733634312, "grad_norm": 28.539831161499023, "learning_rate": 7.448494453248812e-07, "loss": 2.0962, "step": 1236 }, { "epoch": 0.9307750188111362, "grad_norm": 37.84783172607422, "learning_rate": 7.369255150554675e-07, "loss": 1.8511, "step": 1237 }, { "epoch": 0.9315274642588413, "grad_norm": 20.103530883789062, "learning_rate": 7.290015847860539e-07, "loss": 2.293, "step": 1238 }, { "epoch": 0.9322799097065463, "grad_norm": 20.439922332763672, "learning_rate": 7.210776545166403e-07, "loss": 2.252, "step": 1239 }, { "epoch": 0.9330323551542513, "grad_norm": 23.894981384277344, "learning_rate": 7.131537242472266e-07, "loss": 2.6875, "step": 1240 }, { "epoch": 0.9337848006019563, "grad_norm": 19.7099666595459, "learning_rate": 7.052297939778129e-07, "loss": 2.0059, "step": 1241 }, { "epoch": 0.9345372460496614, "grad_norm": 33.36943054199219, "learning_rate": 6.973058637083995e-07, "loss": 2.6074, "step": 1242 }, { "epoch": 0.9352896914973664, "grad_norm": 35.51583480834961, "learning_rate": 6.893819334389858e-07, "loss": 2.46, "step": 1243 }, { "epoch": 0.9360421369450714, "grad_norm": 24.268138885498047, "learning_rate": 6.814580031695723e-07, "loss": 2.5137, "step": 1244 }, { "epoch": 0.9367945823927766, "grad_norm": 23.564882278442383, "learning_rate": 6.735340729001586e-07, "loss": 2.5801, "step": 1245 }, { "epoch": 0.9375470278404816, "grad_norm": 25.725584030151367, "learning_rate": 6.656101426307449e-07, "loss": 2.377, "step": 1246 }, { "epoch": 0.9382994732881866, "grad_norm": 22.05669593811035, "learning_rate": 6.576862123613313e-07, "loss": 2.0371, "step": 1247 }, { "epoch": 0.9390519187358917, "grad_norm": 22.974348068237305, "learning_rate": 6.497622820919177e-07, "loss": 2.25, "step": 1248 }, { "epoch": 0.9398043641835967, "grad_norm": 18.700010299682617, "learning_rate": 6.41838351822504e-07, "loss": 1.9766, "step": 1249 }, { "epoch": 0.9405568096313017, "grad_norm": 30.736019134521484, "learning_rate": 6.339144215530904e-07, "loss": 2.3047, "step": 1250 }, { "epoch": 0.9413092550790068, "grad_norm": 19.181961059570312, "learning_rate": 6.259904912836768e-07, "loss": 1.6123, "step": 1251 }, { "epoch": 0.9420617005267118, "grad_norm": 24.28353500366211, "learning_rate": 6.180665610142631e-07, "loss": 3.0742, "step": 1252 }, { "epoch": 0.9428141459744168, "grad_norm": 17.89773941040039, "learning_rate": 6.101426307448495e-07, "loss": 2.1025, "step": 1253 }, { "epoch": 0.9435665914221218, "grad_norm": 20.147546768188477, "learning_rate": 6.022187004754359e-07, "loss": 1.6606, "step": 1254 }, { "epoch": 0.944319036869827, "grad_norm": 22.10161781311035, "learning_rate": 5.942947702060222e-07, "loss": 2.126, "step": 1255 }, { "epoch": 0.945071482317532, "grad_norm": 20.982725143432617, "learning_rate": 5.863708399366086e-07, "loss": 1.7944, "step": 1256 }, { "epoch": 0.945823927765237, "grad_norm": 28.338546752929688, "learning_rate": 5.78446909667195e-07, "loss": 1.9727, "step": 1257 }, { "epoch": 0.9465763732129421, "grad_norm": 24.861492156982422, "learning_rate": 5.705229793977814e-07, "loss": 2.1104, "step": 1258 }, { "epoch": 0.9473288186606471, "grad_norm": 20.712827682495117, "learning_rate": 5.625990491283677e-07, "loss": 2.1768, "step": 1259 }, { "epoch": 0.9480812641083521, "grad_norm": 17.77390480041504, "learning_rate": 5.546751188589541e-07, "loss": 1.8994, "step": 1260 }, { "epoch": 0.9488337095560572, "grad_norm": 16.790987014770508, "learning_rate": 5.467511885895405e-07, "loss": 2.24, "step": 1261 }, { "epoch": 0.9495861550037622, "grad_norm": 30.8397159576416, "learning_rate": 5.388272583201268e-07, "loss": 2.4854, "step": 1262 }, { "epoch": 0.9503386004514672, "grad_norm": 21.42900276184082, "learning_rate": 5.309033280507132e-07, "loss": 2.0771, "step": 1263 }, { "epoch": 0.9510910458991723, "grad_norm": 18.064916610717773, "learning_rate": 5.229793977812995e-07, "loss": 1.6284, "step": 1264 }, { "epoch": 0.9518434913468774, "grad_norm": 25.005502700805664, "learning_rate": 5.150554675118859e-07, "loss": 2.2305, "step": 1265 }, { "epoch": 0.9525959367945824, "grad_norm": 27.3756160736084, "learning_rate": 5.071315372424723e-07, "loss": 2.4912, "step": 1266 }, { "epoch": 0.9533483822422875, "grad_norm": 19.913427352905273, "learning_rate": 4.992076069730586e-07, "loss": 2.0059, "step": 1267 }, { "epoch": 0.9541008276899925, "grad_norm": 33.70522689819336, "learning_rate": 4.91283676703645e-07, "loss": 1.7095, "step": 1268 }, { "epoch": 0.9548532731376975, "grad_norm": 20.042531967163086, "learning_rate": 4.833597464342314e-07, "loss": 2.4277, "step": 1269 }, { "epoch": 0.9556057185854026, "grad_norm": 23.788270950317383, "learning_rate": 4.7543581616481783e-07, "loss": 2.3633, "step": 1270 }, { "epoch": 0.9563581640331076, "grad_norm": 20.8429012298584, "learning_rate": 4.6751188589540413e-07, "loss": 2.3979, "step": 1271 }, { "epoch": 0.9571106094808126, "grad_norm": 36.584800720214844, "learning_rate": 4.5958795562599054e-07, "loss": 2.1558, "step": 1272 }, { "epoch": 0.9578630549285176, "grad_norm": 25.138290405273438, "learning_rate": 4.516640253565769e-07, "loss": 2.4678, "step": 1273 }, { "epoch": 0.9586155003762227, "grad_norm": 32.93474197387695, "learning_rate": 4.437400950871633e-07, "loss": 2.0742, "step": 1274 }, { "epoch": 0.9593679458239278, "grad_norm": 26.20588493347168, "learning_rate": 4.358161648177497e-07, "loss": 1.9326, "step": 1275 }, { "epoch": 0.9601203912716328, "grad_norm": 25.365707397460938, "learning_rate": 4.27892234548336e-07, "loss": 2.0967, "step": 1276 }, { "epoch": 0.9608728367193379, "grad_norm": 22.695459365844727, "learning_rate": 4.199683042789224e-07, "loss": 2.4688, "step": 1277 }, { "epoch": 0.9616252821670429, "grad_norm": 19.69232177734375, "learning_rate": 4.1204437400950875e-07, "loss": 2.249, "step": 1278 }, { "epoch": 0.9623777276147479, "grad_norm": 23.908605575561523, "learning_rate": 4.0412044374009516e-07, "loss": 2.3076, "step": 1279 }, { "epoch": 0.963130173062453, "grad_norm": 22.21122169494629, "learning_rate": 3.9619651347068146e-07, "loss": 1.8652, "step": 1280 }, { "epoch": 0.963882618510158, "grad_norm": 20.96678352355957, "learning_rate": 3.8827258320126786e-07, "loss": 2.583, "step": 1281 }, { "epoch": 0.964635063957863, "grad_norm": 22.674808502197266, "learning_rate": 3.8034865293185427e-07, "loss": 2.2803, "step": 1282 }, { "epoch": 0.9653875094055681, "grad_norm": 24.316547393798828, "learning_rate": 3.724247226624406e-07, "loss": 2.5156, "step": 1283 }, { "epoch": 0.9661399548532731, "grad_norm": 21.583770751953125, "learning_rate": 3.6450079239302697e-07, "loss": 2.2202, "step": 1284 }, { "epoch": 0.9668924003009782, "grad_norm": 22.84387969970703, "learning_rate": 3.565768621236133e-07, "loss": 2.4961, "step": 1285 }, { "epoch": 0.9676448457486833, "grad_norm": 27.4648380279541, "learning_rate": 3.486529318541997e-07, "loss": 2.7754, "step": 1286 }, { "epoch": 0.9683972911963883, "grad_norm": 23.863908767700195, "learning_rate": 3.4072900158478613e-07, "loss": 2.4111, "step": 1287 }, { "epoch": 0.9691497366440933, "grad_norm": 27.128982543945312, "learning_rate": 3.3280507131537243e-07, "loss": 2.2827, "step": 1288 }, { "epoch": 0.9699021820917983, "grad_norm": 25.93006134033203, "learning_rate": 3.2488114104595883e-07, "loss": 2.3442, "step": 1289 }, { "epoch": 0.9706546275395034, "grad_norm": 26.65546417236328, "learning_rate": 3.169572107765452e-07, "loss": 2.1821, "step": 1290 }, { "epoch": 0.9714070729872084, "grad_norm": 43.7320442199707, "learning_rate": 3.0903328050713154e-07, "loss": 2.998, "step": 1291 }, { "epoch": 0.9721595184349134, "grad_norm": 20.625991821289062, "learning_rate": 3.0110935023771794e-07, "loss": 2.6406, "step": 1292 }, { "epoch": 0.9729119638826185, "grad_norm": 19.000179290771484, "learning_rate": 2.931854199683043e-07, "loss": 2.1602, "step": 1293 }, { "epoch": 0.9736644093303235, "grad_norm": 21.424129486083984, "learning_rate": 2.852614896988907e-07, "loss": 2.0127, "step": 1294 }, { "epoch": 0.9744168547780286, "grad_norm": 29.558860778808594, "learning_rate": 2.7733755942947705e-07, "loss": 2.7988, "step": 1295 }, { "epoch": 0.9751693002257337, "grad_norm": 26.818078994750977, "learning_rate": 2.694136291600634e-07, "loss": 1.9385, "step": 1296 }, { "epoch": 0.9759217456734387, "grad_norm": 23.0302791595459, "learning_rate": 2.6148969889064975e-07, "loss": 2.0264, "step": 1297 }, { "epoch": 0.9766741911211437, "grad_norm": 19.594850540161133, "learning_rate": 2.5356576862123616e-07, "loss": 1.9297, "step": 1298 }, { "epoch": 0.9774266365688488, "grad_norm": 29.594823837280273, "learning_rate": 2.456418383518225e-07, "loss": 2.3984, "step": 1299 }, { "epoch": 0.9781790820165538, "grad_norm": 18.358137130737305, "learning_rate": 2.3771790808240892e-07, "loss": 2.019, "step": 1300 }, { "epoch": 0.9789315274642588, "grad_norm": 26.450931549072266, "learning_rate": 2.2979397781299527e-07, "loss": 2.0542, "step": 1301 }, { "epoch": 0.9796839729119639, "grad_norm": 20.932947158813477, "learning_rate": 2.2187004754358165e-07, "loss": 2.2026, "step": 1302 }, { "epoch": 0.9804364183596689, "grad_norm": 21.195083618164062, "learning_rate": 2.13946117274168e-07, "loss": 2.2148, "step": 1303 }, { "epoch": 0.9811888638073739, "grad_norm": 25.654787063598633, "learning_rate": 2.0602218700475438e-07, "loss": 2.0049, "step": 1304 }, { "epoch": 0.981941309255079, "grad_norm": 28.244712829589844, "learning_rate": 1.9809825673534073e-07, "loss": 1.793, "step": 1305 }, { "epoch": 0.9826937547027841, "grad_norm": 30.86054801940918, "learning_rate": 1.9017432646592713e-07, "loss": 2.1162, "step": 1306 }, { "epoch": 0.9834462001504891, "grad_norm": 19.956132888793945, "learning_rate": 1.8225039619651348e-07, "loss": 2.2725, "step": 1307 }, { "epoch": 0.9841986455981941, "grad_norm": 22.997634887695312, "learning_rate": 1.7432646592709986e-07, "loss": 2.4033, "step": 1308 }, { "epoch": 0.9849510910458992, "grad_norm": 32.66743087768555, "learning_rate": 1.6640253565768621e-07, "loss": 2.792, "step": 1309 }, { "epoch": 0.9857035364936042, "grad_norm": 18.8122501373291, "learning_rate": 1.584786053882726e-07, "loss": 1.9287, "step": 1310 }, { "epoch": 0.9864559819413092, "grad_norm": 18.69032096862793, "learning_rate": 1.5055467511885897e-07, "loss": 2.1504, "step": 1311 }, { "epoch": 0.9872084273890143, "grad_norm": 23.050729751586914, "learning_rate": 1.4263074484944535e-07, "loss": 2.1992, "step": 1312 }, { "epoch": 0.9879608728367193, "grad_norm": 24.075252532958984, "learning_rate": 1.347068145800317e-07, "loss": 2.7788, "step": 1313 }, { "epoch": 0.9887133182844243, "grad_norm": 21.160234451293945, "learning_rate": 1.2678288431061808e-07, "loss": 2.4111, "step": 1314 }, { "epoch": 0.9894657637321295, "grad_norm": 21.52849578857422, "learning_rate": 1.1885895404120446e-07, "loss": 1.6572, "step": 1315 }, { "epoch": 0.9902182091798345, "grad_norm": 25.144681930541992, "learning_rate": 1.1093502377179082e-07, "loss": 2.0186, "step": 1316 }, { "epoch": 0.9909706546275395, "grad_norm": 24.97117042541504, "learning_rate": 1.0301109350237719e-07, "loss": 2.4766, "step": 1317 }, { "epoch": 0.9917231000752446, "grad_norm": 21.990854263305664, "learning_rate": 9.508716323296357e-08, "loss": 2.3086, "step": 1318 }, { "epoch": 0.9924755455229496, "grad_norm": 20.860063552856445, "learning_rate": 8.716323296354993e-08, "loss": 1.8662, "step": 1319 }, { "epoch": 0.9932279909706546, "grad_norm": 31.04228401184082, "learning_rate": 7.92393026941363e-08, "loss": 2.043, "step": 1320 }, { "epoch": 0.9939804364183596, "grad_norm": 22.26874351501465, "learning_rate": 7.131537242472267e-08, "loss": 2.2002, "step": 1321 }, { "epoch": 0.9947328818660647, "grad_norm": 25.829164505004883, "learning_rate": 6.339144215530904e-08, "loss": 2.2036, "step": 1322 }, { "epoch": 0.9954853273137697, "grad_norm": 23.04068374633789, "learning_rate": 5.546751188589541e-08, "loss": 2.2529, "step": 1323 }, { "epoch": 0.9962377727614747, "grad_norm": 21.899280548095703, "learning_rate": 4.754358161648178e-08, "loss": 2.6411, "step": 1324 }, { "epoch": 0.9969902182091799, "grad_norm": 28.25452423095703, "learning_rate": 3.961965134706815e-08, "loss": 2.1074, "step": 1325 }, { "epoch": 0.9977426636568849, "grad_norm": 22.75929069519043, "learning_rate": 3.169572107765452e-08, "loss": 2.2275, "step": 1326 }, { "epoch": 0.9984951091045899, "grad_norm": 32.01602554321289, "learning_rate": 2.377179080824089e-08, "loss": 2.252, "step": 1327 }, { "epoch": 0.999247554552295, "grad_norm": 20.957666397094727, "learning_rate": 1.584786053882726e-08, "loss": 2.002, "step": 1328 }, { "epoch": 1.0, "grad_norm": 21.312353134155273, "learning_rate": 7.92393026941363e-09, "loss": 2.4668, "step": 1329 }, { "epoch": 1.0, "step": 1329, "total_flos": 2.392704719865774e+18, "train_loss": 2.6215605380572797, "train_runtime": 1486.6252, "train_samples_per_second": 228.706, "train_steps_per_second": 0.894 } ], "logging_steps": 1, "max_steps": 1329, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.392704719865774e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }