| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9996550874599656, |
| "eval_steps": 500, |
| "global_step": 2536, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0003941857600394186, |
| "grad_norm": 105.2694138675085, |
| "learning_rate": 7.874015748031497e-08, |
| "loss": 9.1364, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0007883715200788372, |
| "grad_norm": 96.39150101397924, |
| "learning_rate": 1.5748031496062994e-07, |
| "loss": 9.154, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0011825572801182557, |
| "grad_norm": 101.32411852803344, |
| "learning_rate": 2.362204724409449e-07, |
| "loss": 9.0663, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0015767430401576743, |
| "grad_norm": 82.40406383837171, |
| "learning_rate": 3.149606299212599e-07, |
| "loss": 9.042, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.001970928800197093, |
| "grad_norm": 67.15234107674974, |
| "learning_rate": 3.937007874015748e-07, |
| "loss": 8.8627, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.0023651145602365115, |
| "grad_norm": 50.03571730325678, |
| "learning_rate": 4.724409448818898e-07, |
| "loss": 8.7217, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00275930032027593, |
| "grad_norm": 44.44168073728412, |
| "learning_rate": 5.511811023622048e-07, |
| "loss": 8.7231, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0031534860803153486, |
| "grad_norm": 58.27436136079177, |
| "learning_rate": 6.299212598425198e-07, |
| "loss": 8.6106, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.003547671840354767, |
| "grad_norm": 53.84109799579168, |
| "learning_rate": 7.086614173228346e-07, |
| "loss": 8.5611, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.003941857600394186, |
| "grad_norm": 57.647941933879174, |
| "learning_rate": 7.874015748031496e-07, |
| "loss": 8.4481, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004336043360433604, |
| "grad_norm": 53.27276526815267, |
| "learning_rate": 8.661417322834646e-07, |
| "loss": 8.2106, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004730229120473023, |
| "grad_norm": 41.91985046682287, |
| "learning_rate": 9.448818897637796e-07, |
| "loss": 8.178, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0051244148805124415, |
| "grad_norm": 53.755309589361765, |
| "learning_rate": 1.0236220472440946e-06, |
| "loss": 7.9319, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00551860064055186, |
| "grad_norm": 46.32010256680663, |
| "learning_rate": 1.1023622047244096e-06, |
| "loss": 7.8888, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.005912786400591279, |
| "grad_norm": 49.6775736894897, |
| "learning_rate": 1.1811023622047246e-06, |
| "loss": 7.5264, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.006306972160630697, |
| "grad_norm": 42.62706429768497, |
| "learning_rate": 1.2598425196850396e-06, |
| "loss": 7.3966, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.006701157920670116, |
| "grad_norm": 49.62426641360553, |
| "learning_rate": 1.3385826771653545e-06, |
| "loss": 7.2773, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.007095343680709534, |
| "grad_norm": 37.16995292351442, |
| "learning_rate": 1.4173228346456693e-06, |
| "loss": 7.1822, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.007489529440748953, |
| "grad_norm": 47.04696207168547, |
| "learning_rate": 1.4960629921259845e-06, |
| "loss": 7.0047, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.007883715200788372, |
| "grad_norm": 37.94846722638193, |
| "learning_rate": 1.5748031496062992e-06, |
| "loss": 6.7177, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00827790096082779, |
| "grad_norm": 38.60623637532149, |
| "learning_rate": 1.6535433070866144e-06, |
| "loss": 6.6605, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.008672086720867209, |
| "grad_norm": 32.058378304509375, |
| "learning_rate": 1.7322834645669292e-06, |
| "loss": 6.4935, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.009066272480906627, |
| "grad_norm": 29.645376469665575, |
| "learning_rate": 1.8110236220472444e-06, |
| "loss": 6.1995, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.009460458240946046, |
| "grad_norm": 27.31573397346269, |
| "learning_rate": 1.8897637795275591e-06, |
| "loss": 6.0939, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.009854644000985464, |
| "grad_norm": 28.841277709048004, |
| "learning_rate": 1.968503937007874e-06, |
| "loss": 5.8683, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.010248829761024883, |
| "grad_norm": 28.590239784856998, |
| "learning_rate": 2.0472440944881893e-06, |
| "loss": 5.7835, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.010643015521064302, |
| "grad_norm": 24.204577484360353, |
| "learning_rate": 2.125984251968504e-06, |
| "loss": 5.7053, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.01103720128110372, |
| "grad_norm": 26.31176344346586, |
| "learning_rate": 2.2047244094488192e-06, |
| "loss": 5.4684, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.011431387041143139, |
| "grad_norm": 32.01648516861882, |
| "learning_rate": 2.283464566929134e-06, |
| "loss": 5.3315, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.011825572801182557, |
| "grad_norm": 19.524646014081327, |
| "learning_rate": 2.362204724409449e-06, |
| "loss": 5.3126, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.012219758561221976, |
| "grad_norm": 17.63176728075391, |
| "learning_rate": 2.440944881889764e-06, |
| "loss": 5.0745, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.012613944321261394, |
| "grad_norm": 15.40058495681761, |
| "learning_rate": 2.519685039370079e-06, |
| "loss": 5.0836, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.013008130081300813, |
| "grad_norm": 15.479272180480063, |
| "learning_rate": 2.598425196850394e-06, |
| "loss": 4.7458, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.013402315841340232, |
| "grad_norm": 14.695908677216751, |
| "learning_rate": 2.677165354330709e-06, |
| "loss": 4.7244, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.01379650160137965, |
| "grad_norm": 13.524764594889588, |
| "learning_rate": 2.755905511811024e-06, |
| "loss": 4.3899, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.014190687361419069, |
| "grad_norm": 13.246003272441062, |
| "learning_rate": 2.8346456692913386e-06, |
| "loss": 4.5221, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.014584873121458487, |
| "grad_norm": 12.203731349756566, |
| "learning_rate": 2.9133858267716538e-06, |
| "loss": 4.3881, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.014979058881497906, |
| "grad_norm": 14.042134575473153, |
| "learning_rate": 2.992125984251969e-06, |
| "loss": 4.2049, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.015373244641537324, |
| "grad_norm": 11.055066543698642, |
| "learning_rate": 3.0708661417322837e-06, |
| "loss": 4.3104, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.015767430401576743, |
| "grad_norm": 10.153496145045878, |
| "learning_rate": 3.1496062992125985e-06, |
| "loss": 4.3375, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01616161616161616, |
| "grad_norm": 10.799528438464218, |
| "learning_rate": 3.2283464566929136e-06, |
| "loss": 4.4063, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.01655580192165558, |
| "grad_norm": 8.368548564564762, |
| "learning_rate": 3.307086614173229e-06, |
| "loss": 3.7956, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.016949987681695, |
| "grad_norm": 10.759799829642327, |
| "learning_rate": 3.3858267716535436e-06, |
| "loss": 3.9338, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.017344173441734417, |
| "grad_norm": 9.554117583184022, |
| "learning_rate": 3.4645669291338583e-06, |
| "loss": 3.8938, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.017738359201773836, |
| "grad_norm": 23.01084152913365, |
| "learning_rate": 3.5433070866141735e-06, |
| "loss": 3.8921, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.018132544961813254, |
| "grad_norm": 9.532765765693696, |
| "learning_rate": 3.6220472440944887e-06, |
| "loss": 3.9267, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.018526730721852673, |
| "grad_norm": 7.676169667219361, |
| "learning_rate": 3.7007874015748035e-06, |
| "loss": 3.5909, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.01892091648189209, |
| "grad_norm": 12.040351419125447, |
| "learning_rate": 3.7795275590551182e-06, |
| "loss": 3.9373, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01931510224193151, |
| "grad_norm": 8.25216993424453, |
| "learning_rate": 3.858267716535433e-06, |
| "loss": 3.5314, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.01970928800197093, |
| "grad_norm": 7.474412198918091, |
| "learning_rate": 3.937007874015748e-06, |
| "loss": 3.506, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.020103473762010347, |
| "grad_norm": 7.847621110877795, |
| "learning_rate": 4.015748031496064e-06, |
| "loss": 3.5028, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.020497659522049766, |
| "grad_norm": 6.570956902449958, |
| "learning_rate": 4.0944881889763785e-06, |
| "loss": 3.4612, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.020891845282089185, |
| "grad_norm": 5.5766242231172924, |
| "learning_rate": 4.173228346456693e-06, |
| "loss": 3.2965, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.021286031042128603, |
| "grad_norm": 6.108165687578511, |
| "learning_rate": 4.251968503937008e-06, |
| "loss": 3.4297, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.02168021680216802, |
| "grad_norm": 5.219670006640724, |
| "learning_rate": 4.330708661417324e-06, |
| "loss": 2.9365, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.02207440256220744, |
| "grad_norm": 11.909762655268862, |
| "learning_rate": 4.4094488188976384e-06, |
| "loss": 3.3342, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.02246858832224686, |
| "grad_norm": 6.039053713195223, |
| "learning_rate": 4.488188976377953e-06, |
| "loss": 3.1308, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.022862774082286277, |
| "grad_norm": 6.330821449415944, |
| "learning_rate": 4.566929133858268e-06, |
| "loss": 3.1559, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.023256959842325696, |
| "grad_norm": 5.850842944173947, |
| "learning_rate": 4.645669291338583e-06, |
| "loss": 3.1376, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.023651145602365115, |
| "grad_norm": 6.618904157271684, |
| "learning_rate": 4.724409448818898e-06, |
| "loss": 3.1044, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.024045331362404533, |
| "grad_norm": 12.768772667010369, |
| "learning_rate": 4.803149606299213e-06, |
| "loss": 2.8825, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.02443951712244395, |
| "grad_norm": 7.679745085489206, |
| "learning_rate": 4.881889763779528e-06, |
| "loss": 3.0757, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.02483370288248337, |
| "grad_norm": 4.427650604634613, |
| "learning_rate": 4.960629921259843e-06, |
| "loss": 2.8175, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.02522788864252279, |
| "grad_norm": 6.028182477121757, |
| "learning_rate": 5.039370078740158e-06, |
| "loss": 2.998, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.025622074402562207, |
| "grad_norm": 5.50324148915112, |
| "learning_rate": 5.118110236220473e-06, |
| "loss": 2.9141, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.026016260162601626, |
| "grad_norm": 4.48735111430469, |
| "learning_rate": 5.196850393700788e-06, |
| "loss": 2.7909, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.026410445922641045, |
| "grad_norm": 5.701752085492088, |
| "learning_rate": 5.2755905511811025e-06, |
| "loss": 2.8697, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.026804631682680463, |
| "grad_norm": 9.227957681435909, |
| "learning_rate": 5.354330708661418e-06, |
| "loss": 2.6822, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.02719881744271988, |
| "grad_norm": 5.786678373864676, |
| "learning_rate": 5.433070866141733e-06, |
| "loss": 2.7271, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.0275930032027593, |
| "grad_norm": 4.652746279810885, |
| "learning_rate": 5.511811023622048e-06, |
| "loss": 2.7177, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02798718896279872, |
| "grad_norm": 6.252735777715452, |
| "learning_rate": 5.590551181102362e-06, |
| "loss": 2.8251, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.028381374722838137, |
| "grad_norm": 5.151704866859134, |
| "learning_rate": 5.669291338582677e-06, |
| "loss": 2.6813, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.028775560482877556, |
| "grad_norm": 4.337181405580127, |
| "learning_rate": 5.748031496062993e-06, |
| "loss": 2.4957, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.029169746242916975, |
| "grad_norm": 5.91427046899434, |
| "learning_rate": 5.8267716535433075e-06, |
| "loss": 2.6815, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.029563932002956393, |
| "grad_norm": 7.660058774479181, |
| "learning_rate": 5.905511811023622e-06, |
| "loss": 2.7335, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.029958117762995812, |
| "grad_norm": 4.115441568706006, |
| "learning_rate": 5.984251968503938e-06, |
| "loss": 2.5424, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.03035230352303523, |
| "grad_norm": 5.097053848951776, |
| "learning_rate": 6.062992125984253e-06, |
| "loss": 2.5098, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.03074648928307465, |
| "grad_norm": 3.609880169600323, |
| "learning_rate": 6.141732283464567e-06, |
| "loss": 2.4653, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.031140675043114067, |
| "grad_norm": 4.8790844537526326, |
| "learning_rate": 6.220472440944882e-06, |
| "loss": 2.5257, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.031534860803153486, |
| "grad_norm": 5.766910080666288, |
| "learning_rate": 6.299212598425197e-06, |
| "loss": 2.5395, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.031929046563192905, |
| "grad_norm": 5.536361935443466, |
| "learning_rate": 6.3779527559055125e-06, |
| "loss": 2.5367, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.03232323232323232, |
| "grad_norm": 4.770127422423979, |
| "learning_rate": 6.456692913385827e-06, |
| "loss": 2.4774, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03271741808327174, |
| "grad_norm": 4.416647274076856, |
| "learning_rate": 6.535433070866142e-06, |
| "loss": 2.4903, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.03311160384331116, |
| "grad_norm": 4.431530080181854, |
| "learning_rate": 6.614173228346458e-06, |
| "loss": 2.3936, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.03350578960335058, |
| "grad_norm": 5.6472652822872895, |
| "learning_rate": 6.692913385826772e-06, |
| "loss": 2.4404, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.03389997536339, |
| "grad_norm": 5.200598323481072, |
| "learning_rate": 6.771653543307087e-06, |
| "loss": 2.4376, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.034294161123429416, |
| "grad_norm": 4.387657662515284, |
| "learning_rate": 6.850393700787402e-06, |
| "loss": 2.3363, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.034688346883468835, |
| "grad_norm": 3.2185171323039192, |
| "learning_rate": 6.929133858267717e-06, |
| "loss": 2.2646, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.03508253264350825, |
| "grad_norm": 8.73223179057534, |
| "learning_rate": 7.0078740157480315e-06, |
| "loss": 2.3927, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.03547671840354767, |
| "grad_norm": 6.784545315493452, |
| "learning_rate": 7.086614173228347e-06, |
| "loss": 2.3697, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03587090416358709, |
| "grad_norm": 4.333450921434643, |
| "learning_rate": 7.165354330708662e-06, |
| "loss": 2.304, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.03626508992362651, |
| "grad_norm": 5.218824764842207, |
| "learning_rate": 7.2440944881889774e-06, |
| "loss": 2.3646, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.03665927568366593, |
| "grad_norm": 4.149232430620695, |
| "learning_rate": 7.322834645669292e-06, |
| "loss": 2.2622, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.037053461443705346, |
| "grad_norm": 4.193773298248102, |
| "learning_rate": 7.401574803149607e-06, |
| "loss": 2.2887, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.037447647203744765, |
| "grad_norm": 4.456311860549035, |
| "learning_rate": 7.480314960629922e-06, |
| "loss": 2.3007, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03784183296378418, |
| "grad_norm": 4.576460153117237, |
| "learning_rate": 7.5590551181102365e-06, |
| "loss": 2.3021, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0382360187238236, |
| "grad_norm": 8.479196171237232, |
| "learning_rate": 7.637795275590551e-06, |
| "loss": 2.4404, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.03863020448386302, |
| "grad_norm": 7.433380505053241, |
| "learning_rate": 7.716535433070867e-06, |
| "loss": 2.2858, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.03902439024390244, |
| "grad_norm": 9.169489148787575, |
| "learning_rate": 7.79527559055118e-06, |
| "loss": 2.2905, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03941857600394186, |
| "grad_norm": 4.505614703608414, |
| "learning_rate": 7.874015748031496e-06, |
| "loss": 2.2229, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.039812761763981276, |
| "grad_norm": 3.251111002629772, |
| "learning_rate": 7.952755905511812e-06, |
| "loss": 2.1951, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.040206947524020695, |
| "grad_norm": 4.118590361507865, |
| "learning_rate": 8.031496062992128e-06, |
| "loss": 2.271, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.04060113328406011, |
| "grad_norm": 6.9488591196561815, |
| "learning_rate": 8.110236220472441e-06, |
| "loss": 2.3629, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.04099531904409953, |
| "grad_norm": 3.5799197580937454, |
| "learning_rate": 8.188976377952757e-06, |
| "loss": 2.1602, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.04138950480413895, |
| "grad_norm": 3.698515235577877, |
| "learning_rate": 8.267716535433071e-06, |
| "loss": 2.1759, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.04178369056417837, |
| "grad_norm": 3.2516137577135646, |
| "learning_rate": 8.346456692913387e-06, |
| "loss": 2.2093, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.04217787632421779, |
| "grad_norm": 3.910051851712546, |
| "learning_rate": 8.4251968503937e-06, |
| "loss": 2.2229, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.042572062084257206, |
| "grad_norm": 3.7166583065715137, |
| "learning_rate": 8.503937007874016e-06, |
| "loss": 2.0932, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.042966247844296625, |
| "grad_norm": 2.6575124301921873, |
| "learning_rate": 8.582677165354332e-06, |
| "loss": 2.12, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.04336043360433604, |
| "grad_norm": 3.482590385246152, |
| "learning_rate": 8.661417322834647e-06, |
| "loss": 2.0901, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04375461936437546, |
| "grad_norm": 4.66548163032443, |
| "learning_rate": 8.740157480314961e-06, |
| "loss": 2.0983, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.04414880512441488, |
| "grad_norm": 2.813248162118009, |
| "learning_rate": 8.818897637795277e-06, |
| "loss": 2.0084, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0445429908844543, |
| "grad_norm": 2.667639210004557, |
| "learning_rate": 8.89763779527559e-06, |
| "loss": 1.9983, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.04493717664449372, |
| "grad_norm": 3.0839886525609463, |
| "learning_rate": 8.976377952755906e-06, |
| "loss": 2.0084, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.045331362404533136, |
| "grad_norm": 3.000412565293289, |
| "learning_rate": 9.05511811023622e-06, |
| "loss": 1.9718, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.045725548164572555, |
| "grad_norm": 4.642416950929853, |
| "learning_rate": 9.133858267716536e-06, |
| "loss": 1.9841, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.04611973392461197, |
| "grad_norm": 2.3154794311302886, |
| "learning_rate": 9.212598425196852e-06, |
| "loss": 1.9743, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.04651391968465139, |
| "grad_norm": 2.545829361546042, |
| "learning_rate": 9.291338582677165e-06, |
| "loss": 1.9539, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.04690810544469081, |
| "grad_norm": 2.974703874097749, |
| "learning_rate": 9.370078740157481e-06, |
| "loss": 1.91, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.04730229120473023, |
| "grad_norm": 2.797427125263561, |
| "learning_rate": 9.448818897637797e-06, |
| "loss": 1.9065, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04769647696476965, |
| "grad_norm": 4.324127605691098, |
| "learning_rate": 9.52755905511811e-06, |
| "loss": 1.9863, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.048090662724809066, |
| "grad_norm": 3.2983025416162945, |
| "learning_rate": 9.606299212598426e-06, |
| "loss": 1.9546, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.048484848484848485, |
| "grad_norm": 2.2657892364343017, |
| "learning_rate": 9.68503937007874e-06, |
| "loss": 1.848, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0488790342448879, |
| "grad_norm": 3.2601787777289437, |
| "learning_rate": 9.763779527559056e-06, |
| "loss": 1.9285, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04927322000492732, |
| "grad_norm": 3.5243072214231583, |
| "learning_rate": 9.842519685039371e-06, |
| "loss": 1.8762, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04966740576496674, |
| "grad_norm": 3.3017593501688394, |
| "learning_rate": 9.921259842519685e-06, |
| "loss": 1.8601, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.05006159152500616, |
| "grad_norm": 3.2653646060771444, |
| "learning_rate": 1e-05, |
| "loss": 1.8686, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.05045577728504558, |
| "grad_norm": 2.681802464673681, |
| "learning_rate": 1.0078740157480316e-05, |
| "loss": 1.8302, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.050849963045084996, |
| "grad_norm": 3.133929350491433, |
| "learning_rate": 1.015748031496063e-05, |
| "loss": 1.8372, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.051244148805124415, |
| "grad_norm": 2.534354682692382, |
| "learning_rate": 1.0236220472440946e-05, |
| "loss": 1.8715, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.05163833456516383, |
| "grad_norm": 3.0493154042368023, |
| "learning_rate": 1.031496062992126e-05, |
| "loss": 1.8485, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.05203252032520325, |
| "grad_norm": 2.0799972512373834, |
| "learning_rate": 1.0393700787401575e-05, |
| "loss": 1.7866, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.05242670608524267, |
| "grad_norm": 1.598403007988912, |
| "learning_rate": 1.047244094488189e-05, |
| "loss": 1.8013, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.05282089184528209, |
| "grad_norm": 1.91178664275519, |
| "learning_rate": 1.0551181102362205e-05, |
| "loss": 1.8741, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.05321507760532151, |
| "grad_norm": 2.1365165713401906, |
| "learning_rate": 1.0629921259842522e-05, |
| "loss": 1.7989, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.053609263365360926, |
| "grad_norm": 2.6948885430012655, |
| "learning_rate": 1.0708661417322836e-05, |
| "loss": 1.7984, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.054003449125400345, |
| "grad_norm": 1.8504724810176718, |
| "learning_rate": 1.0787401574803152e-05, |
| "loss": 1.7789, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.05439763488543976, |
| "grad_norm": 1.992151255132755, |
| "learning_rate": 1.0866141732283466e-05, |
| "loss": 1.803, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.05479182064547918, |
| "grad_norm": 3.10045850302244, |
| "learning_rate": 1.0944881889763781e-05, |
| "loss": 1.823, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.0551860064055186, |
| "grad_norm": 2.2624346551381085, |
| "learning_rate": 1.1023622047244095e-05, |
| "loss": 1.7608, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.05558019216555802, |
| "grad_norm": 1.9683772470424854, |
| "learning_rate": 1.1102362204724411e-05, |
| "loss": 1.8037, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.05597437792559744, |
| "grad_norm": 3.26220140428376, |
| "learning_rate": 1.1181102362204725e-05, |
| "loss": 1.7765, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.056368563685636856, |
| "grad_norm": 4.4068981319414595, |
| "learning_rate": 1.125984251968504e-05, |
| "loss": 1.8472, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.056762749445676275, |
| "grad_norm": 1.6987954071831348, |
| "learning_rate": 1.1338582677165354e-05, |
| "loss": 1.7572, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.057156935205715693, |
| "grad_norm": 1.847159040073359, |
| "learning_rate": 1.141732283464567e-05, |
| "loss": 1.6803, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.05755112096575511, |
| "grad_norm": 2.6708041585740596, |
| "learning_rate": 1.1496062992125985e-05, |
| "loss": 1.8088, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.05794530672579453, |
| "grad_norm": 1.9604986339037445, |
| "learning_rate": 1.15748031496063e-05, |
| "loss": 1.7155, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05833949248583395, |
| "grad_norm": 1.6691911028581192, |
| "learning_rate": 1.1653543307086615e-05, |
| "loss": 1.7748, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.05873367824587337, |
| "grad_norm": 7.3318925396826895, |
| "learning_rate": 1.1732283464566929e-05, |
| "loss": 1.7572, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.059127864005912786, |
| "grad_norm": 2.283850168056605, |
| "learning_rate": 1.1811023622047245e-05, |
| "loss": 1.7774, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.059522049765952205, |
| "grad_norm": 1.8019088514589012, |
| "learning_rate": 1.1889763779527562e-05, |
| "loss": 1.7786, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.059916235525991624, |
| "grad_norm": 1.3816061587980675, |
| "learning_rate": 1.1968503937007876e-05, |
| "loss": 1.7504, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.06031042128603104, |
| "grad_norm": 5.720763322290118, |
| "learning_rate": 1.2047244094488191e-05, |
| "loss": 1.8016, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.06070460704607046, |
| "grad_norm": 3.3964912544422994, |
| "learning_rate": 1.2125984251968505e-05, |
| "loss": 1.6964, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.06109879280610988, |
| "grad_norm": 1.7844098526259298, |
| "learning_rate": 1.2204724409448821e-05, |
| "loss": 1.7561, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.0614929785661493, |
| "grad_norm": 1.6826530766646766, |
| "learning_rate": 1.2283464566929135e-05, |
| "loss": 1.7069, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.061887164326188716, |
| "grad_norm": 3.4647919464333152, |
| "learning_rate": 1.236220472440945e-05, |
| "loss": 1.7096, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.062281350086228135, |
| "grad_norm": 2.0613781006838243, |
| "learning_rate": 1.2440944881889764e-05, |
| "loss": 1.732, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.06267553584626756, |
| "grad_norm": 1.9503601214626853, |
| "learning_rate": 1.251968503937008e-05, |
| "loss": 1.7402, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.06306972160630697, |
| "grad_norm": 1.8504549835287638, |
| "learning_rate": 1.2598425196850394e-05, |
| "loss": 1.7003, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0634639073663464, |
| "grad_norm": 2.07948846446986, |
| "learning_rate": 1.267716535433071e-05, |
| "loss": 1.7004, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.06385809312638581, |
| "grad_norm": 1.7485726412453775, |
| "learning_rate": 1.2755905511811025e-05, |
| "loss": 1.725, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.06425227888642523, |
| "grad_norm": 1.7868478014046527, |
| "learning_rate": 1.2834645669291339e-05, |
| "loss": 1.6828, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.06464646464646465, |
| "grad_norm": 1.4524583527842783, |
| "learning_rate": 1.2913385826771655e-05, |
| "loss": 1.726, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.06504065040650407, |
| "grad_norm": 1.5085438907961388, |
| "learning_rate": 1.2992125984251968e-05, |
| "loss": 1.6417, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06543483616654348, |
| "grad_norm": 1.5307066166089378, |
| "learning_rate": 1.3070866141732284e-05, |
| "loss": 1.6291, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.06582902192658291, |
| "grad_norm": 1.5549360763645417, |
| "learning_rate": 1.3149606299212601e-05, |
| "loss": 1.6966, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.06622320768662232, |
| "grad_norm": 2.1633140111873272, |
| "learning_rate": 1.3228346456692915e-05, |
| "loss": 1.5821, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.06661739344666175, |
| "grad_norm": 1.4726739949688163, |
| "learning_rate": 1.3307086614173231e-05, |
| "loss": 1.6008, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.06701157920670116, |
| "grad_norm": 1.933336638607143, |
| "learning_rate": 1.3385826771653545e-05, |
| "loss": 1.6237, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.06740576496674058, |
| "grad_norm": 1.53709942550425, |
| "learning_rate": 1.346456692913386e-05, |
| "loss": 1.6603, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.06779995072678, |
| "grad_norm": 5.838182266578105, |
| "learning_rate": 1.3543307086614174e-05, |
| "loss": 1.7374, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.06819413648681942, |
| "grad_norm": 2.1077670495936105, |
| "learning_rate": 1.362204724409449e-05, |
| "loss": 1.6751, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.06858832224685883, |
| "grad_norm": 1.79478201657228, |
| "learning_rate": 1.3700787401574804e-05, |
| "loss": 1.6147, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.06898250800689826, |
| "grad_norm": 1.3332167033318783, |
| "learning_rate": 1.377952755905512e-05, |
| "loss": 1.6174, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06937669376693767, |
| "grad_norm": 1.3613261661051188, |
| "learning_rate": 1.3858267716535433e-05, |
| "loss": 1.6226, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0697708795269771, |
| "grad_norm": 1.4747645759596355, |
| "learning_rate": 1.3937007874015749e-05, |
| "loss": 1.6831, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.0701650652870165, |
| "grad_norm": 1.2750429533681837, |
| "learning_rate": 1.4015748031496063e-05, |
| "loss": 1.7002, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.07055925104705593, |
| "grad_norm": 1.5316341355433367, |
| "learning_rate": 1.4094488188976379e-05, |
| "loss": 1.6778, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.07095343680709534, |
| "grad_norm": 1.5302517303234198, |
| "learning_rate": 1.4173228346456694e-05, |
| "loss": 1.661, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.07134762256713477, |
| "grad_norm": 1.4890855169186785, |
| "learning_rate": 1.4251968503937008e-05, |
| "loss": 1.6873, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.07174180832717418, |
| "grad_norm": 1.4685898866854017, |
| "learning_rate": 1.4330708661417324e-05, |
| "loss": 1.6183, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.0721359940872136, |
| "grad_norm": 1.1931151423557926, |
| "learning_rate": 1.440944881889764e-05, |
| "loss": 1.6106, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.07253017984725302, |
| "grad_norm": 1.2548801700230896, |
| "learning_rate": 1.4488188976377955e-05, |
| "loss": 1.6201, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.07292436560729244, |
| "grad_norm": 1.316626084569457, |
| "learning_rate": 1.456692913385827e-05, |
| "loss": 1.6652, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.07331855136733186, |
| "grad_norm": 5.515174587786105, |
| "learning_rate": 1.4645669291338584e-05, |
| "loss": 1.6672, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.07371273712737128, |
| "grad_norm": 1.2435134387010485, |
| "learning_rate": 1.47244094488189e-05, |
| "loss": 1.5948, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.07410692288741069, |
| "grad_norm": 1.27329799921956, |
| "learning_rate": 1.4803149606299214e-05, |
| "loss": 1.6548, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.07450110864745012, |
| "grad_norm": 1.2399973778980402, |
| "learning_rate": 1.488188976377953e-05, |
| "loss": 1.604, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.07489529440748953, |
| "grad_norm": 2.394011363721175, |
| "learning_rate": 1.4960629921259843e-05, |
| "loss": 1.6027, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.07528948016752895, |
| "grad_norm": 1.3778750181373447, |
| "learning_rate": 1.5039370078740159e-05, |
| "loss": 1.6389, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.07568366592756837, |
| "grad_norm": 1.5441433369147584, |
| "learning_rate": 1.5118110236220473e-05, |
| "loss": 1.6183, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.07607785168760779, |
| "grad_norm": 4.415312664776792, |
| "learning_rate": 1.5196850393700789e-05, |
| "loss": 1.5881, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.0764720374476472, |
| "grad_norm": 1.6220189908817373, |
| "learning_rate": 1.5275590551181102e-05, |
| "loss": 1.689, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.07686622320768663, |
| "grad_norm": 1.2264711147522527, |
| "learning_rate": 1.5354330708661416e-05, |
| "loss": 1.5776, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.07726040896772604, |
| "grad_norm": 1.2490481285394455, |
| "learning_rate": 1.5433070866141734e-05, |
| "loss": 1.6122, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.07765459472776547, |
| "grad_norm": 1.2303899509527259, |
| "learning_rate": 1.5511811023622048e-05, |
| "loss": 1.5495, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.07804878048780488, |
| "grad_norm": 3.4482635126365997, |
| "learning_rate": 1.559055118110236e-05, |
| "loss": 1.6351, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0784429662478443, |
| "grad_norm": 1.4430016707011335, |
| "learning_rate": 1.566929133858268e-05, |
| "loss": 1.5224, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.07883715200788372, |
| "grad_norm": 1.258723675384828, |
| "learning_rate": 1.5748031496062993e-05, |
| "loss": 1.5626, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07923133776792314, |
| "grad_norm": 1.5678661529755662, |
| "learning_rate": 1.582677165354331e-05, |
| "loss": 1.5783, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.07962552352796255, |
| "grad_norm": 2.1867650050329535, |
| "learning_rate": 1.5905511811023624e-05, |
| "loss": 1.5969, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.08001970928800198, |
| "grad_norm": 1.2889311434591015, |
| "learning_rate": 1.5984251968503938e-05, |
| "loss": 1.564, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.08041389504804139, |
| "grad_norm": 1.1654066224514485, |
| "learning_rate": 1.6062992125984255e-05, |
| "loss": 1.5517, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.08080808080808081, |
| "grad_norm": 1.2834026840027142, |
| "learning_rate": 1.614173228346457e-05, |
| "loss": 1.5784, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.08120226656812023, |
| "grad_norm": 1.097147109752616, |
| "learning_rate": 1.6220472440944883e-05, |
| "loss": 1.593, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.08159645232815965, |
| "grad_norm": 1.0826077251947002, |
| "learning_rate": 1.6299212598425197e-05, |
| "loss": 1.6672, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.08199063808819906, |
| "grad_norm": 1.1105586301185173, |
| "learning_rate": 1.6377952755905514e-05, |
| "loss": 1.6279, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.08238482384823849, |
| "grad_norm": 1.0509746948712066, |
| "learning_rate": 1.6456692913385828e-05, |
| "loss": 1.5676, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.0827790096082779, |
| "grad_norm": 1.0983909936032894, |
| "learning_rate": 1.6535433070866142e-05, |
| "loss": 1.5829, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.08317319536831733, |
| "grad_norm": 5.99007589257119, |
| "learning_rate": 1.6614173228346456e-05, |
| "loss": 1.7761, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.08356738112835674, |
| "grad_norm": 1.2452212459257412, |
| "learning_rate": 1.6692913385826773e-05, |
| "loss": 1.6174, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.08396156688839616, |
| "grad_norm": 1.2716752881032753, |
| "learning_rate": 1.6771653543307087e-05, |
| "loss": 1.5855, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.08435575264843558, |
| "grad_norm": 1.1250735671327408, |
| "learning_rate": 1.68503937007874e-05, |
| "loss": 1.6358, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.084749938408475, |
| "grad_norm": 1.2260081131211942, |
| "learning_rate": 1.692913385826772e-05, |
| "loss": 1.5142, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.08514412416851441, |
| "grad_norm": 1.1674035474423037, |
| "learning_rate": 1.7007874015748032e-05, |
| "loss": 1.57, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.08553830992855384, |
| "grad_norm": 1.2049471298049268, |
| "learning_rate": 1.708661417322835e-05, |
| "loss": 1.535, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.08593249568859325, |
| "grad_norm": 1.0593135540735228, |
| "learning_rate": 1.7165354330708663e-05, |
| "loss": 1.5262, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.08632668144863268, |
| "grad_norm": 1.2230277479432223, |
| "learning_rate": 1.7244094488188977e-05, |
| "loss": 1.4963, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.08672086720867209, |
| "grad_norm": 1.0841400801567742, |
| "learning_rate": 1.7322834645669295e-05, |
| "loss": 1.464, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08711505296871151, |
| "grad_norm": 1.0657721135903946, |
| "learning_rate": 1.740157480314961e-05, |
| "loss": 1.5183, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.08750923872875092, |
| "grad_norm": 1.0176332279317757, |
| "learning_rate": 1.7480314960629923e-05, |
| "loss": 1.5272, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.08790342448879035, |
| "grad_norm": 1.0202676847155607, |
| "learning_rate": 1.7559055118110236e-05, |
| "loss": 1.5327, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.08829761024882976, |
| "grad_norm": 6.425041690617794, |
| "learning_rate": 1.7637795275590554e-05, |
| "loss": 1.5531, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.08869179600886919, |
| "grad_norm": 1.1786231403068714, |
| "learning_rate": 1.7716535433070868e-05, |
| "loss": 1.5453, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.0890859817689086, |
| "grad_norm": 1.2325207985267532, |
| "learning_rate": 1.779527559055118e-05, |
| "loss": 1.6243, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.08948016752894802, |
| "grad_norm": 2.8120821758652292, |
| "learning_rate": 1.7874015748031495e-05, |
| "loss": 1.5169, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.08987435328898744, |
| "grad_norm": 1.1463382537995392, |
| "learning_rate": 1.7952755905511813e-05, |
| "loss": 1.5332, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.09026853904902686, |
| "grad_norm": 1.0849881708965645, |
| "learning_rate": 1.8031496062992127e-05, |
| "loss": 1.5723, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.09066272480906627, |
| "grad_norm": 1.1666290000579271, |
| "learning_rate": 1.811023622047244e-05, |
| "loss": 1.5618, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0910569105691057, |
| "grad_norm": 1.2015436620694524, |
| "learning_rate": 1.8188976377952758e-05, |
| "loss": 1.4479, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.09145109632914511, |
| "grad_norm": 1.1770257502445032, |
| "learning_rate": 1.8267716535433072e-05, |
| "loss": 1.4907, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.09184528208918454, |
| "grad_norm": 1.1626480865358226, |
| "learning_rate": 1.834645669291339e-05, |
| "loss": 1.5504, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.09223946784922395, |
| "grad_norm": 1.06078382485064, |
| "learning_rate": 1.8425196850393703e-05, |
| "loss": 1.4953, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.09263365360926337, |
| "grad_norm": 1.0930777847490591, |
| "learning_rate": 1.8503937007874017e-05, |
| "loss": 1.5751, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.09302783936930278, |
| "grad_norm": 1.0032128686122703, |
| "learning_rate": 1.858267716535433e-05, |
| "loss": 1.5573, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.09342202512934221, |
| "grad_norm": 1.316223586320374, |
| "learning_rate": 1.8661417322834648e-05, |
| "loss": 1.5121, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.09381621088938162, |
| "grad_norm": 1.2482520651605957, |
| "learning_rate": 1.8740157480314962e-05, |
| "loss": 1.5444, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.09421039664942105, |
| "grad_norm": 1.0596918045491734, |
| "learning_rate": 1.8818897637795276e-05, |
| "loss": 1.5212, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.09460458240946046, |
| "grad_norm": 10.230035305602996, |
| "learning_rate": 1.8897637795275593e-05, |
| "loss": 1.5136, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.09499876816949988, |
| "grad_norm": 1.7311033327684602, |
| "learning_rate": 1.8976377952755907e-05, |
| "loss": 1.5087, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0953929539295393, |
| "grad_norm": 1.3327399439783965, |
| "learning_rate": 1.905511811023622e-05, |
| "loss": 1.5182, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.09578713968957872, |
| "grad_norm": 1.0615025753084397, |
| "learning_rate": 1.9133858267716535e-05, |
| "loss": 1.5321, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.09618132544961813, |
| "grad_norm": 1.174065978180721, |
| "learning_rate": 1.9212598425196852e-05, |
| "loss": 1.4981, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.09657551120965756, |
| "grad_norm": 1.0837767684996553, |
| "learning_rate": 1.9291338582677166e-05, |
| "loss": 1.4733, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.09696969696969697, |
| "grad_norm": 1.0744329648400928, |
| "learning_rate": 1.937007874015748e-05, |
| "loss": 1.5172, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0973638827297364, |
| "grad_norm": 1.0479477955815488, |
| "learning_rate": 1.9448818897637797e-05, |
| "loss": 1.4767, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.0977580684897758, |
| "grad_norm": 0.9622167177031952, |
| "learning_rate": 1.952755905511811e-05, |
| "loss": 1.5212, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.09815225424981523, |
| "grad_norm": 2.2109867243739867, |
| "learning_rate": 1.960629921259843e-05, |
| "loss": 1.534, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.09854644000985464, |
| "grad_norm": 1.0330950105773389, |
| "learning_rate": 1.9685039370078743e-05, |
| "loss": 1.4988, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09894062576989407, |
| "grad_norm": 1.2543876260436326, |
| "learning_rate": 1.9763779527559057e-05, |
| "loss": 1.5515, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.09933481152993348, |
| "grad_norm": 1.0907032902576081, |
| "learning_rate": 1.984251968503937e-05, |
| "loss": 1.4944, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.0997289972899729, |
| "grad_norm": 0.9800946085411166, |
| "learning_rate": 1.9921259842519688e-05, |
| "loss": 1.4594, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.10012318305001232, |
| "grad_norm": 1.005840927677052, |
| "learning_rate": 2e-05, |
| "loss": 1.5125, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.10051736881005174, |
| "grad_norm": 0.9877177677204181, |
| "learning_rate": 1.9999990523708736e-05, |
| "loss": 1.4953, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.10091155457009116, |
| "grad_norm": 1.101690731617668, |
| "learning_rate": 1.999996209485289e-05, |
| "loss": 1.5291, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.10130574033013058, |
| "grad_norm": 1.056828743252167, |
| "learning_rate": 1.9999914713486344e-05, |
| "loss": 1.546, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.10169992609016999, |
| "grad_norm": 1.0379730842348571, |
| "learning_rate": 1.9999848379698906e-05, |
| "loss": 1.5252, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.10209411185020942, |
| "grad_norm": 0.9403586150467369, |
| "learning_rate": 1.999976309361629e-05, |
| "loss": 1.4487, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.10248829761024883, |
| "grad_norm": 0.9899974982933676, |
| "learning_rate": 1.9999658855400135e-05, |
| "loss": 1.4721, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.10288248337028826, |
| "grad_norm": 1.8364244542987356, |
| "learning_rate": 1.9999535665248e-05, |
| "loss": 1.5609, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.10327666913032767, |
| "grad_norm": 1.0844452490408925, |
| "learning_rate": 1.9999393523393365e-05, |
| "loss": 1.4418, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.10367085489036709, |
| "grad_norm": 0.9972732800206876, |
| "learning_rate": 1.9999232430105618e-05, |
| "loss": 1.4595, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.1040650406504065, |
| "grad_norm": 1.0507646311810663, |
| "learning_rate": 1.999905238569008e-05, |
| "loss": 1.5172, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.10445922641044593, |
| "grad_norm": 1.095556225355519, |
| "learning_rate": 1.999885339048798e-05, |
| "loss": 1.4543, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.10485341217048534, |
| "grad_norm": 1.5429221372847546, |
| "learning_rate": 1.999863544487646e-05, |
| "loss": 1.4856, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.10524759793052477, |
| "grad_norm": 1.2099357188247561, |
| "learning_rate": 1.9998398549268594e-05, |
| "loss": 1.5493, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.10564178369056418, |
| "grad_norm": 0.935834153327994, |
| "learning_rate": 1.999814270411335e-05, |
| "loss": 1.4679, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.1060359694506036, |
| "grad_norm": 0.9438202964074678, |
| "learning_rate": 1.9997867909895626e-05, |
| "loss": 1.4995, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.10643015521064302, |
| "grad_norm": 1.033515015322255, |
| "learning_rate": 1.9997574167136225e-05, |
| "loss": 1.5551, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.10682434097068244, |
| "grad_norm": 0.9370254571893236, |
| "learning_rate": 1.9997261476391867e-05, |
| "loss": 1.4224, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.10721852673072185, |
| "grad_norm": 0.8669854368917412, |
| "learning_rate": 1.999692983825518e-05, |
| "loss": 1.4123, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.10761271249076128, |
| "grad_norm": 0.944767717267722, |
| "learning_rate": 1.999657925335471e-05, |
| "loss": 1.4617, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.10800689825080069, |
| "grad_norm": 0.8918613394976922, |
| "learning_rate": 1.9996209722354896e-05, |
| "loss": 1.4717, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.10840108401084012, |
| "grad_norm": 0.8601703235721511, |
| "learning_rate": 1.99958212459561e-05, |
| "loss": 1.4932, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.10879526977087953, |
| "grad_norm": 0.8947009718973543, |
| "learning_rate": 1.9995413824894593e-05, |
| "loss": 1.4279, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.10918945553091895, |
| "grad_norm": 0.9310105648146282, |
| "learning_rate": 1.9994987459942528e-05, |
| "loss": 1.4802, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.10958364129095836, |
| "grad_norm": 0.8501846281501174, |
| "learning_rate": 1.9994542151907988e-05, |
| "loss": 1.4749, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.10997782705099779, |
| "grad_norm": 1.0075642218200616, |
| "learning_rate": 1.999407790163494e-05, |
| "loss": 1.4024, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.1103720128110372, |
| "grad_norm": 0.8724020295218536, |
| "learning_rate": 1.9993594710003262e-05, |
| "loss": 1.4781, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.11076619857107663, |
| "grad_norm": 0.9028708477460494, |
| "learning_rate": 1.9993092577928725e-05, |
| "loss": 1.4662, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.11116038433111604, |
| "grad_norm": 0.9000611147078907, |
| "learning_rate": 1.9992571506362997e-05, |
| "loss": 1.5075, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.11155457009115546, |
| "grad_norm": 0.8987129723251234, |
| "learning_rate": 1.9992031496293652e-05, |
| "loss": 1.4287, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.11194875585119488, |
| "grad_norm": 0.9407581537583124, |
| "learning_rate": 1.999147254874414e-05, |
| "loss": 1.4692, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.1123429416112343, |
| "grad_norm": 0.8489305931721897, |
| "learning_rate": 1.999089466477381e-05, |
| "loss": 1.4033, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.11273712737127371, |
| "grad_norm": 0.9701130113270408, |
| "learning_rate": 1.999029784547791e-05, |
| "loss": 1.4633, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.11313131313131314, |
| "grad_norm": 0.9645372818337129, |
| "learning_rate": 1.9989682091987558e-05, |
| "loss": 1.4762, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.11352549889135255, |
| "grad_norm": 0.8958997231087552, |
| "learning_rate": 1.9989047405469772e-05, |
| "loss": 1.4915, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.11391968465139198, |
| "grad_norm": 0.8671815258371959, |
| "learning_rate": 1.9988393787127444e-05, |
| "loss": 1.4463, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.11431387041143139, |
| "grad_norm": 0.8618517053204878, |
| "learning_rate": 1.9987721238199345e-05, |
| "loss": 1.4234, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.11470805617147081, |
| "grad_norm": 0.8902785836218885, |
| "learning_rate": 1.9987029759960142e-05, |
| "loss": 1.4214, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.11510224193151022, |
| "grad_norm": 0.8858117437885646, |
| "learning_rate": 1.9986319353720353e-05, |
| "loss": 1.3894, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.11549642769154965, |
| "grad_norm": 0.8611263833038788, |
| "learning_rate": 1.9985590020826382e-05, |
| "loss": 1.4862, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.11589061345158906, |
| "grad_norm": 0.8533778158931522, |
| "learning_rate": 1.9984841762660508e-05, |
| "loss": 1.4738, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.11628479921162849, |
| "grad_norm": 0.9054080637678216, |
| "learning_rate": 1.998407458064087e-05, |
| "loss": 1.4873, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.1166789849716679, |
| "grad_norm": 0.8562878911122067, |
| "learning_rate": 1.9983288476221482e-05, |
| "loss": 1.4897, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.11707317073170732, |
| "grad_norm": 0.8857579006622172, |
| "learning_rate": 1.9982483450892206e-05, |
| "loss": 1.4916, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.11746735649174674, |
| "grad_norm": 0.8253228858932441, |
| "learning_rate": 1.9981659506178778e-05, |
| "loss": 1.3489, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.11786154225178616, |
| "grad_norm": 0.9323194384008091, |
| "learning_rate": 1.9980816643642787e-05, |
| "loss": 1.5008, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.11825572801182557, |
| "grad_norm": 1.0570822985529353, |
| "learning_rate": 1.9979954864881672e-05, |
| "loss": 1.4554, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.118649913771865, |
| "grad_norm": 0.9247735264199164, |
| "learning_rate": 1.997907417152873e-05, |
| "loss": 1.4352, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.11904409953190441, |
| "grad_norm": 0.9467585491612563, |
| "learning_rate": 1.9978174565253096e-05, |
| "loss": 1.4937, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.11943828529194384, |
| "grad_norm": 0.9054242752625036, |
| "learning_rate": 1.9977256047759765e-05, |
| "loss": 1.4672, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.11983247105198325, |
| "grad_norm": 0.8664782098266539, |
| "learning_rate": 1.997631862078956e-05, |
| "loss": 1.4183, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.12022665681202267, |
| "grad_norm": 0.8736218550959834, |
| "learning_rate": 1.9975362286119145e-05, |
| "loss": 1.4379, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.12062084257206208, |
| "grad_norm": 0.899159416016424, |
| "learning_rate": 1.9974387045561022e-05, |
| "loss": 1.4688, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.12101502833210151, |
| "grad_norm": 0.9132102225776563, |
| "learning_rate": 1.997339290096353e-05, |
| "loss": 1.4195, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.12140921409214092, |
| "grad_norm": 0.9022509743935889, |
| "learning_rate": 1.9972379854210824e-05, |
| "loss": 1.5341, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.12180339985218035, |
| "grad_norm": 0.8909667554707213, |
| "learning_rate": 1.997134790722289e-05, |
| "loss": 1.3896, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.12219758561221976, |
| "grad_norm": 0.810957265048853, |
| "learning_rate": 1.9970297061955533e-05, |
| "loss": 1.3607, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.12259177137225918, |
| "grad_norm": 0.8624805968721132, |
| "learning_rate": 1.996922732040038e-05, |
| "loss": 1.433, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.1229859571322986, |
| "grad_norm": 0.9012262047132807, |
| "learning_rate": 1.9968138684584862e-05, |
| "loss": 1.4337, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.12338014289233802, |
| "grad_norm": 0.8600494551649118, |
| "learning_rate": 1.9967031156572233e-05, |
| "loss": 1.3947, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.12377432865237743, |
| "grad_norm": 0.8744528870589704, |
| "learning_rate": 1.9965904738461534e-05, |
| "loss": 1.4945, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.12416851441241686, |
| "grad_norm": 0.8875872891561535, |
| "learning_rate": 1.9964759432387626e-05, |
| "loss": 1.4542, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.12456270017245627, |
| "grad_norm": 0.8538438066807553, |
| "learning_rate": 1.9963595240521158e-05, |
| "loss": 1.4219, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.1249568859324957, |
| "grad_norm": 0.8583935860681176, |
| "learning_rate": 1.9962412165068575e-05, |
| "loss": 1.3834, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.12535107169253512, |
| "grad_norm": 0.9046850234763439, |
| "learning_rate": 1.996121020827211e-05, |
| "loss": 1.4378, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.12574525745257453, |
| "grad_norm": 0.8757680720234807, |
| "learning_rate": 1.9959989372409777e-05, |
| "loss": 1.4239, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.12613944321261394, |
| "grad_norm": 1.1494791062386092, |
| "learning_rate": 1.9958749659795382e-05, |
| "loss": 1.407, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.12653362897265336, |
| "grad_norm": 0.8689927196254672, |
| "learning_rate": 1.99574910727785e-05, |
| "loss": 1.3873, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.1269278147326928, |
| "grad_norm": 0.8754813889657387, |
| "learning_rate": 1.995621361374447e-05, |
| "loss": 1.522, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1273220004927322, |
| "grad_norm": 0.8486986093611717, |
| "learning_rate": 1.9954917285114418e-05, |
| "loss": 1.3494, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.12771618625277162, |
| "grad_norm": 0.9722206329399001, |
| "learning_rate": 1.9953602089345215e-05, |
| "loss": 1.4088, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.12811037201281103, |
| "grad_norm": 0.8967214452714534, |
| "learning_rate": 1.9952268028929497e-05, |
| "loss": 1.4024, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.12850455777285047, |
| "grad_norm": 0.964703154180979, |
| "learning_rate": 1.995091510639566e-05, |
| "loss": 1.4126, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.12889874353288988, |
| "grad_norm": 0.9392746691898846, |
| "learning_rate": 1.9949543324307828e-05, |
| "loss": 1.405, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.1292929292929293, |
| "grad_norm": 0.7628618547760365, |
| "learning_rate": 1.9948152685265896e-05, |
| "loss": 1.3899, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.1296871150529687, |
| "grad_norm": 0.8699311844515389, |
| "learning_rate": 1.9946743191905473e-05, |
| "loss": 1.3766, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.13008130081300814, |
| "grad_norm": 0.935450510994964, |
| "learning_rate": 1.9945314846897922e-05, |
| "loss": 1.3913, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.13047548657304756, |
| "grad_norm": 0.8529532741122805, |
| "learning_rate": 1.9943867652950323e-05, |
| "loss": 1.3947, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.13086967233308697, |
| "grad_norm": 0.9341157491415716, |
| "learning_rate": 1.9942401612805478e-05, |
| "loss": 1.4517, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.13126385809312638, |
| "grad_norm": 0.8302844629086936, |
| "learning_rate": 1.9940916729241918e-05, |
| "loss": 1.3977, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.13165804385316582, |
| "grad_norm": 0.8260253123890825, |
| "learning_rate": 1.9939413005073873e-05, |
| "loss": 1.4048, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.13205222961320523, |
| "grad_norm": 0.8509245010253166, |
| "learning_rate": 1.9937890443151294e-05, |
| "loss": 1.3836, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.13244641537324464, |
| "grad_norm": 0.9759926385519552, |
| "learning_rate": 1.9936349046359833e-05, |
| "loss": 1.4606, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.13284060113328405, |
| "grad_norm": 0.8472765912232332, |
| "learning_rate": 1.9934788817620827e-05, |
| "loss": 1.3585, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.1332347868933235, |
| "grad_norm": 0.8448284766692432, |
| "learning_rate": 1.9933209759891318e-05, |
| "loss": 1.3559, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.1336289726533629, |
| "grad_norm": 0.8980105866822069, |
| "learning_rate": 1.9931611876164024e-05, |
| "loss": 1.3884, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.13402315841340232, |
| "grad_norm": 0.8035875577985496, |
| "learning_rate": 1.9929995169467346e-05, |
| "loss": 1.4183, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.13441734417344173, |
| "grad_norm": 0.8436688045262849, |
| "learning_rate": 1.992835964286537e-05, |
| "loss": 1.3847, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.13481152993348117, |
| "grad_norm": 0.9086794949433027, |
| "learning_rate": 1.992670529945783e-05, |
| "loss": 1.454, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.13520571569352058, |
| "grad_norm": 0.8037193631752932, |
| "learning_rate": 1.9925032142380144e-05, |
| "loss": 1.4566, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.13559990145356, |
| "grad_norm": 0.9238628826502602, |
| "learning_rate": 1.992334017480337e-05, |
| "loss": 1.4551, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.1359940872135994, |
| "grad_norm": 0.8954578526881097, |
| "learning_rate": 1.9921629399934224e-05, |
| "loss": 1.3993, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.13638827297363884, |
| "grad_norm": 0.8298423164388818, |
| "learning_rate": 1.9919899821015066e-05, |
| "loss": 1.4251, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.13678245873367825, |
| "grad_norm": 0.9558363388772838, |
| "learning_rate": 1.99181514413239e-05, |
| "loss": 1.4025, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.13717664449371766, |
| "grad_norm": 0.8459196123850001, |
| "learning_rate": 1.9916384264174354e-05, |
| "loss": 1.3976, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.13757083025375708, |
| "grad_norm": 0.9082414240992348, |
| "learning_rate": 1.9914598292915684e-05, |
| "loss": 1.4128, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.13796501601379652, |
| "grad_norm": 0.8807624601189884, |
| "learning_rate": 1.9912793530932765e-05, |
| "loss": 1.4642, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.13835920177383593, |
| "grad_norm": 0.8479509653794212, |
| "learning_rate": 1.991096998164609e-05, |
| "loss": 1.4292, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.13875338753387534, |
| "grad_norm": 0.8571495642628604, |
| "learning_rate": 1.9909127648511758e-05, |
| "loss": 1.4185, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.13914757329391475, |
| "grad_norm": 0.8394513200646011, |
| "learning_rate": 1.9907266535021465e-05, |
| "loss": 1.3907, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.1395417590539542, |
| "grad_norm": 0.8719559245356892, |
| "learning_rate": 1.9905386644702495e-05, |
| "loss": 1.4522, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.1399359448139936, |
| "grad_norm": 0.8304933398455792, |
| "learning_rate": 1.9903487981117732e-05, |
| "loss": 1.37, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.140330130574033, |
| "grad_norm": 1.0554645194699375, |
| "learning_rate": 1.990157054786563e-05, |
| "loss": 1.3502, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.14072431633407242, |
| "grad_norm": 0.7811763156565412, |
| "learning_rate": 1.9899634348580226e-05, |
| "loss": 1.3615, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.14111850209411186, |
| "grad_norm": 0.941990212474433, |
| "learning_rate": 1.9897679386931115e-05, |
| "loss": 1.3639, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.14151268785415128, |
| "grad_norm": 0.814954847959052, |
| "learning_rate": 1.989570566662345e-05, |
| "loss": 1.3888, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1419068736141907, |
| "grad_norm": 0.8608043228373365, |
| "learning_rate": 1.9893713191397944e-05, |
| "loss": 1.3935, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1423010593742301, |
| "grad_norm": 0.890892455025287, |
| "learning_rate": 1.9891701965030855e-05, |
| "loss": 1.4008, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.14269524513426954, |
| "grad_norm": 0.8356857849278824, |
| "learning_rate": 1.9889671991333976e-05, |
| "loss": 1.4298, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.14308943089430895, |
| "grad_norm": 0.9106567824779971, |
| "learning_rate": 1.9887623274154623e-05, |
| "loss": 1.3618, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.14348361665434836, |
| "grad_norm": 0.9437928820477995, |
| "learning_rate": 1.9885555817375656e-05, |
| "loss": 1.4348, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.14387780241438777, |
| "grad_norm": 0.8738867727854848, |
| "learning_rate": 1.988346962491543e-05, |
| "loss": 1.4119, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.1442719881744272, |
| "grad_norm": 0.8544123455118898, |
| "learning_rate": 1.9881364700727827e-05, |
| "loss": 1.3921, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.14466617393446662, |
| "grad_norm": 0.8937019344654401, |
| "learning_rate": 1.9879241048802213e-05, |
| "loss": 1.3936, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.14506035969450604, |
| "grad_norm": 0.8284420958345725, |
| "learning_rate": 1.987709867316346e-05, |
| "loss": 1.4026, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.14545454545454545, |
| "grad_norm": 0.989819294325302, |
| "learning_rate": 1.9874937577871928e-05, |
| "loss": 1.389, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1458487312145849, |
| "grad_norm": 0.7893349138684312, |
| "learning_rate": 1.9872757767023445e-05, |
| "loss": 1.3721, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1462429169746243, |
| "grad_norm": 0.7968967018164466, |
| "learning_rate": 1.9870559244749317e-05, |
| "loss": 1.4324, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.1466371027346637, |
| "grad_norm": 0.8953034923734662, |
| "learning_rate": 1.9868342015216312e-05, |
| "loss": 1.466, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.14703128849470312, |
| "grad_norm": 0.8501443759421378, |
| "learning_rate": 1.986610608262665e-05, |
| "loss": 1.3055, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.14742547425474256, |
| "grad_norm": 0.8315201315122736, |
| "learning_rate": 1.9863851451218006e-05, |
| "loss": 1.3872, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.14781966001478197, |
| "grad_norm": 0.8236250547602466, |
| "learning_rate": 1.9861578125263484e-05, |
| "loss": 1.3778, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.14821384577482138, |
| "grad_norm": 0.8467290646865842, |
| "learning_rate": 1.9859286109071626e-05, |
| "loss": 1.3848, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.1486080315348608, |
| "grad_norm": 0.8755206588442915, |
| "learning_rate": 1.98569754069864e-05, |
| "loss": 1.4124, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.14900221729490024, |
| "grad_norm": 0.8238920848534587, |
| "learning_rate": 1.9854646023387173e-05, |
| "loss": 1.3724, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.14939640305493965, |
| "grad_norm": 0.8349137252265575, |
| "learning_rate": 1.985229796268873e-05, |
| "loss": 1.3722, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.14979058881497906, |
| "grad_norm": 0.8217741172908753, |
| "learning_rate": 1.9849931229341258e-05, |
| "loss": 1.4549, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.15018477457501847, |
| "grad_norm": 0.9356658298644844, |
| "learning_rate": 1.9847545827830327e-05, |
| "loss": 1.3605, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1505789603350579, |
| "grad_norm": 0.8507506609004069, |
| "learning_rate": 1.9845141762676885e-05, |
| "loss": 1.3447, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.15097314609509732, |
| "grad_norm": 0.8752380208196286, |
| "learning_rate": 1.984271903843726e-05, |
| "loss": 1.4148, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.15136733185513673, |
| "grad_norm": 0.9244928793694986, |
| "learning_rate": 1.9840277659703138e-05, |
| "loss": 1.4949, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.15176151761517614, |
| "grad_norm": 0.7660534270592588, |
| "learning_rate": 1.983781763110156e-05, |
| "loss": 1.345, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.15215570337521558, |
| "grad_norm": 0.84775600235801, |
| "learning_rate": 1.983533895729492e-05, |
| "loss": 1.4457, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.152549889135255, |
| "grad_norm": 0.823703175205359, |
| "learning_rate": 1.9832841642980948e-05, |
| "loss": 1.4155, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.1529440748952944, |
| "grad_norm": 0.779646685693002, |
| "learning_rate": 1.983032569289269e-05, |
| "loss": 1.459, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.15333826065533382, |
| "grad_norm": 0.8240076846457852, |
| "learning_rate": 1.9827791111798526e-05, |
| "loss": 1.3924, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.15373244641537326, |
| "grad_norm": 0.8625913690976503, |
| "learning_rate": 1.9825237904502143e-05, |
| "loss": 1.3492, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.15412663217541267, |
| "grad_norm": 0.8365353230811579, |
| "learning_rate": 1.9822666075842527e-05, |
| "loss": 1.4228, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.15452081793545208, |
| "grad_norm": 0.8259908671120344, |
| "learning_rate": 1.9820075630693955e-05, |
| "loss": 1.4015, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.1549150036954915, |
| "grad_norm": 0.8637531603835769, |
| "learning_rate": 1.9817466573965996e-05, |
| "loss": 1.4159, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.15530918945553093, |
| "grad_norm": 0.7939363512701786, |
| "learning_rate": 1.981483891060348e-05, |
| "loss": 1.304, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.15570337521557034, |
| "grad_norm": 0.8866031449788612, |
| "learning_rate": 1.981219264558651e-05, |
| "loss": 1.3626, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.15609756097560976, |
| "grad_norm": 0.8228072983791562, |
| "learning_rate": 1.9809527783930444e-05, |
| "loss": 1.3833, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.15649174673564917, |
| "grad_norm": 0.7978736951343444, |
| "learning_rate": 1.980684433068588e-05, |
| "loss": 1.3489, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.1568859324956886, |
| "grad_norm": 0.8786273761217978, |
| "learning_rate": 1.9804142290938654e-05, |
| "loss": 1.3743, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.15728011825572802, |
| "grad_norm": 0.86249011323067, |
| "learning_rate": 1.9801421669809833e-05, |
| "loss": 1.3764, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.15767430401576743, |
| "grad_norm": 0.8732648413397713, |
| "learning_rate": 1.9798682472455694e-05, |
| "loss": 1.4046, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.15806848977580684, |
| "grad_norm": 0.8151084661992906, |
| "learning_rate": 1.979592470406772e-05, |
| "loss": 1.368, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.15846267553584628, |
| "grad_norm": 0.9192834088778115, |
| "learning_rate": 1.97931483698726e-05, |
| "loss": 1.4211, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.1588568612958857, |
| "grad_norm": 0.8163024312946099, |
| "learning_rate": 1.9790353475132206e-05, |
| "loss": 1.3405, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.1592510470559251, |
| "grad_norm": 0.8199261685516072, |
| "learning_rate": 1.9787540025143576e-05, |
| "loss": 1.4079, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.15964523281596452, |
| "grad_norm": 0.8218955327149928, |
| "learning_rate": 1.9784708025238935e-05, |
| "loss": 1.3838, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.16003941857600396, |
| "grad_norm": 0.8208820007455779, |
| "learning_rate": 1.9781857480785645e-05, |
| "loss": 1.3688, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.16043360433604337, |
| "grad_norm": 0.8771326041021362, |
| "learning_rate": 1.977898839718623e-05, |
| "loss": 1.4101, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.16082779009608278, |
| "grad_norm": 0.7558042393459081, |
| "learning_rate": 1.9776100779878344e-05, |
| "loss": 1.425, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.1612219758561222, |
| "grad_norm": 0.8739591869924033, |
| "learning_rate": 1.9773194634334764e-05, |
| "loss": 1.379, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.16161616161616163, |
| "grad_norm": 0.7847266820417704, |
| "learning_rate": 1.977026996606339e-05, |
| "loss": 1.3367, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.16201034737620104, |
| "grad_norm": 0.8477635650808805, |
| "learning_rate": 1.9767326780607218e-05, |
| "loss": 1.3511, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.16240453313624045, |
| "grad_norm": 0.8632845728066261, |
| "learning_rate": 1.976436508354435e-05, |
| "loss": 1.3313, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.16279871889627986, |
| "grad_norm": 0.7873959773662924, |
| "learning_rate": 1.9761384880487967e-05, |
| "loss": 1.3409, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.1631929046563193, |
| "grad_norm": 0.818419644861465, |
| "learning_rate": 1.9758386177086324e-05, |
| "loss": 1.4273, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.16358709041635872, |
| "grad_norm": 0.8843790656491963, |
| "learning_rate": 1.9755368979022734e-05, |
| "loss": 1.4058, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.16398127617639813, |
| "grad_norm": 0.8545938358336401, |
| "learning_rate": 1.9752333292015565e-05, |
| "loss": 1.4021, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.16437546193643754, |
| "grad_norm": 0.9263197519347521, |
| "learning_rate": 1.9749279121818235e-05, |
| "loss": 1.3893, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.16476964769647698, |
| "grad_norm": 0.7667419924633587, |
| "learning_rate": 1.9746206474219182e-05, |
| "loss": 1.3335, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.1651638334565164, |
| "grad_norm": 0.8481486595457164, |
| "learning_rate": 1.9743115355041868e-05, |
| "loss": 1.3288, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.1655580192165558, |
| "grad_norm": 0.7727894220848658, |
| "learning_rate": 1.9740005770144762e-05, |
| "loss": 1.333, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.1659522049765952, |
| "grad_norm": 0.8607077475883066, |
| "learning_rate": 1.9736877725421325e-05, |
| "loss": 1.4611, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.16634639073663465, |
| "grad_norm": 0.7998454699496479, |
| "learning_rate": 1.9733731226800016e-05, |
| "loss": 1.3622, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.16674057649667406, |
| "grad_norm": 0.7314193043164695, |
| "learning_rate": 1.9730566280244256e-05, |
| "loss": 1.3375, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.16713476225671348, |
| "grad_norm": 0.777752765207413, |
| "learning_rate": 1.9727382891752446e-05, |
| "loss": 1.38, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.1675289480167529, |
| "grad_norm": 0.8338395199460101, |
| "learning_rate": 1.9724181067357918e-05, |
| "loss": 1.3022, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.16792313377679233, |
| "grad_norm": 0.8380585348678756, |
| "learning_rate": 1.9720960813128966e-05, |
| "loss": 1.3745, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.16831731953683174, |
| "grad_norm": 0.8412709090344273, |
| "learning_rate": 1.9717722135168796e-05, |
| "loss": 1.3487, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.16871150529687115, |
| "grad_norm": 0.8188807655558134, |
| "learning_rate": 1.9714465039615545e-05, |
| "loss": 1.4046, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.16910569105691056, |
| "grad_norm": 0.7873789728209534, |
| "learning_rate": 1.9711189532642244e-05, |
| "loss": 1.3695, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.16949987681695, |
| "grad_norm": 0.8380079010888628, |
| "learning_rate": 1.9707895620456832e-05, |
| "loss": 1.4121, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1698940625769894, |
| "grad_norm": 0.7464093486132232, |
| "learning_rate": 1.9704583309302115e-05, |
| "loss": 1.3383, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.17028824833702882, |
| "grad_norm": 0.7745574128518233, |
| "learning_rate": 1.970125260545579e-05, |
| "loss": 1.4293, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.17068243409706824, |
| "grad_norm": 0.7923250648359519, |
| "learning_rate": 1.9697903515230387e-05, |
| "loss": 1.3816, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.17107661985710768, |
| "grad_norm": 0.7828760994144639, |
| "learning_rate": 1.9694536044973303e-05, |
| "loss": 1.3682, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.1714708056171471, |
| "grad_norm": 0.7535267581618733, |
| "learning_rate": 1.9691150201066765e-05, |
| "loss": 1.4415, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.1718649913771865, |
| "grad_norm": 0.7719938628460055, |
| "learning_rate": 1.9687745989927823e-05, |
| "loss": 1.3261, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.1722591771372259, |
| "grad_norm": 0.7985396893057591, |
| "learning_rate": 1.968432341800833e-05, |
| "loss": 1.3384, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.17265336289726535, |
| "grad_norm": 0.7864913353035174, |
| "learning_rate": 1.9680882491794953e-05, |
| "loss": 1.4198, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.17304754865730476, |
| "grad_norm": 0.7652857695438825, |
| "learning_rate": 1.9677423217809127e-05, |
| "loss": 1.4451, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.17344173441734417, |
| "grad_norm": 0.7779886907598241, |
| "learning_rate": 1.9673945602607073e-05, |
| "loss": 1.445, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.17383592017738358, |
| "grad_norm": 0.7526833753446838, |
| "learning_rate": 1.967044965277977e-05, |
| "loss": 1.3715, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.17423010593742302, |
| "grad_norm": 0.7613651093452684, |
| "learning_rate": 1.9666935374952946e-05, |
| "loss": 1.3418, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.17462429169746244, |
| "grad_norm": 0.7407113533991782, |
| "learning_rate": 1.9663402775787066e-05, |
| "loss": 1.3176, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.17501847745750185, |
| "grad_norm": 0.8511077778073948, |
| "learning_rate": 1.9659851861977316e-05, |
| "loss": 1.3712, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.17541266321754126, |
| "grad_norm": 0.7637296441923789, |
| "learning_rate": 1.965628264025359e-05, |
| "loss": 1.3138, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.1758068489775807, |
| "grad_norm": 0.7688575868311163, |
| "learning_rate": 1.9652695117380496e-05, |
| "loss": 1.3478, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.1762010347376201, |
| "grad_norm": 0.8112254863467798, |
| "learning_rate": 1.9649089300157307e-05, |
| "loss": 1.3199, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.17659522049765952, |
| "grad_norm": 0.7773958932143377, |
| "learning_rate": 1.9645465195417986e-05, |
| "loss": 1.3729, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.17698940625769893, |
| "grad_norm": 0.7925758880473086, |
| "learning_rate": 1.9641822810031135e-05, |
| "loss": 1.3545, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.17738359201773837, |
| "grad_norm": 0.7629015638547695, |
| "learning_rate": 1.9638162150900028e-05, |
| "loss": 1.3425, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.7832983576510374, |
| "learning_rate": 1.9634483224962555e-05, |
| "loss": 1.3347, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.1781719635378172, |
| "grad_norm": 0.8341313973861934, |
| "learning_rate": 1.963078603919123e-05, |
| "loss": 1.3995, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.1785661492978566, |
| "grad_norm": 0.7778224652767618, |
| "learning_rate": 1.9627070600593172e-05, |
| "loss": 1.2996, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.17896033505789605, |
| "grad_norm": 0.8243076810986155, |
| "learning_rate": 1.96233369162101e-05, |
| "loss": 1.3893, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.17935452081793546, |
| "grad_norm": 0.8654955959896804, |
| "learning_rate": 1.9619584993118308e-05, |
| "loss": 1.3232, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.17974870657797487, |
| "grad_norm": 0.804527846282048, |
| "learning_rate": 1.9615814838428662e-05, |
| "loss": 1.3656, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.18014289233801428, |
| "grad_norm": 0.7962448753036495, |
| "learning_rate": 1.961202645928658e-05, |
| "loss": 1.3637, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.18053707809805372, |
| "grad_norm": 0.8354245092920538, |
| "learning_rate": 1.960821986287201e-05, |
| "loss": 1.3867, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.18093126385809313, |
| "grad_norm": 0.8345477417237376, |
| "learning_rate": 1.960439505639945e-05, |
| "loss": 1.3931, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.18132544961813254, |
| "grad_norm": 0.9026625490600573, |
| "learning_rate": 1.9600552047117883e-05, |
| "loss": 1.3355, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.18171963537817196, |
| "grad_norm": 0.7381101689953861, |
| "learning_rate": 1.9596690842310807e-05, |
| "loss": 1.3469, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.1821138211382114, |
| "grad_norm": 0.8146270963359201, |
| "learning_rate": 1.9592811449296206e-05, |
| "loss": 1.3754, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.1825080068982508, |
| "grad_norm": 0.7583095033222406, |
| "learning_rate": 1.9588913875426532e-05, |
| "loss": 1.3674, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.18290219265829022, |
| "grad_norm": 0.7547653358304839, |
| "learning_rate": 1.9584998128088686e-05, |
| "loss": 1.3402, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.18329637841832963, |
| "grad_norm": 0.8068714500814903, |
| "learning_rate": 1.958106421470403e-05, |
| "loss": 1.3792, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.18369056417836907, |
| "grad_norm": 0.7623764190926223, |
| "learning_rate": 1.957711214272834e-05, |
| "loss": 1.3683, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.18408474993840848, |
| "grad_norm": 0.7327762464326012, |
| "learning_rate": 1.957314191965182e-05, |
| "loss": 1.3321, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.1844789356984479, |
| "grad_norm": 0.8050214138929509, |
| "learning_rate": 1.9569153552999057e-05, |
| "loss": 1.4045, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.1848731214584873, |
| "grad_norm": 0.7931062968671917, |
| "learning_rate": 1.9565147050329046e-05, |
| "loss": 1.3676, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.18526730721852674, |
| "grad_norm": 0.7329041782778525, |
| "learning_rate": 1.9561122419235137e-05, |
| "loss": 1.3468, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.18566149297856616, |
| "grad_norm": 0.7706739838708203, |
| "learning_rate": 1.955707966734505e-05, |
| "loss": 1.3456, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.18605567873860557, |
| "grad_norm": 0.7721590455864087, |
| "learning_rate": 1.9553018802320843e-05, |
| "loss": 1.383, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.18644986449864498, |
| "grad_norm": 0.7426283570331748, |
| "learning_rate": 1.95489398318589e-05, |
| "loss": 1.3125, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.18684405025868442, |
| "grad_norm": 2.063311743166772, |
| "learning_rate": 1.9544842763689928e-05, |
| "loss": 1.4202, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.18723823601872383, |
| "grad_norm": 0.7311089489840802, |
| "learning_rate": 1.954072760557893e-05, |
| "loss": 1.2622, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.18763242177876324, |
| "grad_norm": 0.781806989985732, |
| "learning_rate": 1.953659436532519e-05, |
| "loss": 1.3805, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.18802660753880265, |
| "grad_norm": 0.8019278871709516, |
| "learning_rate": 1.9532443050762265e-05, |
| "loss": 1.3006, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.1884207932988421, |
| "grad_norm": 0.7493676971003281, |
| "learning_rate": 1.9528273669757974e-05, |
| "loss": 1.2912, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.1888149790588815, |
| "grad_norm": 0.8268984543433072, |
| "learning_rate": 1.9524086230214366e-05, |
| "loss": 1.3565, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.18920916481892092, |
| "grad_norm": 0.7801443400096512, |
| "learning_rate": 1.951988074006772e-05, |
| "loss": 1.371, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.18960335057896033, |
| "grad_norm": 0.7539695626008661, |
| "learning_rate": 1.9515657207288528e-05, |
| "loss": 1.3721, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.18999753633899977, |
| "grad_norm": 0.7703572570935576, |
| "learning_rate": 1.9511415639881474e-05, |
| "loss": 1.4442, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.19039172209903918, |
| "grad_norm": 0.7742745558792156, |
| "learning_rate": 1.9507156045885423e-05, |
| "loss": 1.2905, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1907859078590786, |
| "grad_norm": 0.7359869825956976, |
| "learning_rate": 1.950287843337341e-05, |
| "loss": 1.3254, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.191180093619118, |
| "grad_norm": 0.7544568408416208, |
| "learning_rate": 1.9498582810452607e-05, |
| "loss": 1.3154, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.19157427937915744, |
| "grad_norm": 0.7769753768513467, |
| "learning_rate": 1.949426918526434e-05, |
| "loss": 1.3628, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.19196846513919685, |
| "grad_norm": 0.7834189136520097, |
| "learning_rate": 1.9489937565984033e-05, |
| "loss": 1.3554, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.19236265089923626, |
| "grad_norm": 0.7796538796113698, |
| "learning_rate": 1.948558796082123e-05, |
| "loss": 1.2925, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.19275683665927568, |
| "grad_norm": 1.0372440968179562, |
| "learning_rate": 1.9481220378019553e-05, |
| "loss": 1.309, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.19315102241931512, |
| "grad_norm": 0.727717117732363, |
| "learning_rate": 1.9476834825856696e-05, |
| "loss": 1.353, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.19354520817935453, |
| "grad_norm": 0.7330989067981496, |
| "learning_rate": 1.947243131264442e-05, |
| "loss": 1.3326, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.19393939393939394, |
| "grad_norm": 0.8625663326931535, |
| "learning_rate": 1.9468009846728515e-05, |
| "loss": 1.3795, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.19433357969943335, |
| "grad_norm": 0.7442872681943762, |
| "learning_rate": 1.9463570436488803e-05, |
| "loss": 1.3343, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.1947277654594728, |
| "grad_norm": 0.7892831285816906, |
| "learning_rate": 1.9459113090339107e-05, |
| "loss": 1.4112, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.1951219512195122, |
| "grad_norm": 0.7915084905242407, |
| "learning_rate": 1.945463781672726e-05, |
| "loss": 1.3867, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.1955161369795516, |
| "grad_norm": 0.7558768011341099, |
| "learning_rate": 1.945014462413505e-05, |
| "loss": 1.2735, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.19591032273959103, |
| "grad_norm": 0.7918551795385935, |
| "learning_rate": 1.9445633521078246e-05, |
| "loss": 1.366, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.19630450849963046, |
| "grad_norm": 0.7632462761447605, |
| "learning_rate": 1.944110451610655e-05, |
| "loss": 1.2919, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.19669869425966988, |
| "grad_norm": 0.8619242283408518, |
| "learning_rate": 1.9436557617803594e-05, |
| "loss": 1.3433, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.1970928800197093, |
| "grad_norm": 0.7486074296088833, |
| "learning_rate": 1.943199283478693e-05, |
| "loss": 1.3718, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1974870657797487, |
| "grad_norm": 0.7844981757900801, |
| "learning_rate": 1.9427410175707993e-05, |
| "loss": 1.3615, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.19788125153978814, |
| "grad_norm": 0.7861270837445861, |
| "learning_rate": 1.942280964925211e-05, |
| "loss": 1.4269, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.19827543729982755, |
| "grad_norm": 0.7771387444238573, |
| "learning_rate": 1.9418191264138468e-05, |
| "loss": 1.3861, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.19866962305986696, |
| "grad_norm": 0.7840229669644916, |
| "learning_rate": 1.94135550291201e-05, |
| "loss": 1.3508, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.19906380881990637, |
| "grad_norm": 0.7578091088675099, |
| "learning_rate": 1.940890095298386e-05, |
| "loss": 1.3, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.1994579945799458, |
| "grad_norm": 0.7955186622031103, |
| "learning_rate": 1.9404229044550432e-05, |
| "loss": 1.3877, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.19985218033998522, |
| "grad_norm": 0.7600697521641491, |
| "learning_rate": 1.939953931267429e-05, |
| "loss": 1.3083, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.20024636610002464, |
| "grad_norm": 0.7997760910789501, |
| "learning_rate": 1.9394831766243688e-05, |
| "loss": 1.3574, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.20064055186006405, |
| "grad_norm": 0.8324601470930124, |
| "learning_rate": 1.9390106414180635e-05, |
| "loss": 1.3314, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.2010347376201035, |
| "grad_norm": 0.7986181347574611, |
| "learning_rate": 1.9385363265440896e-05, |
| "loss": 1.3701, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2014289233801429, |
| "grad_norm": 0.8390387581661004, |
| "learning_rate": 1.9380602329013967e-05, |
| "loss": 1.3278, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.2018231091401823, |
| "grad_norm": 0.7756267484264265, |
| "learning_rate": 1.937582361392305e-05, |
| "loss": 1.2902, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.20221729490022172, |
| "grad_norm": 0.8280742083628098, |
| "learning_rate": 1.9371027129225042e-05, |
| "loss": 1.3954, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.20261148066026116, |
| "grad_norm": 0.7557033928381056, |
| "learning_rate": 1.9366212884010523e-05, |
| "loss": 1.3245, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.20300566642030057, |
| "grad_norm": 0.7339490880913666, |
| "learning_rate": 1.9361380887403726e-05, |
| "loss": 1.3314, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.20339985218033999, |
| "grad_norm": 0.759110598024447, |
| "learning_rate": 1.935653114856254e-05, |
| "loss": 1.3075, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.2037940379403794, |
| "grad_norm": 0.7330136521742119, |
| "learning_rate": 1.9351663676678465e-05, |
| "loss": 1.3105, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.20418822370041884, |
| "grad_norm": 0.8396501916315762, |
| "learning_rate": 1.9346778480976626e-05, |
| "loss": 1.3555, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.20458240946045825, |
| "grad_norm": 0.7833213499224854, |
| "learning_rate": 1.9341875570715723e-05, |
| "loss": 1.393, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.20497659522049766, |
| "grad_norm": 0.788388912099959, |
| "learning_rate": 1.9336954955188042e-05, |
| "loss": 1.3548, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.20537078098053707, |
| "grad_norm": 0.7944142250573871, |
| "learning_rate": 1.9332016643719413e-05, |
| "loss": 1.3167, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.2057649667405765, |
| "grad_norm": 0.7185170009516036, |
| "learning_rate": 1.932706064566922e-05, |
| "loss": 1.2763, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.20615915250061592, |
| "grad_norm": 0.7625422306230389, |
| "learning_rate": 1.9322086970430355e-05, |
| "loss": 1.2991, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.20655333826065533, |
| "grad_norm": 0.7528804400146271, |
| "learning_rate": 1.9317095627429215e-05, |
| "loss": 1.2744, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.20694752402069475, |
| "grad_norm": 0.7235339004181085, |
| "learning_rate": 1.931208662612569e-05, |
| "loss": 1.3023, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.20734170978073418, |
| "grad_norm": 0.7485454145610042, |
| "learning_rate": 1.930705997601313e-05, |
| "loss": 1.2737, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.2077358955407736, |
| "grad_norm": 0.7616817297855956, |
| "learning_rate": 1.9302015686618328e-05, |
| "loss": 1.3331, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.208130081300813, |
| "grad_norm": 0.7224963273000136, |
| "learning_rate": 1.929695376750152e-05, |
| "loss": 1.3113, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.20852426706085242, |
| "grad_norm": 0.7117066935208167, |
| "learning_rate": 1.9291874228256355e-05, |
| "loss": 1.3536, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.20891845282089186, |
| "grad_norm": 0.7620668487908003, |
| "learning_rate": 1.928677707850986e-05, |
| "loss": 1.3847, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.20931263858093127, |
| "grad_norm": 0.7762645227174237, |
| "learning_rate": 1.9281662327922458e-05, |
| "loss": 1.3838, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.20970682434097068, |
| "grad_norm": 0.7486355068094747, |
| "learning_rate": 1.9276529986187925e-05, |
| "loss": 1.2929, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.2101010101010101, |
| "grad_norm": 0.7850761598989443, |
| "learning_rate": 1.9271380063033368e-05, |
| "loss": 1.3511, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.21049519586104953, |
| "grad_norm": 0.7306901593960397, |
| "learning_rate": 1.9266212568219223e-05, |
| "loss": 1.3223, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.21088938162108894, |
| "grad_norm": 0.8035850088778281, |
| "learning_rate": 1.9261027511539227e-05, |
| "loss": 1.3615, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.21128356738112836, |
| "grad_norm": 0.7359933674500054, |
| "learning_rate": 1.9255824902820403e-05, |
| "loss": 1.3733, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.21167775314116777, |
| "grad_norm": 0.7361755019126336, |
| "learning_rate": 1.9250604751923035e-05, |
| "loss": 1.2759, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.2120719389012072, |
| "grad_norm": 0.7731391184456793, |
| "learning_rate": 1.9245367068740664e-05, |
| "loss": 1.3493, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.21246612466124662, |
| "grad_norm": 0.7070141898804634, |
| "learning_rate": 1.9240111863200047e-05, |
| "loss": 1.3316, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.21286031042128603, |
| "grad_norm": 0.7047293130221922, |
| "learning_rate": 1.9234839145261154e-05, |
| "loss": 1.309, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.21325449618132544, |
| "grad_norm": 0.7787357081571815, |
| "learning_rate": 1.9229548924917146e-05, |
| "loss": 1.3572, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.21364868194136488, |
| "grad_norm": 0.7390906175625679, |
| "learning_rate": 1.9224241212194364e-05, |
| "loss": 1.3855, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.2140428677014043, |
| "grad_norm": 0.7348457458913636, |
| "learning_rate": 1.9218916017152292e-05, |
| "loss": 1.3093, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.2144370534614437, |
| "grad_norm": 0.752656550237857, |
| "learning_rate": 1.9213573349883545e-05, |
| "loss": 1.4028, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.21483123922148312, |
| "grad_norm": 0.7244840658804366, |
| "learning_rate": 1.9208213220513866e-05, |
| "loss": 1.2963, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.21522542498152256, |
| "grad_norm": 0.770992566259173, |
| "learning_rate": 1.9202835639202075e-05, |
| "loss": 1.2926, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.21561961074156197, |
| "grad_norm": 0.7643194008638872, |
| "learning_rate": 1.919744061614008e-05, |
| "loss": 1.3145, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.21601379650160138, |
| "grad_norm": 0.7366196627549643, |
| "learning_rate": 1.9192028161552848e-05, |
| "loss": 1.3536, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.2164079822616408, |
| "grad_norm": 0.6968551530472608, |
| "learning_rate": 1.9186598285698373e-05, |
| "loss": 1.3063, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.21680216802168023, |
| "grad_norm": 0.7641280477443396, |
| "learning_rate": 1.9181150998867674e-05, |
| "loss": 1.3252, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.21719635378171964, |
| "grad_norm": 0.7864006183375085, |
| "learning_rate": 1.9175686311384763e-05, |
| "loss": 1.2925, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.21759053954175905, |
| "grad_norm": 0.7510317585657532, |
| "learning_rate": 1.917020423360664e-05, |
| "loss": 1.3147, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.21798472530179847, |
| "grad_norm": 0.759753668019818, |
| "learning_rate": 1.9164704775923258e-05, |
| "loss": 1.2949, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.2183789110618379, |
| "grad_norm": 0.7730004582439941, |
| "learning_rate": 1.9159187948757503e-05, |
| "loss": 1.2885, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.21877309682187732, |
| "grad_norm": 0.7672020235507695, |
| "learning_rate": 1.915365376256519e-05, |
| "loss": 1.3914, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.21916728258191673, |
| "grad_norm": 0.752157061906444, |
| "learning_rate": 1.9148102227835033e-05, |
| "loss": 1.3487, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.21956146834195614, |
| "grad_norm": 0.7278798351850428, |
| "learning_rate": 1.9142533355088628e-05, |
| "loss": 1.3303, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.21995565410199558, |
| "grad_norm": 0.7104471440585667, |
| "learning_rate": 1.9136947154880413e-05, |
| "loss": 1.3193, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.220349839862035, |
| "grad_norm": 0.7800638989095695, |
| "learning_rate": 1.9131343637797695e-05, |
| "loss": 1.3536, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.2207440256220744, |
| "grad_norm": 0.7109099389345059, |
| "learning_rate": 1.9125722814460582e-05, |
| "loss": 1.2976, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.22113821138211381, |
| "grad_norm": 0.709861315894559, |
| "learning_rate": 1.912008469552198e-05, |
| "loss": 1.3534, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.22153239714215325, |
| "grad_norm": 0.7625065746820054, |
| "learning_rate": 1.9114429291667583e-05, |
| "loss": 1.3593, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.22192658290219267, |
| "grad_norm": 0.8957024180712038, |
| "learning_rate": 1.9108756613615846e-05, |
| "loss": 1.2796, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.22232076866223208, |
| "grad_norm": 0.756013792651535, |
| "learning_rate": 1.9103066672117957e-05, |
| "loss": 1.2989, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2227149544222715, |
| "grad_norm": 0.7162732062615748, |
| "learning_rate": 1.9097359477957825e-05, |
| "loss": 1.2601, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.22310914018231093, |
| "grad_norm": 0.7436938571603158, |
| "learning_rate": 1.9091635041952052e-05, |
| "loss": 1.3151, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.22350332594235034, |
| "grad_norm": 0.7610549683893325, |
| "learning_rate": 1.9085893374949926e-05, |
| "loss": 1.2972, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.22389751170238975, |
| "grad_norm": 0.7558082450692344, |
| "learning_rate": 1.9080134487833393e-05, |
| "loss": 1.3793, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.22429169746242916, |
| "grad_norm": 0.7719491717906157, |
| "learning_rate": 1.9074358391517026e-05, |
| "loss": 1.3779, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.2246858832224686, |
| "grad_norm": 0.7374690493690355, |
| "learning_rate": 1.9068565096948017e-05, |
| "loss": 1.3406, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.225080068982508, |
| "grad_norm": 0.7538369331733002, |
| "learning_rate": 1.9062754615106162e-05, |
| "loss": 1.2936, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.22547425474254743, |
| "grad_norm": 0.7296271125635926, |
| "learning_rate": 1.905692695700382e-05, |
| "loss": 1.3447, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.22586844050258684, |
| "grad_norm": 0.8084596790033229, |
| "learning_rate": 1.905108213368591e-05, |
| "loss": 1.2637, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.22626262626262628, |
| "grad_norm": 0.7557777464040102, |
| "learning_rate": 1.904522015622988e-05, |
| "loss": 1.3563, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.2266568120226657, |
| "grad_norm": 0.7483236106401496, |
| "learning_rate": 1.9039341035745696e-05, |
| "loss": 1.2815, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.2270509977827051, |
| "grad_norm": 0.8169659004896286, |
| "learning_rate": 1.9033444783375806e-05, |
| "loss": 1.2968, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.2274451835427445, |
| "grad_norm": 0.7564345089200964, |
| "learning_rate": 1.9027531410295128e-05, |
| "loss": 1.2903, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.22783936930278395, |
| "grad_norm": 0.740064034653702, |
| "learning_rate": 1.9021600927711037e-05, |
| "loss": 1.3115, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.22823355506282336, |
| "grad_norm": 0.7536666281291825, |
| "learning_rate": 1.9015653346863322e-05, |
| "loss": 1.2815, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.22862774082286277, |
| "grad_norm": 0.7332255399421099, |
| "learning_rate": 1.900968867902419e-05, |
| "loss": 1.2896, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.22902192658290219, |
| "grad_norm": 0.7215272966131613, |
| "learning_rate": 1.9003706935498233e-05, |
| "loss": 1.3181, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.22941611234294162, |
| "grad_norm": 0.8275893204395051, |
| "learning_rate": 1.8997708127622384e-05, |
| "loss": 1.293, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.22981029810298104, |
| "grad_norm": 0.7495958353788804, |
| "learning_rate": 1.8991692266765947e-05, |
| "loss": 1.2679, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.23020448386302045, |
| "grad_norm": 0.7772101723875109, |
| "learning_rate": 1.8985659364330522e-05, |
| "loss": 1.325, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.23059866962305986, |
| "grad_norm": 0.7489454768012945, |
| "learning_rate": 1.8979609431750025e-05, |
| "loss": 1.2757, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.2309928553830993, |
| "grad_norm": 0.7612569479113607, |
| "learning_rate": 1.8973542480490636e-05, |
| "loss": 1.3161, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.2313870411431387, |
| "grad_norm": 0.8016105305619344, |
| "learning_rate": 1.89674585220508e-05, |
| "loss": 1.3373, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.23178122690317812, |
| "grad_norm": 0.7552521095717978, |
| "learning_rate": 1.8961357567961182e-05, |
| "loss": 1.3341, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.23217541266321753, |
| "grad_norm": 0.8077575349160561, |
| "learning_rate": 1.8955239629784667e-05, |
| "loss": 1.3828, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.23256959842325697, |
| "grad_norm": 0.7734481164743204, |
| "learning_rate": 1.8949104719116334e-05, |
| "loss": 1.2494, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.23296378418329639, |
| "grad_norm": 0.7239243239882402, |
| "learning_rate": 1.8942952847583417e-05, |
| "loss": 1.3492, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.2333579699433358, |
| "grad_norm": 0.7392668666857419, |
| "learning_rate": 1.8936784026845304e-05, |
| "loss": 1.2988, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.2337521557033752, |
| "grad_norm": 0.737345549169784, |
| "learning_rate": 1.8930598268593503e-05, |
| "loss": 1.3593, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.23414634146341465, |
| "grad_norm": 0.7739820026696098, |
| "learning_rate": 1.8924395584551624e-05, |
| "loss": 1.2917, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.23454052722345406, |
| "grad_norm": 0.7370299572384036, |
| "learning_rate": 1.891817598647535e-05, |
| "loss": 1.3188, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.23493471298349347, |
| "grad_norm": 0.7045735291814132, |
| "learning_rate": 1.8911939486152433e-05, |
| "loss": 1.2999, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.23532889874353288, |
| "grad_norm": 0.7318502745854408, |
| "learning_rate": 1.8905686095402648e-05, |
| "loss": 1.2973, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.23572308450357232, |
| "grad_norm": 0.6992717345016547, |
| "learning_rate": 1.8899415826077784e-05, |
| "loss": 1.2562, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.23611727026361173, |
| "grad_norm": 0.7855449422876546, |
| "learning_rate": 1.8893128690061625e-05, |
| "loss": 1.3331, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.23651145602365115, |
| "grad_norm": 0.7330330982965301, |
| "learning_rate": 1.8886824699269916e-05, |
| "loss": 1.2719, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.23690564178369056, |
| "grad_norm": 0.7235999574209688, |
| "learning_rate": 1.888050386565034e-05, |
| "loss": 1.2848, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.23729982754373, |
| "grad_norm": 0.7259572083243264, |
| "learning_rate": 1.8874166201182526e-05, |
| "loss": 1.2901, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.2376940133037694, |
| "grad_norm": 0.738733374260345, |
| "learning_rate": 1.8867811717877966e-05, |
| "loss": 1.2949, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.23808819906380882, |
| "grad_norm": 0.7293917944233541, |
| "learning_rate": 1.886144042778006e-05, |
| "loss": 1.2738, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.23848238482384823, |
| "grad_norm": 0.7004391383451308, |
| "learning_rate": 1.885505234296404e-05, |
| "loss": 1.2703, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.23887657058388767, |
| "grad_norm": 0.7664560785377862, |
| "learning_rate": 1.884864747553698e-05, |
| "loss": 1.3647, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.23927075634392708, |
| "grad_norm": 0.8048750538355759, |
| "learning_rate": 1.8842225837637765e-05, |
| "loss": 1.4858, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.2396649421039665, |
| "grad_norm": 0.7886892188335735, |
| "learning_rate": 1.8835787441437043e-05, |
| "loss": 1.3808, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.2400591278640059, |
| "grad_norm": 0.700691895354596, |
| "learning_rate": 1.8829332299137245e-05, |
| "loss": 1.3073, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.24045331362404535, |
| "grad_norm": 0.749597801010302, |
| "learning_rate": 1.882286042297254e-05, |
| "loss": 1.3656, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.24084749938408476, |
| "grad_norm": 0.7481923330312744, |
| "learning_rate": 1.881637182520879e-05, |
| "loss": 1.3272, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.24124168514412417, |
| "grad_norm": 0.6957757781146582, |
| "learning_rate": 1.880986651814357e-05, |
| "loss": 1.2368, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.24163587090416358, |
| "grad_norm": 0.7428959152728734, |
| "learning_rate": 1.8803344514106123e-05, |
| "loss": 1.3561, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.24203005666420302, |
| "grad_norm": 0.733482247697521, |
| "learning_rate": 1.8796805825457324e-05, |
| "loss": 1.3296, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.24242424242424243, |
| "grad_norm": 0.7941648551428049, |
| "learning_rate": 1.8790250464589676e-05, |
| "loss": 1.3018, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.24281842818428184, |
| "grad_norm": 0.7864984021030504, |
| "learning_rate": 1.8783678443927282e-05, |
| "loss": 1.3507, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.24321261394432125, |
| "grad_norm": 0.7607319722931054, |
| "learning_rate": 1.8777089775925822e-05, |
| "loss": 1.3028, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.2436067997043607, |
| "grad_norm": 0.7531520087715251, |
| "learning_rate": 1.8770484473072518e-05, |
| "loss": 1.337, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.2440009854644001, |
| "grad_norm": 0.7227583108021773, |
| "learning_rate": 1.8763862547886133e-05, |
| "loss": 1.3006, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.24439517122443952, |
| "grad_norm": 0.7244215425325586, |
| "learning_rate": 1.8757224012916913e-05, |
| "loss": 1.3111, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.24478935698447893, |
| "grad_norm": 0.726809176042967, |
| "learning_rate": 1.8750568880746606e-05, |
| "loss": 1.2595, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.24518354274451837, |
| "grad_norm": 0.7409190065458727, |
| "learning_rate": 1.87438971639884e-05, |
| "loss": 1.2985, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.24557772850455778, |
| "grad_norm": 0.7027463402470976, |
| "learning_rate": 1.8737208875286933e-05, |
| "loss": 1.2993, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.2459719142645972, |
| "grad_norm": 0.7354741797652073, |
| "learning_rate": 1.8730504027318223e-05, |
| "loss": 1.3101, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.2463661000246366, |
| "grad_norm": 0.7151055215992336, |
| "learning_rate": 1.87237826327897e-05, |
| "loss": 1.3016, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.24676028578467604, |
| "grad_norm": 0.7346955837306206, |
| "learning_rate": 1.871704470444014e-05, |
| "loss": 1.3026, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.24715447154471545, |
| "grad_norm": 0.7087046803059532, |
| "learning_rate": 1.8710290255039654e-05, |
| "loss": 1.3149, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.24754865730475487, |
| "grad_norm": 0.7301865796459245, |
| "learning_rate": 1.870351929738967e-05, |
| "loss": 1.2857, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.24794284306479428, |
| "grad_norm": 0.7189028712874932, |
| "learning_rate": 1.86967318443229e-05, |
| "loss": 1.3185, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.24833702882483372, |
| "grad_norm": 0.6879300842588244, |
| "learning_rate": 1.8689927908703325e-05, |
| "loss": 1.2882, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.24873121458487313, |
| "grad_norm": 0.6980954368807367, |
| "learning_rate": 1.8683107503426158e-05, |
| "loss": 1.2522, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.24912540034491254, |
| "grad_norm": 0.7545776954574633, |
| "learning_rate": 1.8676270641417824e-05, |
| "loss": 1.322, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.24951958610495195, |
| "grad_norm": 0.7115077185501087, |
| "learning_rate": 1.8669417335635946e-05, |
| "loss": 1.2723, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.2499137718649914, |
| "grad_norm": 0.7379949770472353, |
| "learning_rate": 1.866254759906931e-05, |
| "loss": 1.4362, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.2503079576250308, |
| "grad_norm": 0.7573308426125499, |
| "learning_rate": 1.8655661444737835e-05, |
| "loss": 1.3177, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.25070214338507024, |
| "grad_norm": 0.7257743669215548, |
| "learning_rate": 1.864875888569257e-05, |
| "loss": 1.3062, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.25109632914510965, |
| "grad_norm": 0.6940203952508667, |
| "learning_rate": 1.864183993501564e-05, |
| "loss": 1.2652, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.25149051490514907, |
| "grad_norm": 0.8172564591114041, |
| "learning_rate": 1.863490460582025e-05, |
| "loss": 1.3199, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.2518847006651885, |
| "grad_norm": 0.7226317764207526, |
| "learning_rate": 1.8627952911250632e-05, |
| "loss": 1.3106, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.2522788864252279, |
| "grad_norm": 0.7438657902645007, |
| "learning_rate": 1.8620984864482046e-05, |
| "loss": 1.2981, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2526730721852673, |
| "grad_norm": 0.7422399467375352, |
| "learning_rate": 1.8614000478720743e-05, |
| "loss": 1.3406, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.2530672579453067, |
| "grad_norm": 0.7811618617681046, |
| "learning_rate": 1.860699976720393e-05, |
| "loss": 1.3105, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.2534614437053461, |
| "grad_norm": 0.7398963519463426, |
| "learning_rate": 1.8599982743199775e-05, |
| "loss": 1.3194, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.2538556294653856, |
| "grad_norm": 0.7614275275857106, |
| "learning_rate": 1.859294942000734e-05, |
| "loss": 1.2825, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.254249815225425, |
| "grad_norm": 0.7495597529607684, |
| "learning_rate": 1.85858998109566e-05, |
| "loss": 1.2941, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.2546440009854644, |
| "grad_norm": 0.76715001759035, |
| "learning_rate": 1.857883392940837e-05, |
| "loss": 1.3126, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2550381867455038, |
| "grad_norm": 0.7357189271424588, |
| "learning_rate": 1.8571751788754336e-05, |
| "loss": 1.3363, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.25543237250554324, |
| "grad_norm": 0.7382893718452418, |
| "learning_rate": 1.856465340241697e-05, |
| "loss": 1.2237, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.25582655826558265, |
| "grad_norm": 0.7377308175335368, |
| "learning_rate": 1.8557538783849555e-05, |
| "loss": 1.2561, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.25622074402562206, |
| "grad_norm": 0.7792573574030509, |
| "learning_rate": 1.8550407946536127e-05, |
| "loss": 1.2835, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.25661492978566147, |
| "grad_norm": 0.8268845473577122, |
| "learning_rate": 1.8543260903991467e-05, |
| "loss": 1.2624, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.25700911554570094, |
| "grad_norm": 0.7139020431429061, |
| "learning_rate": 1.8536097669761066e-05, |
| "loss": 1.2767, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.25740330130574035, |
| "grad_norm": 0.836771495489938, |
| "learning_rate": 1.85289182574211e-05, |
| "loss": 1.2564, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.25779748706577976, |
| "grad_norm": 0.7744188165849301, |
| "learning_rate": 1.8521722680578413e-05, |
| "loss": 1.3551, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.2581916728258192, |
| "grad_norm": 0.7733400605257766, |
| "learning_rate": 1.851451095287048e-05, |
| "loss": 1.3511, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.2585858585858586, |
| "grad_norm": 0.7813471536798385, |
| "learning_rate": 1.850728308796539e-05, |
| "loss": 1.2426, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.258980044345898, |
| "grad_norm": 0.7708022669200939, |
| "learning_rate": 1.8500039099561807e-05, |
| "loss": 1.2708, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.2593742301059374, |
| "grad_norm": 0.7838881723591813, |
| "learning_rate": 1.8492779001388964e-05, |
| "loss": 1.3396, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.2597684158659768, |
| "grad_norm": 0.7443818910969162, |
| "learning_rate": 1.8485502807206624e-05, |
| "loss": 1.3021, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.2601626016260163, |
| "grad_norm": 0.7268444207695822, |
| "learning_rate": 1.847821053080505e-05, |
| "loss": 1.3232, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2605567873860557, |
| "grad_norm": 0.7145438455342924, |
| "learning_rate": 1.8470902186004995e-05, |
| "loss": 1.2762, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.2609509731460951, |
| "grad_norm": 0.798127221257281, |
| "learning_rate": 1.8463577786657653e-05, |
| "loss": 1.3434, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.2613451589061345, |
| "grad_norm": 0.8286302645386731, |
| "learning_rate": 1.845623734664465e-05, |
| "loss": 1.3648, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.26173934466617393, |
| "grad_norm": 0.7056475119658424, |
| "learning_rate": 1.8448880879878026e-05, |
| "loss": 1.2664, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.26213353042621335, |
| "grad_norm": 0.7486227238349661, |
| "learning_rate": 1.844150840030018e-05, |
| "loss": 1.3144, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.26252771618625276, |
| "grad_norm": 0.7252618893757948, |
| "learning_rate": 1.8434119921883865e-05, |
| "loss": 1.2523, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.26292190194629217, |
| "grad_norm": 0.7522705686940889, |
| "learning_rate": 1.8426715458632154e-05, |
| "loss": 1.3312, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.26331608770633164, |
| "grad_norm": 0.7442803975025406, |
| "learning_rate": 1.8419295024578417e-05, |
| "loss": 1.3162, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.26371027346637105, |
| "grad_norm": 0.7428662761759469, |
| "learning_rate": 1.8411858633786298e-05, |
| "loss": 1.3616, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.26410445922641046, |
| "grad_norm": 0.6883090253519637, |
| "learning_rate": 1.8404406300349673e-05, |
| "loss": 1.2775, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.26449864498644987, |
| "grad_norm": 0.7298650894749236, |
| "learning_rate": 1.8396938038392636e-05, |
| "loss": 1.2973, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.2648928307464893, |
| "grad_norm": 0.7210785949379522, |
| "learning_rate": 1.838945386206948e-05, |
| "loss": 1.2651, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.2652870165065287, |
| "grad_norm": 0.7455429622427832, |
| "learning_rate": 1.8381953785564653e-05, |
| "loss": 1.2784, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.2656812022665681, |
| "grad_norm": 0.7101554754335506, |
| "learning_rate": 1.8374437823092726e-05, |
| "loss": 1.2153, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.2660753880266075, |
| "grad_norm": 0.7052828798902647, |
| "learning_rate": 1.836690598889839e-05, |
| "loss": 1.2874, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.266469573786647, |
| "grad_norm": 0.7102957673047738, |
| "learning_rate": 1.835935829725643e-05, |
| "loss": 1.3323, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.2668637595466864, |
| "grad_norm": 0.7113208099408921, |
| "learning_rate": 1.8351794762471656e-05, |
| "loss": 1.2808, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.2672579453067258, |
| "grad_norm": 0.713012458638494, |
| "learning_rate": 1.8344215398878925e-05, |
| "loss": 1.2499, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.2676521310667652, |
| "grad_norm": 0.7458478391351581, |
| "learning_rate": 1.833662022084309e-05, |
| "loss": 1.2379, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.26804631682680463, |
| "grad_norm": 0.6955091694637261, |
| "learning_rate": 1.8329009242758977e-05, |
| "loss": 1.2148, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.26844050258684404, |
| "grad_norm": 0.7331960366798272, |
| "learning_rate": 1.832138247905135e-05, |
| "loss": 1.3051, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.26883468834688345, |
| "grad_norm": 0.7207567261465225, |
| "learning_rate": 1.8313739944174894e-05, |
| "loss": 1.3065, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.26922887410692287, |
| "grad_norm": 0.7148277245246873, |
| "learning_rate": 1.8306081652614192e-05, |
| "loss": 1.2788, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.26962305986696233, |
| "grad_norm": 0.7155577906316034, |
| "learning_rate": 1.829840761888368e-05, |
| "loss": 1.2429, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.27001724562700175, |
| "grad_norm": 0.696356161317749, |
| "learning_rate": 1.829071785752764e-05, |
| "loss": 1.2729, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.27041143138704116, |
| "grad_norm": 0.7128716614175701, |
| "learning_rate": 1.8283012383120148e-05, |
| "loss": 1.3227, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.27080561714708057, |
| "grad_norm": 0.7465800322640285, |
| "learning_rate": 1.827529121026507e-05, |
| "loss": 1.3252, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.27119980290712, |
| "grad_norm": 0.8172136430700996, |
| "learning_rate": 1.8267554353596027e-05, |
| "loss": 1.2756, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.2715939886671594, |
| "grad_norm": 0.7347557447163089, |
| "learning_rate": 1.8259801827776358e-05, |
| "loss": 1.2878, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.2719881744271988, |
| "grad_norm": 0.6960464962207745, |
| "learning_rate": 1.82520336474991e-05, |
| "loss": 1.2508, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2723823601872382, |
| "grad_norm": 0.7323542648353354, |
| "learning_rate": 1.8244249827486962e-05, |
| "loss": 1.3276, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.2727765459472777, |
| "grad_norm": 0.7334410491777583, |
| "learning_rate": 1.8236450382492293e-05, |
| "loss": 1.2446, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.2731707317073171, |
| "grad_norm": 0.7700697100142729, |
| "learning_rate": 1.8228635327297054e-05, |
| "loss": 1.2647, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.2735649174673565, |
| "grad_norm": 0.6868021899359485, |
| "learning_rate": 1.8220804676712797e-05, |
| "loss": 1.2585, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.2739591032273959, |
| "grad_norm": 0.7056110870773941, |
| "learning_rate": 1.8212958445580623e-05, |
| "loss": 1.2978, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.27435328898743533, |
| "grad_norm": 0.7042929029435405, |
| "learning_rate": 1.8205096648771166e-05, |
| "loss": 1.2778, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.27474747474747474, |
| "grad_norm": 0.7960978757280552, |
| "learning_rate": 1.8197219301184565e-05, |
| "loss": 1.3364, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.27514166050751415, |
| "grad_norm": 0.7288353276886701, |
| "learning_rate": 1.818932641775043e-05, |
| "loss": 1.3099, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.27553584626755356, |
| "grad_norm": 0.7479924057933423, |
| "learning_rate": 1.81814180134278e-05, |
| "loss": 1.3429, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.27593003202759303, |
| "grad_norm": 0.7715814930725846, |
| "learning_rate": 1.817349410320516e-05, |
| "loss": 1.2634, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.27632421778763244, |
| "grad_norm": 0.7186502326915973, |
| "learning_rate": 1.816555470210036e-05, |
| "loss": 1.2677, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.27671840354767185, |
| "grad_norm": 0.6963815556934851, |
| "learning_rate": 1.815759982516061e-05, |
| "loss": 1.2738, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.27711258930771127, |
| "grad_norm": 0.725935134036574, |
| "learning_rate": 1.8149629487462466e-05, |
| "loss": 1.3357, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.2775067750677507, |
| "grad_norm": 0.7440336010726357, |
| "learning_rate": 1.814164370411177e-05, |
| "loss": 1.3394, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.2779009608277901, |
| "grad_norm": 0.7144497832774677, |
| "learning_rate": 1.8133642490243642e-05, |
| "loss": 1.3247, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.2782951465878295, |
| "grad_norm": 0.7330387391854017, |
| "learning_rate": 1.8125625861022455e-05, |
| "loss": 1.3037, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.2786893323478689, |
| "grad_norm": 0.7408644571783576, |
| "learning_rate": 1.8117593831641788e-05, |
| "loss": 1.2714, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.2790835181079084, |
| "grad_norm": 0.7538056025050238, |
| "learning_rate": 1.810954641732441e-05, |
| "loss": 1.2744, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.2794777038679478, |
| "grad_norm": 0.7178383604389642, |
| "learning_rate": 1.8101483633322255e-05, |
| "loss": 1.3522, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.2798718896279872, |
| "grad_norm": 0.7286512088304942, |
| "learning_rate": 1.8093405494916373e-05, |
| "loss": 1.2913, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2802660753880266, |
| "grad_norm": 0.7524538518197109, |
| "learning_rate": 1.8085312017416926e-05, |
| "loss": 1.3544, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.280660261148066, |
| "grad_norm": 0.7789095889944275, |
| "learning_rate": 1.8077203216163145e-05, |
| "loss": 1.3328, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.28105444690810544, |
| "grad_norm": 0.7027682398341476, |
| "learning_rate": 1.8069079106523303e-05, |
| "loss": 1.316, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.28144863266814485, |
| "grad_norm": 0.71974038692439, |
| "learning_rate": 1.8060939703894684e-05, |
| "loss": 1.3089, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.28184281842818426, |
| "grad_norm": 0.750073440309824, |
| "learning_rate": 1.805278502370356e-05, |
| "loss": 1.28, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.28223700418822373, |
| "grad_norm": 0.7157617956836964, |
| "learning_rate": 1.8044615081405153e-05, |
| "loss": 1.2604, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.28263118994826314, |
| "grad_norm": 0.7094277876635081, |
| "learning_rate": 1.8036429892483615e-05, |
| "loss": 1.2041, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.28302537570830255, |
| "grad_norm": 0.6869213238799484, |
| "learning_rate": 1.8028229472451994e-05, |
| "loss": 1.2326, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.28341956146834196, |
| "grad_norm": 0.7609339774943211, |
| "learning_rate": 1.80200138368522e-05, |
| "loss": 1.2778, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.2838137472283814, |
| "grad_norm": 0.7445388720919836, |
| "learning_rate": 1.801178300125499e-05, |
| "loss": 1.3466, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2842079329884208, |
| "grad_norm": 0.75543054063603, |
| "learning_rate": 1.800353698125992e-05, |
| "loss": 1.2684, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.2846021187484602, |
| "grad_norm": 0.7126562502264812, |
| "learning_rate": 1.7995275792495327e-05, |
| "loss": 1.3145, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.2849963045084996, |
| "grad_norm": 0.750515516790499, |
| "learning_rate": 1.7986999450618295e-05, |
| "loss": 1.2766, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.2853904902685391, |
| "grad_norm": 0.7302431877687291, |
| "learning_rate": 1.7978707971314636e-05, |
| "loss": 1.2127, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.2857846760285785, |
| "grad_norm": 0.7122551920492798, |
| "learning_rate": 1.797040137029884e-05, |
| "loss": 1.2589, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.2861788617886179, |
| "grad_norm": 0.7938703124948006, |
| "learning_rate": 1.796207966331406e-05, |
| "loss": 1.3729, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.2865730475486573, |
| "grad_norm": 0.7541217984200421, |
| "learning_rate": 1.7953742866132082e-05, |
| "loss": 1.2927, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.2869672333086967, |
| "grad_norm": 0.7255479166779722, |
| "learning_rate": 1.794539099455329e-05, |
| "loss": 1.3431, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.28736141906873613, |
| "grad_norm": 0.7453202011835943, |
| "learning_rate": 1.7937024064406637e-05, |
| "loss": 1.2764, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.28775560482877555, |
| "grad_norm": 0.7449089241310055, |
| "learning_rate": 1.7928642091549616e-05, |
| "loss": 1.2666, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.28814979058881496, |
| "grad_norm": 0.688535746874336, |
| "learning_rate": 1.792024509186823e-05, |
| "loss": 1.2396, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.2885439763488544, |
| "grad_norm": 0.7179660403513343, |
| "learning_rate": 1.7911833081276962e-05, |
| "loss": 1.2404, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.28893816210889384, |
| "grad_norm": 0.6957846541829211, |
| "learning_rate": 1.7903406075718744e-05, |
| "loss": 1.3032, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.28933234786893325, |
| "grad_norm": 0.7453327673964074, |
| "learning_rate": 1.7894964091164932e-05, |
| "loss": 1.3043, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.28972653362897266, |
| "grad_norm": 0.6889929678498284, |
| "learning_rate": 1.788650714361526e-05, |
| "loss": 1.2273, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.29012071938901207, |
| "grad_norm": 0.7514828515828875, |
| "learning_rate": 1.787803524909783e-05, |
| "loss": 1.232, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.2905149051490515, |
| "grad_norm": 0.69838877253169, |
| "learning_rate": 1.7869548423669075e-05, |
| "loss": 1.1814, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.2909090909090909, |
| "grad_norm": 0.7028140683366864, |
| "learning_rate": 1.7861046683413717e-05, |
| "loss": 1.3324, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2913032766691303, |
| "grad_norm": 0.7609333767596239, |
| "learning_rate": 1.785253004444475e-05, |
| "loss": 1.3309, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.2916974624291698, |
| "grad_norm": 0.6993070009969047, |
| "learning_rate": 1.78439985229034e-05, |
| "loss": 1.2958, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2920916481892092, |
| "grad_norm": 0.7895491591304246, |
| "learning_rate": 1.7835452134959112e-05, |
| "loss": 1.2721, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.2924858339492486, |
| "grad_norm": 0.7484581002135297, |
| "learning_rate": 1.7826890896809492e-05, |
| "loss": 1.2696, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.292880019709288, |
| "grad_norm": 0.7180118235912724, |
| "learning_rate": 1.78183148246803e-05, |
| "loss": 1.3026, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.2932742054693274, |
| "grad_norm": 0.7821323900052215, |
| "learning_rate": 1.7809723934825405e-05, |
| "loss": 1.244, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.29366839122936683, |
| "grad_norm": 0.731279597221484, |
| "learning_rate": 1.7801118243526764e-05, |
| "loss": 1.2841, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.29406257698940624, |
| "grad_norm": 0.7328987907210074, |
| "learning_rate": 1.7792497767094384e-05, |
| "loss": 1.2574, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.29445676274944566, |
| "grad_norm": 0.7546401708479835, |
| "learning_rate": 1.7783862521866296e-05, |
| "loss": 1.2514, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.2948509485094851, |
| "grad_norm": 0.6961282567593424, |
| "learning_rate": 1.7775212524208513e-05, |
| "loss": 1.2659, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.29524513426952453, |
| "grad_norm": 0.7069163112336031, |
| "learning_rate": 1.776654779051502e-05, |
| "loss": 1.2231, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.29563932002956395, |
| "grad_norm": 0.7257978049323676, |
| "learning_rate": 1.775786833720773e-05, |
| "loss": 1.2728, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.29603350578960336, |
| "grad_norm": 0.7560009441390841, |
| "learning_rate": 1.7749174180736443e-05, |
| "loss": 1.2819, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.29642769154964277, |
| "grad_norm": 0.6956575266835414, |
| "learning_rate": 1.7740465337578823e-05, |
| "loss": 1.3005, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.2968218773096822, |
| "grad_norm": 0.7079492136542035, |
| "learning_rate": 1.7731741824240385e-05, |
| "loss": 1.227, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.2972160630697216, |
| "grad_norm": 0.7184097566051775, |
| "learning_rate": 1.7723003657254447e-05, |
| "loss": 1.2924, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.297610248829761, |
| "grad_norm": 0.6854141387606205, |
| "learning_rate": 1.771425085318208e-05, |
| "loss": 1.2557, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.29800443458980047, |
| "grad_norm": 0.6879860581907943, |
| "learning_rate": 1.7705483428612114e-05, |
| "loss": 1.2204, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.2983986203498399, |
| "grad_norm": 0.7067053556944854, |
| "learning_rate": 1.7696701400161077e-05, |
| "loss": 1.2709, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.2987928061098793, |
| "grad_norm": 0.6684898845941895, |
| "learning_rate": 1.768790478447319e-05, |
| "loss": 1.2379, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.2991869918699187, |
| "grad_norm": 0.7669440743034426, |
| "learning_rate": 1.7679093598220305e-05, |
| "loss": 1.2965, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.2995811776299581, |
| "grad_norm": 0.7264067182866932, |
| "learning_rate": 1.7670267858101895e-05, |
| "loss": 1.3299, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.29997536338999753, |
| "grad_norm": 0.7154874058477277, |
| "learning_rate": 1.766142758084502e-05, |
| "loss": 1.2714, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.30036954915003694, |
| "grad_norm": 0.7339691526122842, |
| "learning_rate": 1.7652572783204286e-05, |
| "loss": 1.2567, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.30076373491007635, |
| "grad_norm": 0.7113428916700398, |
| "learning_rate": 1.764370348196183e-05, |
| "loss": 1.2466, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.3011579206701158, |
| "grad_norm": 0.7468376219349876, |
| "learning_rate": 1.7634819693927254e-05, |
| "loss": 1.2894, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.30155210643015523, |
| "grad_norm": 0.706632725084111, |
| "learning_rate": 1.762592143593764e-05, |
| "loss": 1.2872, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.30194629219019464, |
| "grad_norm": 0.6794782711352044, |
| "learning_rate": 1.761700872485748e-05, |
| "loss": 1.2807, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.30234047795023405, |
| "grad_norm": 0.7244853098320986, |
| "learning_rate": 1.7608081577578665e-05, |
| "loss": 1.2835, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.30273466371027347, |
| "grad_norm": 0.778447414784227, |
| "learning_rate": 1.759914001102045e-05, |
| "loss": 1.2765, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.3031288494703129, |
| "grad_norm": 0.6969578931450477, |
| "learning_rate": 1.7590184042129406e-05, |
| "loss": 1.231, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.3035230352303523, |
| "grad_norm": 0.6772342269604559, |
| "learning_rate": 1.758121368787941e-05, |
| "loss": 1.2599, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.3039172209903917, |
| "grad_norm": 0.7659352446323853, |
| "learning_rate": 1.7572228965271595e-05, |
| "loss": 1.2728, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.30431140675043117, |
| "grad_norm": 0.7140083484092759, |
| "learning_rate": 1.756322989133434e-05, |
| "loss": 1.273, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.3047055925104706, |
| "grad_norm": 0.7580395855737478, |
| "learning_rate": 1.7554216483123205e-05, |
| "loss": 1.257, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.30509977827051, |
| "grad_norm": 0.7139671098163918, |
| "learning_rate": 1.7545188757720933e-05, |
| "loss": 1.2526, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.3054939640305494, |
| "grad_norm": 0.7180915186637021, |
| "learning_rate": 1.753614673223739e-05, |
| "loss": 1.284, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.3058881497905888, |
| "grad_norm": 0.6906674260442509, |
| "learning_rate": 1.7527090423809553e-05, |
| "loss": 1.3048, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.3062823355506282, |
| "grad_norm": 0.6975851458655973, |
| "learning_rate": 1.7518019849601466e-05, |
| "loss": 1.2902, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.30667652131066764, |
| "grad_norm": 0.7046928833082814, |
| "learning_rate": 1.7508935026804202e-05, |
| "loss": 1.2339, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.30707070707070705, |
| "grad_norm": 0.7051521547776037, |
| "learning_rate": 1.749983597263586e-05, |
| "loss": 1.2921, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.3074648928307465, |
| "grad_norm": 0.6736469006003648, |
| "learning_rate": 1.749072270434148e-05, |
| "loss": 1.271, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.30785907859078593, |
| "grad_norm": 1.9120037647074484, |
| "learning_rate": 1.7481595239193073e-05, |
| "loss": 1.2196, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.30825326435082534, |
| "grad_norm": 0.72077851804003, |
| "learning_rate": 1.747245359448954e-05, |
| "loss": 1.2623, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.30864745011086475, |
| "grad_norm": 0.6879089595866057, |
| "learning_rate": 1.7463297787556656e-05, |
| "loss": 1.2604, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.30904163587090416, |
| "grad_norm": 0.7126887694269388, |
| "learning_rate": 1.745412783574704e-05, |
| "loss": 1.2688, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.3094358216309436, |
| "grad_norm": 0.6783349938024574, |
| "learning_rate": 1.744494375644012e-05, |
| "loss": 1.2142, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.309830007390983, |
| "grad_norm": 0.7591782870663694, |
| "learning_rate": 1.7435745567042096e-05, |
| "loss": 1.3246, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.3102241931510224, |
| "grad_norm": 0.7137080648341777, |
| "learning_rate": 1.7426533284985912e-05, |
| "loss": 1.256, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.31061837891106187, |
| "grad_norm": 0.712242808651282, |
| "learning_rate": 1.7417306927731226e-05, |
| "loss": 1.2504, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.3110125646711013, |
| "grad_norm": 0.7706834788124493, |
| "learning_rate": 1.7408066512764365e-05, |
| "loss": 1.2842, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.3114067504311407, |
| "grad_norm": 0.6756575206343757, |
| "learning_rate": 1.73988120575983e-05, |
| "loss": 1.2302, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.3118009361911801, |
| "grad_norm": 0.7172786293209685, |
| "learning_rate": 1.7389543579772613e-05, |
| "loss": 1.2746, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.3121951219512195, |
| "grad_norm": 0.7114990921157863, |
| "learning_rate": 1.738026109685347e-05, |
| "loss": 1.247, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.3125893077112589, |
| "grad_norm": 0.7464653029721845, |
| "learning_rate": 1.737096462643357e-05, |
| "loss": 1.2843, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.31298349347129834, |
| "grad_norm": 0.7246251283451155, |
| "learning_rate": 1.736165418613212e-05, |
| "loss": 1.2896, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.31337767923133775, |
| "grad_norm": 0.709039744798614, |
| "learning_rate": 1.7352329793594817e-05, |
| "loss": 1.2729, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.3137718649913772, |
| "grad_norm": 0.7184347792609641, |
| "learning_rate": 1.7342991466493785e-05, |
| "loss": 1.3516, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.3141660507514166, |
| "grad_norm": 0.677698026889925, |
| "learning_rate": 1.7333639222527572e-05, |
| "loss": 1.2565, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.31456023651145604, |
| "grad_norm": 0.7345054222302991, |
| "learning_rate": 1.732427307942109e-05, |
| "loss": 1.2509, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.31495442227149545, |
| "grad_norm": 0.7766755838188357, |
| "learning_rate": 1.7314893054925604e-05, |
| "loss": 1.2766, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.31534860803153486, |
| "grad_norm": 0.8110496899704974, |
| "learning_rate": 1.730549916681868e-05, |
| "loss": 1.3387, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.31574279379157427, |
| "grad_norm": 0.7332603361275668, |
| "learning_rate": 1.7296091432904164e-05, |
| "loss": 1.3232, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.3161369795516137, |
| "grad_norm": 0.7406352642846648, |
| "learning_rate": 1.728666987101214e-05, |
| "loss": 1.2996, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.3165311653116531, |
| "grad_norm": 0.7257385239706662, |
| "learning_rate": 1.7277234498998897e-05, |
| "loss": 1.2809, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.31692535107169256, |
| "grad_norm": 0.7450615958562268, |
| "learning_rate": 1.726778533474691e-05, |
| "loss": 1.2937, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.317319536831732, |
| "grad_norm": 0.7062517786301892, |
| "learning_rate": 1.725832239616478e-05, |
| "loss": 1.3006, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.3177137225917714, |
| "grad_norm": 0.7080667822251828, |
| "learning_rate": 1.724884570118722e-05, |
| "loss": 1.2349, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.3181079083518108, |
| "grad_norm": 0.7066931019098044, |
| "learning_rate": 1.723935526777502e-05, |
| "loss": 1.2272, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.3185020941118502, |
| "grad_norm": 0.6946668338018744, |
| "learning_rate": 1.722985111391499e-05, |
| "loss": 1.2962, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.3188962798718896, |
| "grad_norm": 0.6796597060520128, |
| "learning_rate": 1.7220333257619967e-05, |
| "loss": 1.3037, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.31929046563192903, |
| "grad_norm": 1.6609616990291973, |
| "learning_rate": 1.721080171692874e-05, |
| "loss": 1.3676, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.31968465139196844, |
| "grad_norm": 0.7455397950852571, |
| "learning_rate": 1.720125650990605e-05, |
| "loss": 1.2693, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.3200788371520079, |
| "grad_norm": 1.8102002609851213, |
| "learning_rate": 1.7191697654642517e-05, |
| "loss": 1.443, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.3204730229120473, |
| "grad_norm": 1.6105677014342337, |
| "learning_rate": 1.7182125169254646e-05, |
| "loss": 1.3548, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.32086720867208673, |
| "grad_norm": 1.9398768550889596, |
| "learning_rate": 1.717253907188477e-05, |
| "loss": 1.3585, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.32126139443212615, |
| "grad_norm": 1.628604424489859, |
| "learning_rate": 1.716293938070102e-05, |
| "loss": 1.3206, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.32165558019216556, |
| "grad_norm": 2.801181103409832, |
| "learning_rate": 1.7153326113897286e-05, |
| "loss": 1.4204, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.32204976595220497, |
| "grad_norm": 1.0130939786005846, |
| "learning_rate": 1.7143699289693193e-05, |
| "loss": 1.2738, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.3224439517122444, |
| "grad_norm": 6.872564216473981, |
| "learning_rate": 1.7134058926334063e-05, |
| "loss": 1.262, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.3228381374722838, |
| "grad_norm": 1.4200836123074054, |
| "learning_rate": 1.7124405042090865e-05, |
| "loss": 1.3799, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.32323232323232326, |
| "grad_norm": 5.08400535142629, |
| "learning_rate": 1.711473765526021e-05, |
| "loss": 1.3092, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.32362650899236267, |
| "grad_norm": 1.5849311506474677, |
| "learning_rate": 1.7105056784164295e-05, |
| "loss": 1.2599, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.3240206947524021, |
| "grad_norm": 1.0013431185133732, |
| "learning_rate": 1.7095362447150866e-05, |
| "loss": 1.3207, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.3244148805124415, |
| "grad_norm": 0.6866727508748066, |
| "learning_rate": 1.7085654662593192e-05, |
| "loss": 1.2265, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.3248090662724809, |
| "grad_norm": 0.7423237770798616, |
| "learning_rate": 1.7075933448890037e-05, |
| "loss": 1.2494, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.3252032520325203, |
| "grad_norm": 0.7327984292482648, |
| "learning_rate": 1.706619882446561e-05, |
| "loss": 1.2826, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.32559743779255973, |
| "grad_norm": 0.8307141447009255, |
| "learning_rate": 1.7056450807769543e-05, |
| "loss": 1.3328, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.32599162355259914, |
| "grad_norm": 0.7685568008883157, |
| "learning_rate": 1.7046689417276836e-05, |
| "loss": 1.2668, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.3263858093126386, |
| "grad_norm": 0.7143149682827579, |
| "learning_rate": 1.7036914671487854e-05, |
| "loss": 1.3147, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.326779995072678, |
| "grad_norm": 0.7441227072240346, |
| "learning_rate": 1.7027126588928255e-05, |
| "loss": 1.2662, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.32717418083271743, |
| "grad_norm": 0.8549422472836754, |
| "learning_rate": 1.701732518814899e-05, |
| "loss": 1.2276, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.32756836659275684, |
| "grad_norm": 0.7104822685684634, |
| "learning_rate": 1.7007510487726247e-05, |
| "loss": 1.2174, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.32796255235279625, |
| "grad_norm": 0.7990258038527759, |
| "learning_rate": 1.699768250626141e-05, |
| "loss": 1.2084, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.32835673811283567, |
| "grad_norm": 0.7941920583151476, |
| "learning_rate": 1.698784126238105e-05, |
| "loss": 1.3014, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3287509238728751, |
| "grad_norm": 0.7565823644252784, |
| "learning_rate": 1.697798677473686e-05, |
| "loss": 1.3198, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.3291451096329145, |
| "grad_norm": 0.776895609925856, |
| "learning_rate": 1.6968119062005644e-05, |
| "loss": 1.3171, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.32953929539295396, |
| "grad_norm": 0.7511145926401521, |
| "learning_rate": 1.6958238142889258e-05, |
| "loss": 1.2645, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.32993348115299337, |
| "grad_norm": 0.8590843085742348, |
| "learning_rate": 1.6948344036114604e-05, |
| "loss": 1.2381, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.3303276669130328, |
| "grad_norm": 0.7298728955089272, |
| "learning_rate": 1.6938436760433565e-05, |
| "loss": 1.2919, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.3307218526730722, |
| "grad_norm": 0.723873691001796, |
| "learning_rate": 1.6928516334622988e-05, |
| "loss": 1.2859, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.3311160384331116, |
| "grad_norm": 0.6739547357750979, |
| "learning_rate": 1.6918582777484642e-05, |
| "loss": 1.2698, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.331510224193151, |
| "grad_norm": 0.7603942315040987, |
| "learning_rate": 1.690863610784518e-05, |
| "loss": 1.3326, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3319044099531904, |
| "grad_norm": 0.7428516273827751, |
| "learning_rate": 1.689867634455612e-05, |
| "loss": 1.3044, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.33229859571322984, |
| "grad_norm": 0.6987204595473288, |
| "learning_rate": 1.6888703506493774e-05, |
| "loss": 1.2418, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.3326927814732693, |
| "grad_norm": 0.6798174720438129, |
| "learning_rate": 1.687871761255925e-05, |
| "loss": 1.2692, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.3330869672333087, |
| "grad_norm": 0.6812029162107662, |
| "learning_rate": 1.6868718681678397e-05, |
| "loss": 1.2651, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.33348115299334813, |
| "grad_norm": 5.833213521596053, |
| "learning_rate": 1.6858706732801767e-05, |
| "loss": 1.2184, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.33387533875338754, |
| "grad_norm": 2.1210809511503856, |
| "learning_rate": 1.6848681784904597e-05, |
| "loss": 1.3386, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.33426952451342695, |
| "grad_norm": 4.2587995536151135, |
| "learning_rate": 1.6838643856986746e-05, |
| "loss": 1.2538, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.33466371027346636, |
| "grad_norm": 0.814091566447592, |
| "learning_rate": 1.682859296807268e-05, |
| "loss": 1.2472, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.3350578960335058, |
| "grad_norm": 0.7308070804439674, |
| "learning_rate": 1.6818529137211427e-05, |
| "loss": 1.222, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3354520817935452, |
| "grad_norm": 0.733680332929859, |
| "learning_rate": 1.680845238347655e-05, |
| "loss": 1.2992, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.33584626755358465, |
| "grad_norm": 0.7265681835122267, |
| "learning_rate": 1.6798362725966102e-05, |
| "loss": 1.2956, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.33624045331362407, |
| "grad_norm": 0.7402397151917712, |
| "learning_rate": 1.6788260183802586e-05, |
| "loss": 1.3171, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3366346390736635, |
| "grad_norm": 0.7137092615288991, |
| "learning_rate": 1.6778144776132927e-05, |
| "loss": 1.2102, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3370288248337029, |
| "grad_norm": 0.7156854110239057, |
| "learning_rate": 1.6768016522128435e-05, |
| "loss": 1.3038, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.3374230105937423, |
| "grad_norm": 0.711623409866771, |
| "learning_rate": 1.675787544098477e-05, |
| "loss": 1.2436, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.3378171963537817, |
| "grad_norm": 0.7171571327488878, |
| "learning_rate": 1.6747721551921894e-05, |
| "loss": 1.2316, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.3382113821138211, |
| "grad_norm": 0.8547583498487163, |
| "learning_rate": 1.6737554874184058e-05, |
| "loss": 1.2736, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.33860556787386054, |
| "grad_norm": 0.7302470996316592, |
| "learning_rate": 1.6727375427039734e-05, |
| "loss": 1.3211, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.3389997536339, |
| "grad_norm": 0.8374723063663263, |
| "learning_rate": 1.671718322978161e-05, |
| "loss": 1.22, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.3393939393939394, |
| "grad_norm": 0.6807758814646102, |
| "learning_rate": 1.6706978301726523e-05, |
| "loss": 1.1737, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.3397881251539788, |
| "grad_norm": 0.8925191795209313, |
| "learning_rate": 1.6696760662215457e-05, |
| "loss": 1.3089, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.34018231091401824, |
| "grad_norm": 0.7669197207119955, |
| "learning_rate": 1.6686530330613472e-05, |
| "loss": 1.2567, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.34057649667405765, |
| "grad_norm": 0.7863821853939692, |
| "learning_rate": 1.6676287326309684e-05, |
| "loss": 1.2913, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.34097068243409706, |
| "grad_norm": 0.7288234899543948, |
| "learning_rate": 1.6666031668717246e-05, |
| "loss": 1.2282, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3413648681941365, |
| "grad_norm": 0.7392427569586649, |
| "learning_rate": 1.6655763377273258e-05, |
| "loss": 1.2523, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.3417590539541759, |
| "grad_norm": 0.773906001259452, |
| "learning_rate": 1.6645482471438805e-05, |
| "loss": 1.2792, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.34215323971421535, |
| "grad_norm": 0.7307235011238918, |
| "learning_rate": 1.6635188970698843e-05, |
| "loss": 1.2767, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.34254742547425476, |
| "grad_norm": 0.7781474135830119, |
| "learning_rate": 1.662488289456222e-05, |
| "loss": 1.2846, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.3429416112342942, |
| "grad_norm": 0.7962078143230832, |
| "learning_rate": 1.661456426256161e-05, |
| "loss": 1.256, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.3433357969943336, |
| "grad_norm": 0.6984713096930648, |
| "learning_rate": 1.660423309425349e-05, |
| "loss": 1.2114, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.343729982754373, |
| "grad_norm": 0.9653083144870128, |
| "learning_rate": 1.6593889409218084e-05, |
| "loss": 1.27, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.3441241685144124, |
| "grad_norm": 0.7327421492980511, |
| "learning_rate": 1.6583533227059353e-05, |
| "loss": 1.2789, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.3445183542744518, |
| "grad_norm": 0.7398126983540253, |
| "learning_rate": 1.657316456740494e-05, |
| "loss": 1.3085, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.34491254003449123, |
| "grad_norm": 0.7299557711967728, |
| "learning_rate": 1.656278344990612e-05, |
| "loss": 1.2173, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.3453067257945307, |
| "grad_norm": 0.6863138322240955, |
| "learning_rate": 1.6552389894237806e-05, |
| "loss": 1.2902, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3457009115545701, |
| "grad_norm": 0.7199868674478724, |
| "learning_rate": 1.6541983920098462e-05, |
| "loss": 1.2807, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.3460950973146095, |
| "grad_norm": 0.7634746076633273, |
| "learning_rate": 1.6531565547210095e-05, |
| "loss": 1.2891, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.34648928307464893, |
| "grad_norm": 0.7334440482002302, |
| "learning_rate": 1.6521134795318214e-05, |
| "loss": 1.2927, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.34688346883468835, |
| "grad_norm": 0.7223249271668641, |
| "learning_rate": 1.6510691684191795e-05, |
| "loss": 1.328, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.34727765459472776, |
| "grad_norm": 0.7283270674703335, |
| "learning_rate": 1.650023623362322e-05, |
| "loss": 1.2518, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.34767184035476717, |
| "grad_norm": 0.6859617703188744, |
| "learning_rate": 1.648976846342827e-05, |
| "loss": 1.2036, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.3480660261148066, |
| "grad_norm": 0.743057000636584, |
| "learning_rate": 1.647928839344608e-05, |
| "loss": 1.1975, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.34846021187484605, |
| "grad_norm": 0.8879799533842352, |
| "learning_rate": 1.6468796043539082e-05, |
| "loss": 1.2689, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.34885439763488546, |
| "grad_norm": 0.8750572793943686, |
| "learning_rate": 1.645829143359299e-05, |
| "loss": 1.2318, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.34924858339492487, |
| "grad_norm": 0.7446142347770219, |
| "learning_rate": 1.6447774583516756e-05, |
| "loss": 1.2977, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.3496427691549643, |
| "grad_norm": 0.7504423660668825, |
| "learning_rate": 1.6437245513242523e-05, |
| "loss": 1.2924, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.3500369549150037, |
| "grad_norm": 0.7101861154635718, |
| "learning_rate": 1.6426704242725603e-05, |
| "loss": 1.2577, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.3504311406750431, |
| "grad_norm": 0.747939528808994, |
| "learning_rate": 1.6416150791944422e-05, |
| "loss": 1.258, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.3508253264350825, |
| "grad_norm": 0.8886537060733, |
| "learning_rate": 1.640558518090049e-05, |
| "loss": 1.2302, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.35121951219512193, |
| "grad_norm": 0.7590526147979498, |
| "learning_rate": 1.639500742961838e-05, |
| "loss": 1.2814, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.3516136979551614, |
| "grad_norm": 0.7361888142899841, |
| "learning_rate": 1.6384417558145654e-05, |
| "loss": 1.284, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.3520078837152008, |
| "grad_norm": 0.7328949864046489, |
| "learning_rate": 1.637381558655286e-05, |
| "loss": 1.2238, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.3524020694752402, |
| "grad_norm": 0.7763585243100655, |
| "learning_rate": 1.6363201534933465e-05, |
| "loss": 1.2669, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.35279625523527963, |
| "grad_norm": 0.7724373870079227, |
| "learning_rate": 1.635257542340384e-05, |
| "loss": 1.2572, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.35319044099531904, |
| "grad_norm": 0.7384217206450774, |
| "learning_rate": 1.6341937272103213e-05, |
| "loss": 1.2394, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.35358462675535846, |
| "grad_norm": 0.910247717689576, |
| "learning_rate": 1.6331287101193625e-05, |
| "loss": 1.2368, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.35397881251539787, |
| "grad_norm": 0.7158162891901805, |
| "learning_rate": 1.6320624930859905e-05, |
| "loss": 1.2402, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.3543729982754373, |
| "grad_norm": 0.8329732085362143, |
| "learning_rate": 1.6309950781309612e-05, |
| "loss": 1.2966, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.35476718403547675, |
| "grad_norm": 0.8155246854171831, |
| "learning_rate": 1.6299264672773025e-05, |
| "loss": 1.2497, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.35516136979551616, |
| "grad_norm": 0.7837030128107672, |
| "learning_rate": 1.6288566625503076e-05, |
| "loss": 1.2868, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.7235353047172081, |
| "learning_rate": 1.627785665977532e-05, |
| "loss": 1.3201, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.355949741315595, |
| "grad_norm": 0.7380179619209855, |
| "learning_rate": 1.6267134795887914e-05, |
| "loss": 1.3081, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.3563439270756344, |
| "grad_norm": 0.7592157290500411, |
| "learning_rate": 1.6256401054161565e-05, |
| "loss": 1.1903, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.3567381128356738, |
| "grad_norm": 0.7467318769646345, |
| "learning_rate": 1.6245655454939474e-05, |
| "loss": 1.2442, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.3571322985957132, |
| "grad_norm": 0.8375209294354106, |
| "learning_rate": 1.6234898018587336e-05, |
| "loss": 1.3645, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.3575264843557526, |
| "grad_norm": 0.6897682274849407, |
| "learning_rate": 1.622412876549327e-05, |
| "loss": 1.2427, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.3579206701157921, |
| "grad_norm": 0.6863050257352118, |
| "learning_rate": 1.621334771606778e-05, |
| "loss": 1.2618, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.3583148558758315, |
| "grad_norm": 0.7753517670222771, |
| "learning_rate": 1.6202554890743754e-05, |
| "loss": 1.3007, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.3587090416358709, |
| "grad_norm": 0.7259581040390859, |
| "learning_rate": 1.619175030997638e-05, |
| "loss": 1.2528, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.35910322739591033, |
| "grad_norm": 0.7718789856797308, |
| "learning_rate": 1.6180933994243123e-05, |
| "loss": 1.3085, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.35949741315594974, |
| "grad_norm": 0.7146087165544308, |
| "learning_rate": 1.6170105964043698e-05, |
| "loss": 1.2306, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.35989159891598915, |
| "grad_norm": 0.7346445190650487, |
| "learning_rate": 1.6159266239900015e-05, |
| "loss": 1.2984, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.36028578467602856, |
| "grad_norm": 0.6888116952571305, |
| "learning_rate": 1.614841484235616e-05, |
| "loss": 1.2657, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.360679970436068, |
| "grad_norm": 0.6989568807671639, |
| "learning_rate": 1.6137551791978325e-05, |
| "loss": 1.2347, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.36107415619610744, |
| "grad_norm": 0.6755063819703383, |
| "learning_rate": 1.61266771093548e-05, |
| "loss": 1.2551, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.36146834195614685, |
| "grad_norm": 0.6534472286383475, |
| "learning_rate": 1.6115790815095914e-05, |
| "loss": 1.1829, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.36186252771618627, |
| "grad_norm": 0.7262958248573816, |
| "learning_rate": 1.610489292983401e-05, |
| "loss": 1.31, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.3622567134762257, |
| "grad_norm": 0.7648471804581862, |
| "learning_rate": 1.6093983474223392e-05, |
| "loss": 1.259, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.3626508992362651, |
| "grad_norm": 0.7020781761512667, |
| "learning_rate": 1.6083062468940297e-05, |
| "loss": 1.3028, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3630450849963045, |
| "grad_norm": 0.6839393628121689, |
| "learning_rate": 1.6072129934682847e-05, |
| "loss": 1.2558, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3634392707563439, |
| "grad_norm": 0.7058988465923998, |
| "learning_rate": 1.606118589217102e-05, |
| "loss": 1.2582, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.3638334565163833, |
| "grad_norm": 0.6791475273873648, |
| "learning_rate": 1.605023036214661e-05, |
| "loss": 1.2142, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.3642276422764228, |
| "grad_norm": 0.6970350336814236, |
| "learning_rate": 1.6039263365373167e-05, |
| "loss": 1.2528, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.3646218280364622, |
| "grad_norm": 0.6699695799738228, |
| "learning_rate": 1.602828492263598e-05, |
| "loss": 1.1959, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.3650160137965016, |
| "grad_norm": 0.663408412743378, |
| "learning_rate": 1.6017295054742045e-05, |
| "loss": 1.288, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.365410199556541, |
| "grad_norm": 0.7158290886170531, |
| "learning_rate": 1.6006293782519988e-05, |
| "loss": 1.2376, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.36580438531658044, |
| "grad_norm": 0.7543773228580308, |
| "learning_rate": 1.5995281126820067e-05, |
| "loss": 1.2899, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.36619857107661985, |
| "grad_norm": 0.744149002729838, |
| "learning_rate": 1.5984257108514107e-05, |
| "loss": 1.3389, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.36659275683665926, |
| "grad_norm": 0.7182715748702388, |
| "learning_rate": 1.5973221748495472e-05, |
| "loss": 1.3381, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3669869425966987, |
| "grad_norm": 0.7001237272757365, |
| "learning_rate": 1.5962175067679013e-05, |
| "loss": 1.2702, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.36738112835673814, |
| "grad_norm": 0.7077959320676287, |
| "learning_rate": 1.5951117087001048e-05, |
| "loss": 1.2647, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.36777531411677755, |
| "grad_norm": 0.693416521429882, |
| "learning_rate": 1.5940047827419305e-05, |
| "loss": 1.307, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.36816949987681696, |
| "grad_norm": 5.313983840642, |
| "learning_rate": 1.592896730991289e-05, |
| "loss": 1.3227, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.3685636856368564, |
| "grad_norm": 0.7174526675424638, |
| "learning_rate": 1.591787555548225e-05, |
| "loss": 1.2003, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3689578713968958, |
| "grad_norm": 0.7620783078614348, |
| "learning_rate": 1.590677258514911e-05, |
| "loss": 1.2984, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3693520571569352, |
| "grad_norm": 0.7102280092234018, |
| "learning_rate": 1.5895658419956485e-05, |
| "loss": 1.1827, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.3697462429169746, |
| "grad_norm": 0.7106880003780766, |
| "learning_rate": 1.588453308096857e-05, |
| "loss": 1.2557, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.370140428677014, |
| "grad_norm": 0.7113617397724621, |
| "learning_rate": 1.587339658927077e-05, |
| "loss": 1.2874, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.3705346144370535, |
| "grad_norm": 0.7043522365953943, |
| "learning_rate": 1.5862248965969604e-05, |
| "loss": 1.2596, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3709288001970929, |
| "grad_norm": 0.7433597815080879, |
| "learning_rate": 1.5851090232192704e-05, |
| "loss": 1.3157, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.3713229859571323, |
| "grad_norm": 0.6920086062528787, |
| "learning_rate": 1.5839920409088743e-05, |
| "loss": 1.2526, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.3717171717171717, |
| "grad_norm": 0.6806330894798819, |
| "learning_rate": 1.5828739517827426e-05, |
| "loss": 1.2665, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.37211135747721114, |
| "grad_norm": 0.693773375915683, |
| "learning_rate": 1.5817547579599436e-05, |
| "loss": 1.2284, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.37250554323725055, |
| "grad_norm": 0.679887610966136, |
| "learning_rate": 1.5806344615616375e-05, |
| "loss": 1.2231, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.37289972899728996, |
| "grad_norm": 0.6898748206285744, |
| "learning_rate": 1.5795130647110755e-05, |
| "loss": 1.3302, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.37329391475732937, |
| "grad_norm": 0.7348938348769922, |
| "learning_rate": 1.5783905695335947e-05, |
| "loss": 1.2388, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.37368810051736884, |
| "grad_norm": 0.7160016591377841, |
| "learning_rate": 1.577266978156613e-05, |
| "loss": 1.2105, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.37408228627740825, |
| "grad_norm": 0.840969755169091, |
| "learning_rate": 1.5761422927096268e-05, |
| "loss": 1.3243, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.37447647203744766, |
| "grad_norm": 0.6987504047644173, |
| "learning_rate": 1.5750165153242048e-05, |
| "loss": 1.28, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3748706577974871, |
| "grad_norm": 0.6995543811490563, |
| "learning_rate": 1.5738896481339857e-05, |
| "loss": 1.2808, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.3752648435575265, |
| "grad_norm": 0.7027815016727716, |
| "learning_rate": 1.5727616932746748e-05, |
| "loss": 1.348, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.3756590293175659, |
| "grad_norm": 0.7080676371673893, |
| "learning_rate": 1.5716326528840374e-05, |
| "loss": 1.2808, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.3760532150776053, |
| "grad_norm": 0.6906991486703912, |
| "learning_rate": 1.570502529101896e-05, |
| "loss": 1.2822, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.3764474008376447, |
| "grad_norm": 0.667842860069977, |
| "learning_rate": 1.569371324070128e-05, |
| "loss": 1.3153, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.3768415865976842, |
| "grad_norm": 0.6680351163338653, |
| "learning_rate": 1.5682390399326585e-05, |
| "loss": 1.2659, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.3772357723577236, |
| "grad_norm": 0.6839204182409985, |
| "learning_rate": 1.5671056788354583e-05, |
| "loss": 1.2726, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.377629958117763, |
| "grad_norm": 0.6663129665848542, |
| "learning_rate": 1.5659712429265403e-05, |
| "loss": 1.2778, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.3780241438778024, |
| "grad_norm": 0.693810071056339, |
| "learning_rate": 1.5648357343559518e-05, |
| "loss": 1.313, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.37841832963784183, |
| "grad_norm": 0.7242639411060869, |
| "learning_rate": 1.5636991552757762e-05, |
| "loss": 1.229, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.37881251539788124, |
| "grad_norm": 0.6902168937478176, |
| "learning_rate": 1.5625615078401244e-05, |
| "loss": 1.2342, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.37920670115792066, |
| "grad_norm": 0.6978251892798721, |
| "learning_rate": 1.561422794205131e-05, |
| "loss": 1.3456, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.37960088691796007, |
| "grad_norm": 0.710891024016947, |
| "learning_rate": 1.5602830165289536e-05, |
| "loss": 1.2539, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.37999507267799953, |
| "grad_norm": 0.6933794057288072, |
| "learning_rate": 1.5591421769717642e-05, |
| "loss": 1.2406, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.38038925843803895, |
| "grad_norm": 0.6512417427643563, |
| "learning_rate": 1.5580002776957493e-05, |
| "loss": 1.2212, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.38078344419807836, |
| "grad_norm": 0.6798711415370834, |
| "learning_rate": 1.5568573208651027e-05, |
| "loss": 1.2299, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.38117762995811777, |
| "grad_norm": 0.7169966010210781, |
| "learning_rate": 1.555713308646022e-05, |
| "loss": 1.2823, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3815718157181572, |
| "grad_norm": 0.7176225879361188, |
| "learning_rate": 1.5545682432067068e-05, |
| "loss": 1.3277, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.3819660014781966, |
| "grad_norm": 0.6634455855323579, |
| "learning_rate": 1.5534221267173513e-05, |
| "loss": 1.2707, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.382360187238236, |
| "grad_norm": 0.6523220060774133, |
| "learning_rate": 1.5522749613501424e-05, |
| "loss": 1.2224, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3827543729982754, |
| "grad_norm": 0.697086935286512, |
| "learning_rate": 1.551126749279255e-05, |
| "loss": 1.2247, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.3831485587583149, |
| "grad_norm": 0.6605814150970358, |
| "learning_rate": 1.5499774926808468e-05, |
| "loss": 1.2624, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.3835427445183543, |
| "grad_norm": 0.7011947342499778, |
| "learning_rate": 1.5488271937330562e-05, |
| "loss": 1.2972, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.3839369302783937, |
| "grad_norm": 0.693697524489148, |
| "learning_rate": 1.5476758546159966e-05, |
| "loss": 1.2054, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.3843311160384331, |
| "grad_norm": 0.6700050469107739, |
| "learning_rate": 1.5465234775117538e-05, |
| "loss": 1.2642, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.38472530179847253, |
| "grad_norm": 0.6977970028023794, |
| "learning_rate": 1.5453700646043793e-05, |
| "loss": 1.2929, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.38511948755851194, |
| "grad_norm": 0.7256704791236026, |
| "learning_rate": 1.5442156180798883e-05, |
| "loss": 1.2111, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.38551367331855135, |
| "grad_norm": 0.6833079658478705, |
| "learning_rate": 1.5430601401262554e-05, |
| "loss": 1.3011, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.38590785907859076, |
| "grad_norm": 0.6451358367434681, |
| "learning_rate": 1.54190363293341e-05, |
| "loss": 1.1995, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.38630204483863023, |
| "grad_norm": 0.6686548263294536, |
| "learning_rate": 1.540746098693231e-05, |
| "loss": 1.2538, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.38669623059866964, |
| "grad_norm": 0.6858165127408108, |
| "learning_rate": 1.5395875395995456e-05, |
| "loss": 1.3015, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.38709041635870906, |
| "grad_norm": 0.6603138490124963, |
| "learning_rate": 1.5384279578481223e-05, |
| "loss": 1.2443, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.38748460211874847, |
| "grad_norm": 0.6594884559786018, |
| "learning_rate": 1.537267355636668e-05, |
| "loss": 1.2314, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.3878787878787879, |
| "grad_norm": 0.6918016955048513, |
| "learning_rate": 1.536105735164823e-05, |
| "loss": 1.2714, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.3882729736388273, |
| "grad_norm": 0.75800219367767, |
| "learning_rate": 1.5349430986341588e-05, |
| "loss": 1.2889, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.3886671593988667, |
| "grad_norm": 0.7150281786878397, |
| "learning_rate": 1.5337794482481714e-05, |
| "loss": 1.2301, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.3890613451589061, |
| "grad_norm": 0.6864306072281939, |
| "learning_rate": 1.5326147862122796e-05, |
| "loss": 1.2146, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.3894555309189456, |
| "grad_norm": 0.7281857146660934, |
| "learning_rate": 1.531449114733818e-05, |
| "loss": 1.2998, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.389849716678985, |
| "grad_norm": 0.7064026433919306, |
| "learning_rate": 1.5302824360220352e-05, |
| "loss": 1.213, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3902439024390244, |
| "grad_norm": 0.678827373077648, |
| "learning_rate": 1.5291147522880887e-05, |
| "loss": 1.2899, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3906380881990638, |
| "grad_norm": 0.6825912010344036, |
| "learning_rate": 1.5279460657450408e-05, |
| "loss": 1.2508, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.3910322739591032, |
| "grad_norm": 0.6897275293582734, |
| "learning_rate": 1.5267763786078544e-05, |
| "loss": 1.3103, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.39142645971914264, |
| "grad_norm": 0.6889677484856918, |
| "learning_rate": 1.5256056930933884e-05, |
| "loss": 1.2385, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.39182064547918205, |
| "grad_norm": 0.6756715938128258, |
| "learning_rate": 1.5244340114203946e-05, |
| "loss": 1.2811, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.39221483123922146, |
| "grad_norm": 0.6588263063642222, |
| "learning_rate": 1.5232613358095121e-05, |
| "loss": 1.2008, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.39260901699926093, |
| "grad_norm": 0.6649629443766613, |
| "learning_rate": 1.522087668483264e-05, |
| "loss": 1.2887, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.39300320275930034, |
| "grad_norm": 0.69537586560042, |
| "learning_rate": 1.5209130116660532e-05, |
| "loss": 1.2318, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.39339738851933975, |
| "grad_norm": 0.6548532801163026, |
| "learning_rate": 1.5197373675841572e-05, |
| "loss": 1.2321, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.39379157427937916, |
| "grad_norm": 0.6789611198366031, |
| "learning_rate": 1.5185607384657257e-05, |
| "loss": 1.2501, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.3941857600394186, |
| "grad_norm": 0.669469647081716, |
| "learning_rate": 1.5173831265407749e-05, |
| "loss": 1.2316, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.394579945799458, |
| "grad_norm": 0.6441524856006325, |
| "learning_rate": 1.5162045340411826e-05, |
| "loss": 1.2215, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.3949741315594974, |
| "grad_norm": 0.6585151163796467, |
| "learning_rate": 1.5150249632006871e-05, |
| "loss": 1.2364, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.3953683173195368, |
| "grad_norm": 0.6590764235984096, |
| "learning_rate": 1.5138444162548791e-05, |
| "loss": 1.2507, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3957625030795763, |
| "grad_norm": 0.6746142261487992, |
| "learning_rate": 1.5126628954412002e-05, |
| "loss": 1.3095, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.3961566888396157, |
| "grad_norm": 0.6425820917957424, |
| "learning_rate": 1.5114804029989372e-05, |
| "loss": 1.2455, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.3965508745996551, |
| "grad_norm": 0.6885768302093563, |
| "learning_rate": 1.5102969411692186e-05, |
| "loss": 1.2067, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.3969450603596945, |
| "grad_norm": 0.6715538405865114, |
| "learning_rate": 1.5091125121950105e-05, |
| "loss": 1.2723, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.3973392461197339, |
| "grad_norm": 0.6572204758977973, |
| "learning_rate": 1.5079271183211118e-05, |
| "loss": 1.2676, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.39773343187977334, |
| "grad_norm": 0.6913182919431603, |
| "learning_rate": 1.5067407617941499e-05, |
| "loss": 1.2723, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.39812761763981275, |
| "grad_norm": 0.6859364323759741, |
| "learning_rate": 1.5055534448625766e-05, |
| "loss": 1.2672, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.39852180339985216, |
| "grad_norm": 0.6924966624789022, |
| "learning_rate": 1.5043651697766642e-05, |
| "loss": 1.2032, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3989159891598916, |
| "grad_norm": 0.696108235634334, |
| "learning_rate": 1.5031759387885008e-05, |
| "loss": 1.2286, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.39931017491993104, |
| "grad_norm": 0.683816830333667, |
| "learning_rate": 1.5019857541519866e-05, |
| "loss": 1.2596, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.39970436067997045, |
| "grad_norm": 0.6544409734476196, |
| "learning_rate": 1.5007946181228286e-05, |
| "loss": 1.1861, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.40009854644000986, |
| "grad_norm": 0.6828313055454289, |
| "learning_rate": 1.4996025329585368e-05, |
| "loss": 1.2627, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.4004927322000493, |
| "grad_norm": 0.7238896612698483, |
| "learning_rate": 1.4984095009184215e-05, |
| "loss": 1.2237, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.4008869179600887, |
| "grad_norm": 0.7255960311346755, |
| "learning_rate": 1.4972155242635853e-05, |
| "loss": 1.2553, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.4012811037201281, |
| "grad_norm": 0.6462351578732584, |
| "learning_rate": 1.496020605256923e-05, |
| "loss": 1.1924, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.4016752894801675, |
| "grad_norm": 0.6627446653808322, |
| "learning_rate": 1.4948247461631148e-05, |
| "loss": 1.237, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.402069475240207, |
| "grad_norm": 0.6825306611455508, |
| "learning_rate": 1.4936279492486222e-05, |
| "loss": 1.2397, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4024636610002464, |
| "grad_norm": 0.7150438816039062, |
| "learning_rate": 1.4924302167816845e-05, |
| "loss": 1.2152, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.4028578467602858, |
| "grad_norm": 0.7093178992414255, |
| "learning_rate": 1.4912315510323138e-05, |
| "loss": 1.2576, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.4032520325203252, |
| "grad_norm": 0.6985543458898392, |
| "learning_rate": 1.4900319542722921e-05, |
| "loss": 1.2673, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.4036462182803646, |
| "grad_norm": 0.6831019226556653, |
| "learning_rate": 1.488831428775164e-05, |
| "loss": 1.2049, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.40404040404040403, |
| "grad_norm": 0.6567400662964415, |
| "learning_rate": 1.4876299768162361e-05, |
| "loss": 1.1799, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.40443458980044344, |
| "grad_norm": 0.6954618711419809, |
| "learning_rate": 1.48642760067257e-05, |
| "loss": 1.329, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.40482877556048286, |
| "grad_norm": 0.7107685604813471, |
| "learning_rate": 1.4852243026229787e-05, |
| "loss": 1.2487, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.4052229613205223, |
| "grad_norm": 0.674580720557361, |
| "learning_rate": 1.4840200849480226e-05, |
| "loss": 1.2157, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.40561714708056174, |
| "grad_norm": 0.6638304289674144, |
| "learning_rate": 1.4828149499300061e-05, |
| "loss": 1.314, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.40601133284060115, |
| "grad_norm": 0.6757193376832867, |
| "learning_rate": 1.4816088998529707e-05, |
| "loss": 1.1997, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.40640551860064056, |
| "grad_norm": 0.7111016241633684, |
| "learning_rate": 1.4804019370026927e-05, |
| "loss": 1.2307, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.40679970436067997, |
| "grad_norm": 0.6336887603576372, |
| "learning_rate": 1.4791940636666785e-05, |
| "loss": 1.2429, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.4071938901207194, |
| "grad_norm": 0.7121301295945476, |
| "learning_rate": 1.47798528213416e-05, |
| "loss": 1.2347, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.4075880758807588, |
| "grad_norm": 0.6798719496665275, |
| "learning_rate": 1.4767755946960902e-05, |
| "loss": 1.214, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.4079822616407982, |
| "grad_norm": 0.672163959841733, |
| "learning_rate": 1.4755650036451397e-05, |
| "loss": 1.2129, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.40837644740083767, |
| "grad_norm": 0.6580322284929199, |
| "learning_rate": 1.474353511275691e-05, |
| "loss": 1.233, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.4087706331608771, |
| "grad_norm": 0.8559124631644651, |
| "learning_rate": 1.4731411198838346e-05, |
| "loss": 1.3092, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.4091648189209165, |
| "grad_norm": 0.6612192406553391, |
| "learning_rate": 1.4719278317673655e-05, |
| "loss": 1.255, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.4095590046809559, |
| "grad_norm": 0.6480565858040689, |
| "learning_rate": 1.4707136492257783e-05, |
| "loss": 1.1938, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.4099531904409953, |
| "grad_norm": 0.6847017126697683, |
| "learning_rate": 1.4694985745602623e-05, |
| "loss": 1.2823, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.41034737620103473, |
| "grad_norm": 0.6625824656368514, |
| "learning_rate": 1.4682826100736973e-05, |
| "loss": 1.2196, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.41074156196107414, |
| "grad_norm": 0.6520046231301477, |
| "learning_rate": 1.4670657580706511e-05, |
| "loss": 1.2129, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.41113574772111355, |
| "grad_norm": 0.6568163192077175, |
| "learning_rate": 1.4658480208573717e-05, |
| "loss": 1.205, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.411529933481153, |
| "grad_norm": 0.7355354070775183, |
| "learning_rate": 1.4646294007417858e-05, |
| "loss": 1.2509, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.41192411924119243, |
| "grad_norm": 0.6584335682341751, |
| "learning_rate": 1.4634099000334932e-05, |
| "loss": 1.2131, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.41231830500123184, |
| "grad_norm": 0.6787385568676211, |
| "learning_rate": 1.4621895210437627e-05, |
| "loss": 1.2844, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.41271249076127126, |
| "grad_norm": 0.6534106417043676, |
| "learning_rate": 1.4609682660855277e-05, |
| "loss": 1.2036, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.41310667652131067, |
| "grad_norm": 0.6670476383359956, |
| "learning_rate": 1.4597461374733817e-05, |
| "loss": 1.2027, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.4135008622813501, |
| "grad_norm": 0.6869267202912966, |
| "learning_rate": 1.458523137523574e-05, |
| "loss": 1.2417, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.4138950480413895, |
| "grad_norm": 0.6825156046026267, |
| "learning_rate": 1.4572992685540057e-05, |
| "loss": 1.2732, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.4142892338014289, |
| "grad_norm": 0.6393859537214149, |
| "learning_rate": 1.4560745328842238e-05, |
| "loss": 1.2022, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.41468341956146837, |
| "grad_norm": 0.6783345452247255, |
| "learning_rate": 1.4548489328354197e-05, |
| "loss": 1.2039, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.4150776053215078, |
| "grad_norm": 0.6856742550565621, |
| "learning_rate": 1.4536224707304209e-05, |
| "loss": 1.2333, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.4154717910815472, |
| "grad_norm": 0.6797781228333333, |
| "learning_rate": 1.4523951488936905e-05, |
| "loss": 1.2458, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.4158659768415866, |
| "grad_norm": 0.6687542124726085, |
| "learning_rate": 1.4511669696513206e-05, |
| "loss": 1.2859, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.416260162601626, |
| "grad_norm": 0.654994598290333, |
| "learning_rate": 1.4499379353310275e-05, |
| "loss": 1.2514, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.4166543483616654, |
| "grad_norm": 0.6710277195302214, |
| "learning_rate": 1.4487080482621485e-05, |
| "loss": 1.1726, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.41704853412170484, |
| "grad_norm": 0.6975157864795727, |
| "learning_rate": 1.4474773107756379e-05, |
| "loss": 1.3039, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.41744271988174425, |
| "grad_norm": 0.6847631484475221, |
| "learning_rate": 1.4462457252040606e-05, |
| "loss": 1.2934, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.4178369056417837, |
| "grad_norm": 0.6569155149197007, |
| "learning_rate": 1.4450132938815896e-05, |
| "loss": 1.2399, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.41823109140182313, |
| "grad_norm": 0.6551116832105975, |
| "learning_rate": 1.443780019144e-05, |
| "loss": 1.2549, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.41862527716186254, |
| "grad_norm": 0.6908963315449874, |
| "learning_rate": 1.4425459033286664e-05, |
| "loss": 1.2723, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.41901946292190195, |
| "grad_norm": 0.669999734161243, |
| "learning_rate": 1.4413109487745571e-05, |
| "loss": 1.2034, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.41941364868194136, |
| "grad_norm": 0.6569047790921405, |
| "learning_rate": 1.4400751578222293e-05, |
| "loss": 1.2124, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.4198078344419808, |
| "grad_norm": 0.6641788447379324, |
| "learning_rate": 1.438838532813827e-05, |
| "loss": 1.2311, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.4202020202020202, |
| "grad_norm": 0.6421382945573415, |
| "learning_rate": 1.437601076093073e-05, |
| "loss": 1.2624, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.4205962059620596, |
| "grad_norm": 0.6987072260804941, |
| "learning_rate": 1.4363627900052676e-05, |
| "loss": 1.2533, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.42099039172209907, |
| "grad_norm": 0.7038543852283208, |
| "learning_rate": 1.435123676897283e-05, |
| "loss": 1.2362, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.4213845774821385, |
| "grad_norm": 0.6582422377441999, |
| "learning_rate": 1.4338837391175582e-05, |
| "loss": 1.2929, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.4217787632421779, |
| "grad_norm": 0.6549666509553242, |
| "learning_rate": 1.4326429790160958e-05, |
| "loss": 1.2912, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4221729490022173, |
| "grad_norm": 0.6609389567208854, |
| "learning_rate": 1.4314013989444566e-05, |
| "loss": 1.2242, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.4225671347622567, |
| "grad_norm": 0.6742945321694513, |
| "learning_rate": 1.4301590012557553e-05, |
| "loss": 1.2606, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.4229613205222961, |
| "grad_norm": 0.6841196388200714, |
| "learning_rate": 1.4289157883046567e-05, |
| "loss": 1.1914, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.42335550628233554, |
| "grad_norm": 0.6781835047036432, |
| "learning_rate": 1.4276717624473697e-05, |
| "loss": 1.2149, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.42374969204237495, |
| "grad_norm": 0.6384771187611207, |
| "learning_rate": 1.4264269260416455e-05, |
| "loss": 1.194, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.4241438778024144, |
| "grad_norm": 0.6392205051998697, |
| "learning_rate": 1.4251812814467701e-05, |
| "loss": 1.2314, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.4245380635624538, |
| "grad_norm": 0.6789060040382907, |
| "learning_rate": 1.4239348310235613e-05, |
| "loss": 1.2207, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.42493224932249324, |
| "grad_norm": 0.6479589435408246, |
| "learning_rate": 1.4226875771343656e-05, |
| "loss": 1.2104, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.42532643508253265, |
| "grad_norm": 0.6575432784037729, |
| "learning_rate": 1.4214395221430501e-05, |
| "loss": 1.2749, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.42572062084257206, |
| "grad_norm": 0.701850378214208, |
| "learning_rate": 1.420190668415002e-05, |
| "loss": 1.2202, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.4261148066026115, |
| "grad_norm": 2.0536053216353896, |
| "learning_rate": 1.4189410183171214e-05, |
| "loss": 1.1963, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.4265089923626509, |
| "grad_norm": 0.6609999350419868, |
| "learning_rate": 1.417690574217818e-05, |
| "loss": 1.2504, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.4269031781226903, |
| "grad_norm": 0.6612267333571307, |
| "learning_rate": 1.4164393384870065e-05, |
| "loss": 1.2665, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.42729736388272976, |
| "grad_norm": 0.6757638887255789, |
| "learning_rate": 1.4151873134961014e-05, |
| "loss": 1.1514, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.4276915496427692, |
| "grad_norm": 0.683456163531099, |
| "learning_rate": 1.4139345016180135e-05, |
| "loss": 1.3079, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.4280857354028086, |
| "grad_norm": 0.8513875836873347, |
| "learning_rate": 1.4126809052271453e-05, |
| "loss": 1.2724, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.428479921162848, |
| "grad_norm": 0.6442638283664752, |
| "learning_rate": 1.4114265266993847e-05, |
| "loss": 1.2173, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.4288741069228874, |
| "grad_norm": 0.6509895157275494, |
| "learning_rate": 1.4101713684121042e-05, |
| "loss": 1.2479, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.4292682926829268, |
| "grad_norm": 0.6474693228576278, |
| "learning_rate": 1.408915432744152e-05, |
| "loss": 1.2125, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.42966247844296623, |
| "grad_norm": 0.6735783131189829, |
| "learning_rate": 1.407658722075851e-05, |
| "loss": 1.2068, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.43005666420300565, |
| "grad_norm": 0.6537663595057571, |
| "learning_rate": 1.406401238788992e-05, |
| "loss": 1.2156, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.4304508499630451, |
| "grad_norm": 0.6544657627047221, |
| "learning_rate": 1.4051429852668312e-05, |
| "loss": 1.2576, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.4308450357230845, |
| "grad_norm": 0.6301328044253675, |
| "learning_rate": 1.4038839638940835e-05, |
| "loss": 1.1426, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.43123922148312394, |
| "grad_norm": 0.6847962737010194, |
| "learning_rate": 1.4026241770569198e-05, |
| "loss": 1.1885, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.43163340724316335, |
| "grad_norm": 0.6471962172332811, |
| "learning_rate": 1.4013636271429612e-05, |
| "loss": 1.2111, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.43202759300320276, |
| "grad_norm": 0.6655421827524571, |
| "learning_rate": 1.4001023165412754e-05, |
| "loss": 1.2754, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.43242177876324217, |
| "grad_norm": 0.6748073371066969, |
| "learning_rate": 1.3988402476423722e-05, |
| "loss": 1.254, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.4328159645232816, |
| "grad_norm": 0.6557610559912413, |
| "learning_rate": 1.3975774228381975e-05, |
| "loss": 1.2439, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.433210150283321, |
| "grad_norm": 0.6632658788983514, |
| "learning_rate": 1.3963138445221311e-05, |
| "loss": 1.2516, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.43360433604336046, |
| "grad_norm": 0.6491486867598589, |
| "learning_rate": 1.3950495150889793e-05, |
| "loss": 1.2335, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.4339985218033999, |
| "grad_norm": 0.6517729673881756, |
| "learning_rate": 1.3937844369349736e-05, |
| "loss": 1.2167, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.4343927075634393, |
| "grad_norm": 0.6782382384926667, |
| "learning_rate": 1.3925186124577639e-05, |
| "loss": 1.2425, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.4347868933234787, |
| "grad_norm": 0.6591309286023143, |
| "learning_rate": 1.3912520440564139e-05, |
| "loss": 1.2043, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.4351810790835181, |
| "grad_norm": 0.6546464680178252, |
| "learning_rate": 1.3899847341313982e-05, |
| "loss": 1.1904, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.4355752648435575, |
| "grad_norm": 0.6446542186074286, |
| "learning_rate": 1.3887166850845963e-05, |
| "loss": 1.1976, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.43596945060359693, |
| "grad_norm": 0.6591279097552126, |
| "learning_rate": 1.3874478993192886e-05, |
| "loss": 1.2711, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.43636363636363634, |
| "grad_norm": 0.6877144132235246, |
| "learning_rate": 1.386178379240152e-05, |
| "loss": 1.2061, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.4367578221236758, |
| "grad_norm": 0.6207199280492006, |
| "learning_rate": 1.3849081272532545e-05, |
| "loss": 1.1999, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.4371520078837152, |
| "grad_norm": 0.6863520493826831, |
| "learning_rate": 1.383637145766052e-05, |
| "loss": 1.2781, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.43754619364375463, |
| "grad_norm": 0.6329597392455102, |
| "learning_rate": 1.3823654371873827e-05, |
| "loss": 1.2203, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.43794037940379404, |
| "grad_norm": 0.6453430853174527, |
| "learning_rate": 1.3810930039274626e-05, |
| "loss": 1.2341, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.43833456516383346, |
| "grad_norm": 0.7008614015575915, |
| "learning_rate": 1.3798198483978816e-05, |
| "loss": 1.3045, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.43872875092387287, |
| "grad_norm": 0.6526995169723234, |
| "learning_rate": 1.3785459730115975e-05, |
| "loss": 1.2444, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.4391229366839123, |
| "grad_norm": 0.6648665882412224, |
| "learning_rate": 1.3772713801829338e-05, |
| "loss": 1.2346, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.4395171224439517, |
| "grad_norm": 0.6521080562166568, |
| "learning_rate": 1.375996072327573e-05, |
| "loss": 1.2473, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.43991130820399116, |
| "grad_norm": 0.6354275169637564, |
| "learning_rate": 1.374720051862553e-05, |
| "loss": 1.2316, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.44030549396403057, |
| "grad_norm": 0.6614840460671958, |
| "learning_rate": 1.3734433212062617e-05, |
| "loss": 1.2004, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.44069967972407, |
| "grad_norm": 0.6662537159779596, |
| "learning_rate": 1.3721658827784335e-05, |
| "loss": 1.2901, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.4410938654841094, |
| "grad_norm": 0.6687056517988047, |
| "learning_rate": 1.3708877390001442e-05, |
| "loss": 1.2539, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.4414880512441488, |
| "grad_norm": 0.6733214755511964, |
| "learning_rate": 1.3696088922938065e-05, |
| "loss": 1.2515, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.4418822370041882, |
| "grad_norm": 0.6535655596127494, |
| "learning_rate": 1.3683293450831649e-05, |
| "loss": 1.2305, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.44227642276422763, |
| "grad_norm": 0.710139241305188, |
| "learning_rate": 1.3670490997932922e-05, |
| "loss": 1.3349, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.44267060852426704, |
| "grad_norm": 0.6301043045063337, |
| "learning_rate": 1.3657681588505835e-05, |
| "loss": 1.1704, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.4430647942843065, |
| "grad_norm": 0.6659655009342225, |
| "learning_rate": 1.3644865246827528e-05, |
| "loss": 1.2175, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.4434589800443459, |
| "grad_norm": 0.6562665211091786, |
| "learning_rate": 1.3632041997188278e-05, |
| "loss": 1.298, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.44385316580438533, |
| "grad_norm": 0.6649159181775033, |
| "learning_rate": 1.3619211863891458e-05, |
| "loss": 1.2194, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.44424735156442474, |
| "grad_norm": 0.6563076400799585, |
| "learning_rate": 1.3606374871253474e-05, |
| "loss": 1.2257, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.44464153732446415, |
| "grad_norm": 0.6289604646597672, |
| "learning_rate": 1.3593531043603756e-05, |
| "loss": 1.2144, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.44503572308450356, |
| "grad_norm": 1.1206270057176397, |
| "learning_rate": 1.3580680405284666e-05, |
| "loss": 1.1742, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.445429908844543, |
| "grad_norm": 0.7010573881465098, |
| "learning_rate": 1.3567822980651481e-05, |
| "loss": 1.2557, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4458240946045824, |
| "grad_norm": 0.6819687881969332, |
| "learning_rate": 1.3554958794072346e-05, |
| "loss": 1.2628, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.44621828036462186, |
| "grad_norm": 0.6631424239254387, |
| "learning_rate": 1.3542087869928215e-05, |
| "loss": 1.2664, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.44661246612466127, |
| "grad_norm": 0.6884792830902806, |
| "learning_rate": 1.3529210232612815e-05, |
| "loss": 1.2151, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.4470066518847007, |
| "grad_norm": 0.6743020797905825, |
| "learning_rate": 1.3516325906532592e-05, |
| "loss": 1.2173, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.4474008376447401, |
| "grad_norm": 0.6748726425122616, |
| "learning_rate": 1.350343491610667e-05, |
| "loss": 1.2951, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.4477950234047795, |
| "grad_norm": 0.6790188323448472, |
| "learning_rate": 1.3490537285766809e-05, |
| "loss": 1.2548, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.4481892091648189, |
| "grad_norm": 0.7188066208980596, |
| "learning_rate": 1.3477633039957346e-05, |
| "loss": 1.3093, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.4485833949248583, |
| "grad_norm": 0.6778429503766523, |
| "learning_rate": 1.3464722203135164e-05, |
| "loss": 1.253, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.44897758068489774, |
| "grad_norm": 0.6610758959536769, |
| "learning_rate": 1.3451804799769625e-05, |
| "loss": 1.1997, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.4493717664449372, |
| "grad_norm": 0.6661694419731813, |
| "learning_rate": 1.3438880854342552e-05, |
| "loss": 1.2346, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4497659522049766, |
| "grad_norm": 0.6668706103840563, |
| "learning_rate": 1.3425950391348154e-05, |
| "loss": 1.2652, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.450160137965016, |
| "grad_norm": 0.653413813618824, |
| "learning_rate": 1.3413013435293004e-05, |
| "loss": 1.1574, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.45055432372505544, |
| "grad_norm": 0.6626392658566362, |
| "learning_rate": 1.3400070010695966e-05, |
| "loss": 1.2584, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.45094850948509485, |
| "grad_norm": 0.6612645982158664, |
| "learning_rate": 1.3387120142088182e-05, |
| "loss": 1.3095, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.45134269524513426, |
| "grad_norm": 0.6343193781713191, |
| "learning_rate": 1.3374163854012987e-05, |
| "loss": 1.1738, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.4517368810051737, |
| "grad_norm": 0.6914178485118841, |
| "learning_rate": 1.33612011710259e-05, |
| "loss": 1.2289, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.4521310667652131, |
| "grad_norm": 0.6349842783208113, |
| "learning_rate": 1.3348232117694555e-05, |
| "loss": 1.1942, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.45252525252525255, |
| "grad_norm": 0.6878005677404854, |
| "learning_rate": 1.333525671859865e-05, |
| "loss": 1.2197, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.45291943828529196, |
| "grad_norm": 0.708515154245003, |
| "learning_rate": 1.3322274998329925e-05, |
| "loss": 1.217, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.4533136240453314, |
| "grad_norm": 0.6654307895746174, |
| "learning_rate": 1.3309286981492084e-05, |
| "loss": 1.2182, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4537078098053708, |
| "grad_norm": 0.6849958565571799, |
| "learning_rate": 1.3296292692700781e-05, |
| "loss": 1.262, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.4541019955654102, |
| "grad_norm": 0.661458414456228, |
| "learning_rate": 1.3283292156583542e-05, |
| "loss": 1.2237, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.4544961813254496, |
| "grad_norm": 0.6445694725984406, |
| "learning_rate": 1.3270285397779743e-05, |
| "loss": 1.2046, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.454890367085489, |
| "grad_norm": 0.6880572438702209, |
| "learning_rate": 1.3257272440940559e-05, |
| "loss": 1.2517, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.45528455284552843, |
| "grad_norm": 0.6462853469948439, |
| "learning_rate": 1.324425331072889e-05, |
| "loss": 1.1937, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.4556787386055679, |
| "grad_norm": 0.6937504964864099, |
| "learning_rate": 1.3231228031819358e-05, |
| "loss": 1.2315, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.4560729243656073, |
| "grad_norm": 0.6935002768528703, |
| "learning_rate": 1.3218196628898232e-05, |
| "loss": 1.2941, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.4564671101256467, |
| "grad_norm": 0.6646155460144206, |
| "learning_rate": 1.320515912666338e-05, |
| "loss": 1.1961, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.45686129588568614, |
| "grad_norm": 0.675642433429094, |
| "learning_rate": 1.319211554982424e-05, |
| "loss": 1.1793, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.45725548164572555, |
| "grad_norm": 0.6626358544782226, |
| "learning_rate": 1.3179065923101759e-05, |
| "loss": 1.2279, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.45764966740576496, |
| "grad_norm": 0.6633366399850951, |
| "learning_rate": 1.3166010271228347e-05, |
| "loss": 1.2472, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.45804385316580437, |
| "grad_norm": 0.6572172161629819, |
| "learning_rate": 1.3152948618947839e-05, |
| "loss": 1.2959, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.4584380389258438, |
| "grad_norm": 0.6234010246471685, |
| "learning_rate": 1.3139880991015432e-05, |
| "loss": 1.1878, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.45883222468588325, |
| "grad_norm": 0.6445399860459299, |
| "learning_rate": 1.3126807412197666e-05, |
| "loss": 1.2468, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.45922641044592266, |
| "grad_norm": 0.6746604279800079, |
| "learning_rate": 1.3113727907272341e-05, |
| "loss": 1.2452, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.4596205962059621, |
| "grad_norm": 0.6634669603961608, |
| "learning_rate": 1.3100642501028502e-05, |
| "loss": 1.2124, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.4600147819660015, |
| "grad_norm": 0.6589031509633928, |
| "learning_rate": 1.3087551218266373e-05, |
| "loss": 1.2681, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.4604089677260409, |
| "grad_norm": 0.6488880528092997, |
| "learning_rate": 1.307445408379731e-05, |
| "loss": 1.2313, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.4608031534860803, |
| "grad_norm": 0.6461518831877928, |
| "learning_rate": 1.3061351122443774e-05, |
| "loss": 1.173, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.4611973392461197, |
| "grad_norm": 0.6719867860616543, |
| "learning_rate": 1.304824235903925e-05, |
| "loss": 1.2363, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.46159152500615913, |
| "grad_norm": 0.6720218506435118, |
| "learning_rate": 1.3035127818428239e-05, |
| "loss": 1.2999, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.4619857107661986, |
| "grad_norm": 0.6216405882359431, |
| "learning_rate": 1.302200752546618e-05, |
| "loss": 1.1873, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.462379896526238, |
| "grad_norm": 0.6615993873842473, |
| "learning_rate": 1.3008881505019413e-05, |
| "loss": 1.2329, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.4627740822862774, |
| "grad_norm": 0.6332451929136712, |
| "learning_rate": 1.2995749781965139e-05, |
| "loss": 1.1945, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.46316826804631683, |
| "grad_norm": 0.6600204388313866, |
| "learning_rate": 1.2982612381191368e-05, |
| "loss": 1.1736, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.46356245380635624, |
| "grad_norm": 0.6700748596784245, |
| "learning_rate": 1.296946932759686e-05, |
| "loss": 1.2847, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.46395663956639566, |
| "grad_norm": 0.6650184197669182, |
| "learning_rate": 1.2956320646091106e-05, |
| "loss": 1.2097, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.46435082532643507, |
| "grad_norm": 0.6626476795340289, |
| "learning_rate": 1.2943166361594242e-05, |
| "loss": 1.2041, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.4647450110864745, |
| "grad_norm": 0.6475300925870908, |
| "learning_rate": 1.293000649903704e-05, |
| "loss": 1.2847, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.46513919684651395, |
| "grad_norm": 0.6563755699385965, |
| "learning_rate": 1.2916841083360836e-05, |
| "loss": 1.2188, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.46553338260655336, |
| "grad_norm": 0.6558206126815487, |
| "learning_rate": 1.2903670139517495e-05, |
| "loss": 1.2171, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.46592756836659277, |
| "grad_norm": 0.6366861432284558, |
| "learning_rate": 1.2890493692469357e-05, |
| "loss": 1.2451, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.4663217541266322, |
| "grad_norm": 0.6759773243408979, |
| "learning_rate": 1.2877311767189192e-05, |
| "loss": 1.2673, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.4667159398866716, |
| "grad_norm": 0.6419744413255126, |
| "learning_rate": 1.2864124388660148e-05, |
| "loss": 1.1927, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.467110125646711, |
| "grad_norm": 0.6665800678685042, |
| "learning_rate": 1.2850931581875723e-05, |
| "loss": 1.241, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.4675043114067504, |
| "grad_norm": 0.647473022755396, |
| "learning_rate": 1.283773337183968e-05, |
| "loss": 1.2654, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.46789849716678983, |
| "grad_norm": 0.6627384520276431, |
| "learning_rate": 1.2824529783566044e-05, |
| "loss": 1.2103, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.4682926829268293, |
| "grad_norm": 0.6984420515522787, |
| "learning_rate": 1.2811320842079026e-05, |
| "loss": 1.2189, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.4686868686868687, |
| "grad_norm": 0.6838425822588616, |
| "learning_rate": 1.2798106572412973e-05, |
| "loss": 1.2817, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.4690810544469081, |
| "grad_norm": 0.6918032431384864, |
| "learning_rate": 1.278488699961235e-05, |
| "loss": 1.2529, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.46947524020694753, |
| "grad_norm": 0.6948726963202924, |
| "learning_rate": 1.2771662148731653e-05, |
| "loss": 1.2411, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.46986942596698694, |
| "grad_norm": 0.6429092095036071, |
| "learning_rate": 1.275843204483539e-05, |
| "loss": 1.2295, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.47026361172702635, |
| "grad_norm": 0.6351964026733381, |
| "learning_rate": 1.2745196712998032e-05, |
| "loss": 1.2073, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.47065779748706577, |
| "grad_norm": 0.6921674003382929, |
| "learning_rate": 1.2731956178303941e-05, |
| "loss": 1.2549, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.4710519832471052, |
| "grad_norm": 0.6322772440878668, |
| "learning_rate": 1.2718710465847355e-05, |
| "loss": 1.2263, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.47144616900714464, |
| "grad_norm": 0.6452486149856621, |
| "learning_rate": 1.2705459600732319e-05, |
| "loss": 1.2562, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.47184035476718406, |
| "grad_norm": 0.6629534381246308, |
| "learning_rate": 1.2692203608072646e-05, |
| "loss": 1.2418, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.47223454052722347, |
| "grad_norm": 0.6619087288650083, |
| "learning_rate": 1.2678942512991865e-05, |
| "loss": 1.1517, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.4726287262872629, |
| "grad_norm": 0.6639361742877278, |
| "learning_rate": 1.2665676340623172e-05, |
| "loss": 1.1919, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.4730229120473023, |
| "grad_norm": 0.6771450309425207, |
| "learning_rate": 1.2652405116109394e-05, |
| "loss": 1.2983, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4734170978073417, |
| "grad_norm": 0.6592820641641075, |
| "learning_rate": 1.2639128864602932e-05, |
| "loss": 1.2035, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.4738112835673811, |
| "grad_norm": 0.6754237204338704, |
| "learning_rate": 1.2625847611265703e-05, |
| "loss": 1.2545, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.4742054693274205, |
| "grad_norm": 0.6746663309712343, |
| "learning_rate": 1.2612561381269113e-05, |
| "loss": 1.167, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.47459965508746, |
| "grad_norm": 0.6499219261911088, |
| "learning_rate": 1.2599270199794008e-05, |
| "loss": 1.2697, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.4749938408474994, |
| "grad_norm": 0.6496215506080194, |
| "learning_rate": 1.2585974092030597e-05, |
| "loss": 1.2177, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.4753880266075388, |
| "grad_norm": 0.6507804232904032, |
| "learning_rate": 1.2572673083178448e-05, |
| "loss": 1.2166, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.47578221236757823, |
| "grad_norm": 0.6350993220502519, |
| "learning_rate": 1.2559367198446401e-05, |
| "loss": 1.1809, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.47617639812761764, |
| "grad_norm": 0.6638184807925088, |
| "learning_rate": 1.254605646305255e-05, |
| "loss": 1.3182, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.47657058388765705, |
| "grad_norm": 0.638690190001186, |
| "learning_rate": 1.2532740902224171e-05, |
| "loss": 1.219, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.47696476964769646, |
| "grad_norm": 0.6431222064327176, |
| "learning_rate": 1.2519420541197696e-05, |
| "loss": 1.2105, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4773589554077359, |
| "grad_norm": 0.6385515617572074, |
| "learning_rate": 1.2506095405218646e-05, |
| "loss": 1.2066, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.47775314116777534, |
| "grad_norm": 0.6625298662888042, |
| "learning_rate": 1.249276551954159e-05, |
| "loss": 1.2048, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.47814732692781475, |
| "grad_norm": 0.6511188776236311, |
| "learning_rate": 1.2479430909430109e-05, |
| "loss": 1.2683, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.47854151268785416, |
| "grad_norm": 0.6431132536314119, |
| "learning_rate": 1.2466091600156736e-05, |
| "loss": 1.2451, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.4789356984478936, |
| "grad_norm": 0.6639747730945537, |
| "learning_rate": 1.2452747617002902e-05, |
| "loss": 1.2442, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.479329884207933, |
| "grad_norm": 0.6533976794673589, |
| "learning_rate": 1.24393989852589e-05, |
| "loss": 1.2325, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.4797240699679724, |
| "grad_norm": 0.6457330805526268, |
| "learning_rate": 1.2426045730223842e-05, |
| "loss": 1.2082, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.4801182557280118, |
| "grad_norm": 0.6610877473382107, |
| "learning_rate": 1.2412687877205587e-05, |
| "loss": 1.2377, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.4805124414880512, |
| "grad_norm": 0.6592577931155573, |
| "learning_rate": 1.2399325451520718e-05, |
| "loss": 1.2529, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.4809066272480907, |
| "grad_norm": 0.6661159851544838, |
| "learning_rate": 1.2385958478494487e-05, |
| "loss": 1.3026, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.4813008130081301, |
| "grad_norm": 0.6643157743331228, |
| "learning_rate": 1.2372586983460755e-05, |
| "loss": 1.1742, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.4816949987681695, |
| "grad_norm": 0.6520829662785887, |
| "learning_rate": 1.2359210991761958e-05, |
| "loss": 1.2212, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.4820891845282089, |
| "grad_norm": 0.6421284812980386, |
| "learning_rate": 1.2345830528749059e-05, |
| "loss": 1.2352, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.48248337028824834, |
| "grad_norm": 0.6474967726372801, |
| "learning_rate": 1.233244561978149e-05, |
| "loss": 1.1619, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.48287755604828775, |
| "grad_norm": 0.6621910058206888, |
| "learning_rate": 1.2319056290227106e-05, |
| "loss": 1.2398, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.48327174180832716, |
| "grad_norm": 0.5884735021292232, |
| "learning_rate": 1.2305662565462146e-05, |
| "loss": 1.2038, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.48366592756836657, |
| "grad_norm": 0.641700494355378, |
| "learning_rate": 1.2292264470871183e-05, |
| "loss": 1.2872, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.48406011332840604, |
| "grad_norm": 0.6360792810507947, |
| "learning_rate": 1.2278862031847061e-05, |
| "loss": 1.237, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.48445429908844545, |
| "grad_norm": 0.6242051518141506, |
| "learning_rate": 1.226545527379086e-05, |
| "loss": 1.1896, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.48484848484848486, |
| "grad_norm": 0.6506990087447501, |
| "learning_rate": 1.2252044222111859e-05, |
| "loss": 1.1949, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.4852426706085243, |
| "grad_norm": 0.6592019538150893, |
| "learning_rate": 1.2238628902227454e-05, |
| "loss": 1.1833, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.4856368563685637, |
| "grad_norm": 0.6880800573570197, |
| "learning_rate": 1.2225209339563144e-05, |
| "loss": 1.2481, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.4860310421286031, |
| "grad_norm": 0.6700259002004992, |
| "learning_rate": 1.2211785559552472e-05, |
| "loss": 1.27, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.4864252278886425, |
| "grad_norm": 0.6679202540830845, |
| "learning_rate": 1.2198357587636958e-05, |
| "loss": 1.182, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4868194136486819, |
| "grad_norm": 0.6583277626537555, |
| "learning_rate": 1.2184925449266083e-05, |
| "loss": 1.2575, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.4872135994087214, |
| "grad_norm": 0.6510891521467633, |
| "learning_rate": 1.2171489169897217e-05, |
| "loss": 1.216, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.4876077851687608, |
| "grad_norm": 0.697605524032823, |
| "learning_rate": 1.215804877499558e-05, |
| "loss": 1.2935, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.4880019709288002, |
| "grad_norm": 0.6752644934446952, |
| "learning_rate": 1.2144604290034193e-05, |
| "loss": 1.1875, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.4883961566888396, |
| "grad_norm": 0.6290688021299883, |
| "learning_rate": 1.2131155740493816e-05, |
| "loss": 1.1835, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.48879034244887903, |
| "grad_norm": 0.6341260406172561, |
| "learning_rate": 1.211770315186294e-05, |
| "loss": 1.2685, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.48918452820891845, |
| "grad_norm": 0.6299349925825592, |
| "learning_rate": 1.2104246549637683e-05, |
| "loss": 1.2167, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.48957871396895786, |
| "grad_norm": 0.6372753688281468, |
| "learning_rate": 1.2090785959321783e-05, |
| "loss": 1.2302, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.48997289972899727, |
| "grad_norm": 0.6420141409041106, |
| "learning_rate": 1.2077321406426542e-05, |
| "loss": 1.1826, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.49036708548903674, |
| "grad_norm": 0.6693778503790639, |
| "learning_rate": 1.2063852916470755e-05, |
| "loss": 1.2352, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.49076127124907615, |
| "grad_norm": 0.6667762505796914, |
| "learning_rate": 1.2050380514980697e-05, |
| "loss": 1.2304, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.49115545700911556, |
| "grad_norm": 0.6574623314489658, |
| "learning_rate": 1.2036904227490043e-05, |
| "loss": 1.2036, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.49154964276915497, |
| "grad_norm": 0.6576866899161838, |
| "learning_rate": 1.2023424079539841e-05, |
| "loss": 1.2693, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.4919438285291944, |
| "grad_norm": 0.6854866850287104, |
| "learning_rate": 1.2009940096678451e-05, |
| "loss": 1.2331, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.4923380142892338, |
| "grad_norm": 0.6591589410360849, |
| "learning_rate": 1.1996452304461502e-05, |
| "loss": 1.1481, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.4927322000492732, |
| "grad_norm": 0.657166055362852, |
| "learning_rate": 1.1982960728451847e-05, |
| "loss": 1.2066, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.4931263858093126, |
| "grad_norm": 0.6500616754839462, |
| "learning_rate": 1.1969465394219503e-05, |
| "loss": 1.2311, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.4935205715693521, |
| "grad_norm": 0.7215977353713153, |
| "learning_rate": 1.1955966327341614e-05, |
| "loss": 1.2991, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.4939147573293915, |
| "grad_norm": 0.6380629207396062, |
| "learning_rate": 1.1942463553402407e-05, |
| "loss": 1.1492, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.4943089430894309, |
| "grad_norm": 0.6438522141604093, |
| "learning_rate": 1.192895709799311e-05, |
| "loss": 1.2256, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.4947031288494703, |
| "grad_norm": 0.6829774495136759, |
| "learning_rate": 1.1915446986711953e-05, |
| "loss": 1.2092, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.49509731460950973, |
| "grad_norm": 0.6414485475773434, |
| "learning_rate": 1.1901933245164085e-05, |
| "loss": 1.1672, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.49549150036954914, |
| "grad_norm": 0.6353044864393161, |
| "learning_rate": 1.1888415898961538e-05, |
| "loss": 1.2124, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.49588568612958855, |
| "grad_norm": 0.6459942965869777, |
| "learning_rate": 1.1874894973723173e-05, |
| "loss": 1.2434, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.49627987188962797, |
| "grad_norm": 0.6455190632225122, |
| "learning_rate": 1.1861370495074631e-05, |
| "loss": 1.1948, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.49667405764966743, |
| "grad_norm": 0.6611317837642312, |
| "learning_rate": 1.1847842488648296e-05, |
| "loss": 1.2226, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.49706824340970684, |
| "grad_norm": 0.6438093407353985, |
| "learning_rate": 1.1834310980083234e-05, |
| "loss": 1.1885, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.49746242916974626, |
| "grad_norm": 0.6724323601652606, |
| "learning_rate": 1.1820775995025147e-05, |
| "loss": 1.2409, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.49785661492978567, |
| "grad_norm": 0.6748553238124116, |
| "learning_rate": 1.1807237559126325e-05, |
| "loss": 1.2272, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.4982508006898251, |
| "grad_norm": 0.6139036537344899, |
| "learning_rate": 1.1793695698045606e-05, |
| "loss": 1.2306, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4986449864498645, |
| "grad_norm": 0.6274786131500468, |
| "learning_rate": 1.1780150437448308e-05, |
| "loss": 1.2436, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.4990391722099039, |
| "grad_norm": 0.6947108304184417, |
| "learning_rate": 1.1766601803006204e-05, |
| "loss": 1.2404, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.4994333579699433, |
| "grad_norm": 0.6330610294257072, |
| "learning_rate": 1.1753049820397449e-05, |
| "loss": 1.2661, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.4998275437299828, |
| "grad_norm": 0.6526188172174275, |
| "learning_rate": 1.1739494515306553e-05, |
| "loss": 1.2404, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.5002217294900222, |
| "grad_norm": 0.6669476058696817, |
| "learning_rate": 1.172593591342432e-05, |
| "loss": 1.2259, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.5006159152500615, |
| "grad_norm": 0.6632364458454981, |
| "learning_rate": 1.1712374040447802e-05, |
| "loss": 1.2059, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.501010101010101, |
| "grad_norm": 0.6580075066736768, |
| "learning_rate": 1.1698808922080248e-05, |
| "loss": 1.2125, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.5014042867701405, |
| "grad_norm": 0.6477489624350686, |
| "learning_rate": 1.1685240584031068e-05, |
| "loss": 1.2346, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.5017984725301798, |
| "grad_norm": 0.6536067797543117, |
| "learning_rate": 1.1671669052015757e-05, |
| "loss": 1.2087, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.5021926582902193, |
| "grad_norm": 0.6652544869437115, |
| "learning_rate": 1.1658094351755883e-05, |
| "loss": 1.2333, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.5025868440502587, |
| "grad_norm": 0.6600451654966094, |
| "learning_rate": 1.1644516508978998e-05, |
| "loss": 1.213, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.5029810298102981, |
| "grad_norm": 0.6590398336514781, |
| "learning_rate": 1.1630935549418627e-05, |
| "loss": 1.2184, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.5033752155703375, |
| "grad_norm": 0.660891374872714, |
| "learning_rate": 1.1617351498814199e-05, |
| "loss": 1.2451, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.503769401330377, |
| "grad_norm": 0.6091765102262902, |
| "learning_rate": 1.1603764382910989e-05, |
| "loss": 1.1412, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.5041635870904163, |
| "grad_norm": 0.6735824808082984, |
| "learning_rate": 1.1590174227460098e-05, |
| "loss": 1.1786, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.5045577728504558, |
| "grad_norm": 0.6532363704591942, |
| "learning_rate": 1.1576581058218375e-05, |
| "loss": 1.1864, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5049519586104952, |
| "grad_norm": 0.6606502828456684, |
| "learning_rate": 1.156298490094839e-05, |
| "loss": 1.1888, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.5053461443705346, |
| "grad_norm": 0.6342921397541668, |
| "learning_rate": 1.1549385781418372e-05, |
| "loss": 1.2213, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.5057403301305741, |
| "grad_norm": 0.6689825246282982, |
| "learning_rate": 1.1535783725402163e-05, |
| "loss": 1.2618, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.5061345158906134, |
| "grad_norm": 0.640115147587615, |
| "learning_rate": 1.1522178758679172e-05, |
| "loss": 1.222, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.5065287016506529, |
| "grad_norm": 0.6676485619547307, |
| "learning_rate": 1.1508570907034325e-05, |
| "loss": 1.2239, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.5069228874106922, |
| "grad_norm": 0.6584471811582958, |
| "learning_rate": 1.1494960196258016e-05, |
| "loss": 1.2261, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.5073170731707317, |
| "grad_norm": 0.6313871712156794, |
| "learning_rate": 1.1481346652146057e-05, |
| "loss": 1.2352, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.5077112589307712, |
| "grad_norm": 0.6192657373849317, |
| "learning_rate": 1.1467730300499626e-05, |
| "loss": 1.2161, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.5081054446908105, |
| "grad_norm": 0.661823259158885, |
| "learning_rate": 1.1454111167125231e-05, |
| "loss": 1.1869, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.50849963045085, |
| "grad_norm": 0.6581281171795876, |
| "learning_rate": 1.1440489277834645e-05, |
| "loss": 1.2408, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5088938162108894, |
| "grad_norm": 0.673672216319801, |
| "learning_rate": 1.1426864658444865e-05, |
| "loss": 1.2423, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.5092880019709288, |
| "grad_norm": 0.6709234458079614, |
| "learning_rate": 1.1413237334778064e-05, |
| "loss": 1.2092, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.5096821877309682, |
| "grad_norm": 0.6704668753810613, |
| "learning_rate": 1.139960733266154e-05, |
| "loss": 1.2005, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.5100763734910077, |
| "grad_norm": 0.6665476817077829, |
| "learning_rate": 1.1385974677927667e-05, |
| "loss": 1.2879, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.510470559251047, |
| "grad_norm": 0.6491129692417508, |
| "learning_rate": 1.1372339396413845e-05, |
| "loss": 1.2029, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.5108647450110865, |
| "grad_norm": 0.6370912475464865, |
| "learning_rate": 1.1358701513962457e-05, |
| "loss": 1.2327, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.5112589307711259, |
| "grad_norm": 0.648157038901389, |
| "learning_rate": 1.134506105642081e-05, |
| "loss": 1.2124, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.5116531165311653, |
| "grad_norm": 0.6461266035285687, |
| "learning_rate": 1.1331418049641091e-05, |
| "loss": 1.1982, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.5120473022912048, |
| "grad_norm": 0.6281200807330076, |
| "learning_rate": 1.1317772519480328e-05, |
| "loss": 1.2601, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.5124414880512441, |
| "grad_norm": 0.6422476551253151, |
| "learning_rate": 1.130412449180032e-05, |
| "loss": 1.1964, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5128356738112836, |
| "grad_norm": 0.63650842337126, |
| "learning_rate": 1.1290473992467607e-05, |
| "loss": 1.2076, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.5132298595713229, |
| "grad_norm": 0.6773389045891938, |
| "learning_rate": 1.1276821047353403e-05, |
| "loss": 1.2352, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.5136240453313624, |
| "grad_norm": 0.6309296879156464, |
| "learning_rate": 1.1263165682333577e-05, |
| "loss": 1.1772, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.5140182310914019, |
| "grad_norm": 0.6765478799067353, |
| "learning_rate": 1.1249507923288563e-05, |
| "loss": 1.2115, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.5144124168514412, |
| "grad_norm": 0.6831067353554151, |
| "learning_rate": 1.1235847796103345e-05, |
| "loss": 1.2322, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.5148066026114807, |
| "grad_norm": 0.6680880986848273, |
| "learning_rate": 1.122218532666739e-05, |
| "loss": 1.2728, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.5152007883715201, |
| "grad_norm": 0.645405977896472, |
| "learning_rate": 1.1208520540874607e-05, |
| "loss": 1.2003, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.5155949741315595, |
| "grad_norm": 0.6696823139879742, |
| "learning_rate": 1.1194853464623294e-05, |
| "loss": 1.1981, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.5159891598915989, |
| "grad_norm": 0.6530439594705855, |
| "learning_rate": 1.1181184123816092e-05, |
| "loss": 1.1805, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.5163833456516383, |
| "grad_norm": 0.662122019391009, |
| "learning_rate": 1.1167512544359929e-05, |
| "loss": 1.2935, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5167775314116777, |
| "grad_norm": 0.6515187138374906, |
| "learning_rate": 1.115383875216598e-05, |
| "loss": 1.236, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.5171717171717172, |
| "grad_norm": 0.6514508648345718, |
| "learning_rate": 1.1140162773149612e-05, |
| "loss": 1.1743, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.5175659029317566, |
| "grad_norm": 0.6440703774811735, |
| "learning_rate": 1.112648463323034e-05, |
| "loss": 1.2221, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.517960088691796, |
| "grad_norm": 0.6644581716811222, |
| "learning_rate": 1.1112804358331766e-05, |
| "loss": 1.1723, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.5183542744518355, |
| "grad_norm": 0.647476681026034, |
| "learning_rate": 1.1099121974381546e-05, |
| "loss": 1.2043, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.5187484602118748, |
| "grad_norm": 0.6615768891463015, |
| "learning_rate": 1.108543750731134e-05, |
| "loss": 1.1933, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.5191426459719143, |
| "grad_norm": 0.6352447330049817, |
| "learning_rate": 1.1071750983056733e-05, |
| "loss": 1.1965, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.5195368317319536, |
| "grad_norm": 0.6515803618281081, |
| "learning_rate": 1.105806242755723e-05, |
| "loss": 1.2412, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.5199310174919931, |
| "grad_norm": 0.6408728168852139, |
| "learning_rate": 1.1044371866756178e-05, |
| "loss": 1.2595, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.5203252032520326, |
| "grad_norm": 0.6136018250584243, |
| "learning_rate": 1.1030679326600726e-05, |
| "loss": 1.1597, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5207193890120719, |
| "grad_norm": 0.6341434671207334, |
| "learning_rate": 1.1016984833041773e-05, |
| "loss": 1.1992, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.5211135747721114, |
| "grad_norm": 0.6539064660047773, |
| "learning_rate": 1.1003288412033923e-05, |
| "loss": 1.1332, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.5215077605321508, |
| "grad_norm": 0.6232171122795831, |
| "learning_rate": 1.0989590089535426e-05, |
| "loss": 1.2388, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.5219019462921902, |
| "grad_norm": 0.6877295201168714, |
| "learning_rate": 1.097588989150815e-05, |
| "loss": 1.2525, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.5222961320522296, |
| "grad_norm": 0.7115352113501258, |
| "learning_rate": 1.0962187843917498e-05, |
| "loss": 1.2115, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.522690317812269, |
| "grad_norm": 0.642946361400015, |
| "learning_rate": 1.0948483972732395e-05, |
| "loss": 1.2129, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.5230845035723084, |
| "grad_norm": 0.634552641474732, |
| "learning_rate": 1.0934778303925214e-05, |
| "loss": 1.1845, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.5234786893323479, |
| "grad_norm": 0.6716816812404441, |
| "learning_rate": 1.0921070863471732e-05, |
| "loss": 1.2202, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.5238728750923873, |
| "grad_norm": 0.6403984245235527, |
| "learning_rate": 1.09073616773511e-05, |
| "loss": 1.2436, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.5242670608524267, |
| "grad_norm": 0.6426802290331379, |
| "learning_rate": 1.089365077154576e-05, |
| "loss": 1.1759, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5246612466124662, |
| "grad_norm": 0.6528320428327657, |
| "learning_rate": 1.0879938172041415e-05, |
| "loss": 1.234, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.5250554323725055, |
| "grad_norm": 0.6343235957872947, |
| "learning_rate": 1.0866223904826992e-05, |
| "loss": 1.1482, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.525449618132545, |
| "grad_norm": 0.635182058088562, |
| "learning_rate": 1.0852507995894558e-05, |
| "loss": 1.2054, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.5258438038925843, |
| "grad_norm": 0.6367031967484378, |
| "learning_rate": 1.0838790471239314e-05, |
| "loss": 1.1575, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.5262379896526238, |
| "grad_norm": 0.6402983704212438, |
| "learning_rate": 1.0825071356859502e-05, |
| "loss": 1.1966, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.5266321754126633, |
| "grad_norm": 0.6558137431376323, |
| "learning_rate": 1.0811350678756392e-05, |
| "loss": 1.2003, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.5270263611727026, |
| "grad_norm": 0.6387053585661903, |
| "learning_rate": 1.0797628462934214e-05, |
| "loss": 1.2108, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.5274205469327421, |
| "grad_norm": 0.6086598757639083, |
| "learning_rate": 1.0783904735400103e-05, |
| "loss": 1.1663, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.5278147326927815, |
| "grad_norm": 0.6399532215520667, |
| "learning_rate": 1.0770179522164079e-05, |
| "loss": 1.2112, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.5282089184528209, |
| "grad_norm": 0.6676098681703231, |
| "learning_rate": 1.0756452849238955e-05, |
| "loss": 1.2461, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.5286031042128603, |
| "grad_norm": 0.6540029616620948, |
| "learning_rate": 1.0742724742640323e-05, |
| "loss": 1.2397, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.5289972899728997, |
| "grad_norm": 0.6538972674770378, |
| "learning_rate": 1.0728995228386496e-05, |
| "loss": 1.2309, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.5293914757329391, |
| "grad_norm": 0.6772694870371185, |
| "learning_rate": 1.0715264332498445e-05, |
| "loss": 1.258, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.5297856614929786, |
| "grad_norm": 0.6376355859195808, |
| "learning_rate": 1.0701532080999762e-05, |
| "loss": 1.2376, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.530179847253018, |
| "grad_norm": 0.663394682115222, |
| "learning_rate": 1.0687798499916613e-05, |
| "loss": 1.2073, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.5305740330130574, |
| "grad_norm": 0.6701564343777298, |
| "learning_rate": 1.0674063615277681e-05, |
| "loss": 1.2365, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.5309682187730969, |
| "grad_norm": 0.6464607961695173, |
| "learning_rate": 1.0660327453114118e-05, |
| "loss": 1.1761, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.5313624045331362, |
| "grad_norm": 0.6383382398982943, |
| "learning_rate": 1.0646590039459499e-05, |
| "loss": 1.2069, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.5317565902931757, |
| "grad_norm": 0.7250328811363568, |
| "learning_rate": 1.063285140034977e-05, |
| "loss": 1.2748, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.532150776053215, |
| "grad_norm": 0.6218566182573235, |
| "learning_rate": 1.0619111561823208e-05, |
| "loss": 1.1792, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.5325449618132545, |
| "grad_norm": 0.6491294616401706, |
| "learning_rate": 1.060537054992034e-05, |
| "loss": 1.214, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.532939147573294, |
| "grad_norm": 0.6218758954772929, |
| "learning_rate": 1.0591628390683945e-05, |
| "loss": 1.1642, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.6423851142416096, |
| "learning_rate": 1.0577885110158959e-05, |
| "loss": 1.2269, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.5337275190933728, |
| "grad_norm": 0.6619276692624474, |
| "learning_rate": 1.0564140734392445e-05, |
| "loss": 1.2517, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.5341217048534121, |
| "grad_norm": 0.6486156036656686, |
| "learning_rate": 1.0550395289433553e-05, |
| "loss": 1.2318, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.5345158906134516, |
| "grad_norm": 0.6207033641119062, |
| "learning_rate": 1.0536648801333443e-05, |
| "loss": 1.22, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.534910076373491, |
| "grad_norm": 0.6286210196563511, |
| "learning_rate": 1.0522901296145263e-05, |
| "loss": 1.2087, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.5353042621335304, |
| "grad_norm": 0.6425274380062405, |
| "learning_rate": 1.0509152799924085e-05, |
| "loss": 1.2117, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.5356984478935698, |
| "grad_norm": 0.6192586936021032, |
| "learning_rate": 1.0495403338726862e-05, |
| "loss": 1.1948, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.5360926336536093, |
| "grad_norm": 0.6377697560605069, |
| "learning_rate": 1.0481652938612374e-05, |
| "loss": 1.2518, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.5364868194136487, |
| "grad_norm": 0.6359977533800316, |
| "learning_rate": 1.0467901625641174e-05, |
| "loss": 1.1883, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.5368810051736881, |
| "grad_norm": 0.6266522995098218, |
| "learning_rate": 1.045414942587556e-05, |
| "loss": 1.1223, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.5372751909337276, |
| "grad_norm": 0.6358734881969099, |
| "learning_rate": 1.0440396365379496e-05, |
| "loss": 1.2248, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.5376693766937669, |
| "grad_norm": 0.6182266673498269, |
| "learning_rate": 1.0426642470218587e-05, |
| "loss": 1.205, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.5380635624538064, |
| "grad_norm": 0.6485743617703122, |
| "learning_rate": 1.0412887766460017e-05, |
| "loss": 1.1979, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.5384577482138457, |
| "grad_norm": 0.6392709807479522, |
| "learning_rate": 1.0399132280172494e-05, |
| "loss": 1.2084, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.5388519339738852, |
| "grad_norm": 0.6545405852048852, |
| "learning_rate": 1.0385376037426227e-05, |
| "loss": 1.265, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.5392461197339247, |
| "grad_norm": 0.6496693130292205, |
| "learning_rate": 1.0371619064292844e-05, |
| "loss": 1.2467, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.539640305493964, |
| "grad_norm": 0.6835306554548173, |
| "learning_rate": 1.035786138684536e-05, |
| "loss": 1.2406, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.5400344912540035, |
| "grad_norm": 0.6433918833824575, |
| "learning_rate": 1.034410303115813e-05, |
| "loss": 1.2708, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.5404286770140428, |
| "grad_norm": 0.6391881556502016, |
| "learning_rate": 1.0330344023306791e-05, |
| "loss": 1.229, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.5408228627740823, |
| "grad_norm": 0.6778620828218745, |
| "learning_rate": 1.0316584389368213e-05, |
| "loss": 1.2611, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.5412170485341217, |
| "grad_norm": 0.6574985715883013, |
| "learning_rate": 1.0302824155420464e-05, |
| "loss": 1.2234, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.5416112342941611, |
| "grad_norm": 0.6714841683370039, |
| "learning_rate": 1.0289063347542727e-05, |
| "loss": 1.2057, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.5420054200542005, |
| "grad_norm": 0.646623331729815, |
| "learning_rate": 1.0275301991815299e-05, |
| "loss": 1.2366, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.54239960581424, |
| "grad_norm": 0.6267893952077622, |
| "learning_rate": 1.02615401143195e-05, |
| "loss": 1.2157, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.5427937915742794, |
| "grad_norm": 0.6430429787610838, |
| "learning_rate": 1.0247777741137636e-05, |
| "loss": 1.2459, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.5431879773343188, |
| "grad_norm": 0.6315063466990641, |
| "learning_rate": 1.0234014898352966e-05, |
| "loss": 1.2342, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.5435821630943583, |
| "grad_norm": 0.7220865603750691, |
| "learning_rate": 1.022025161204963e-05, |
| "loss": 1.2154, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.5439763488543976, |
| "grad_norm": 0.6377801583000084, |
| "learning_rate": 1.0206487908312607e-05, |
| "loss": 1.206, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5443705346144371, |
| "grad_norm": 0.6319172744640024, |
| "learning_rate": 1.0192723813227672e-05, |
| "loss": 1.1919, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.5447647203744764, |
| "grad_norm": 0.6364897393407957, |
| "learning_rate": 1.0178959352881337e-05, |
| "loss": 1.2146, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.5451589061345159, |
| "grad_norm": 0.6688375716623369, |
| "learning_rate": 1.0165194553360813e-05, |
| "loss": 1.2469, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.5455530918945554, |
| "grad_norm": 0.662719310669721, |
| "learning_rate": 1.0151429440753948e-05, |
| "loss": 1.3032, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.5459472776545947, |
| "grad_norm": 0.6431824004552453, |
| "learning_rate": 1.0137664041149187e-05, |
| "loss": 1.2224, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.5463414634146342, |
| "grad_norm": 0.6397813243923787, |
| "learning_rate": 1.0123898380635515e-05, |
| "loss": 1.1647, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.5467356491746735, |
| "grad_norm": 0.6349500431531321, |
| "learning_rate": 1.011013248530241e-05, |
| "loss": 1.2286, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.547129834934713, |
| "grad_norm": 0.6355731398653511, |
| "learning_rate": 1.0096366381239808e-05, |
| "loss": 1.1548, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.5475240206947524, |
| "grad_norm": 0.6272297906309461, |
| "learning_rate": 1.0082600094538029e-05, |
| "loss": 1.2372, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.5479182064547918, |
| "grad_norm": 0.6514286635524038, |
| "learning_rate": 1.0068833651287736e-05, |
| "loss": 1.1854, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5483123922148312, |
| "grad_norm": 0.6434159221463395, |
| "learning_rate": 1.0055067077579894e-05, |
| "loss": 1.1649, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.5487065779748707, |
| "grad_norm": 0.6534616096140339, |
| "learning_rate": 1.0041300399505724e-05, |
| "loss": 1.2058, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.5491007637349101, |
| "grad_norm": 0.6385843361048341, |
| "learning_rate": 1.0027533643156629e-05, |
| "loss": 1.206, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.5494949494949495, |
| "grad_norm": 0.654135497386305, |
| "learning_rate": 1.0013766834624168e-05, |
| "loss": 1.2947, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.549889135254989, |
| "grad_norm": 0.6527260856281124, |
| "learning_rate": 1e-05, |
| "loss": 1.2067, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.5502833210150283, |
| "grad_norm": 0.6456506343549768, |
| "learning_rate": 9.986233165375837e-06, |
| "loss": 1.2799, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.5506775067750678, |
| "grad_norm": 0.7246957748680044, |
| "learning_rate": 9.972466356843375e-06, |
| "loss": 1.3271, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.5510716925351071, |
| "grad_norm": 0.6399327077783894, |
| "learning_rate": 9.95869960049428e-06, |
| "loss": 1.2443, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.5514658782951466, |
| "grad_norm": 0.6241508398727628, |
| "learning_rate": 9.944932922420109e-06, |
| "loss": 1.2007, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.5518600640551861, |
| "grad_norm": 0.614559476153416, |
| "learning_rate": 9.931166348712268e-06, |
| "loss": 1.1704, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5522542498152254, |
| "grad_norm": 0.6304080966033335, |
| "learning_rate": 9.917399905461974e-06, |
| "loss": 1.1869, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.5526484355752649, |
| "grad_norm": 0.6412439956786309, |
| "learning_rate": 9.903633618760195e-06, |
| "loss": 1.1782, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.5530426213353042, |
| "grad_norm": 0.6557358908407644, |
| "learning_rate": 9.889867514697591e-06, |
| "loss": 1.225, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.5534368070953437, |
| "grad_norm": 0.6212875821927828, |
| "learning_rate": 9.876101619364487e-06, |
| "loss": 1.196, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.5538309928553831, |
| "grad_norm": 0.613555231324674, |
| "learning_rate": 9.862335958850816e-06, |
| "loss": 1.1592, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.5542251786154225, |
| "grad_norm": 0.6745935115478964, |
| "learning_rate": 9.848570559246055e-06, |
| "loss": 1.1877, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.5546193643754619, |
| "grad_norm": 0.6410977347319441, |
| "learning_rate": 9.834805446639187e-06, |
| "loss": 1.1612, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.5550135501355014, |
| "grad_norm": 0.6309144641717204, |
| "learning_rate": 9.821040647118666e-06, |
| "loss": 1.1425, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.5554077358955408, |
| "grad_norm": 0.6299676272735365, |
| "learning_rate": 9.807276186772335e-06, |
| "loss": 1.208, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.5558019216555802, |
| "grad_norm": 0.6178102722375627, |
| "learning_rate": 9.793512091687396e-06, |
| "loss": 1.1846, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5561961074156196, |
| "grad_norm": 0.622166600700565, |
| "learning_rate": 9.779748387950372e-06, |
| "loss": 1.1662, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.556590293175659, |
| "grad_norm": 0.6600214723637224, |
| "learning_rate": 9.765985101647037e-06, |
| "loss": 1.2892, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.5569844789356985, |
| "grad_norm": 0.6176714958995365, |
| "learning_rate": 9.752222258862364e-06, |
| "loss": 1.1706, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.5573786646957378, |
| "grad_norm": 0.5939231448625044, |
| "learning_rate": 9.738459885680502e-06, |
| "loss": 1.1488, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.5577728504557773, |
| "grad_norm": 0.6352717829639574, |
| "learning_rate": 9.724698008184705e-06, |
| "loss": 1.2017, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5581670362158168, |
| "grad_norm": 0.6167223796720016, |
| "learning_rate": 9.710936652457276e-06, |
| "loss": 1.1228, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.5585612219758561, |
| "grad_norm": 0.6213254460946624, |
| "learning_rate": 9.69717584457954e-06, |
| "loss": 1.184, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.5589554077358956, |
| "grad_norm": 0.6131341167960235, |
| "learning_rate": 9.683415610631788e-06, |
| "loss": 1.161, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.5593495934959349, |
| "grad_norm": 0.6296617155093078, |
| "learning_rate": 9.669655976693214e-06, |
| "loss": 1.1642, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.5597437792559744, |
| "grad_norm": 0.6153554191014486, |
| "learning_rate": 9.655896968841873e-06, |
| "loss": 1.2156, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5601379650160138, |
| "grad_norm": 0.6392439227341541, |
| "learning_rate": 9.642138613154643e-06, |
| "loss": 1.1957, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.5605321507760532, |
| "grad_norm": 0.6260052735651341, |
| "learning_rate": 9.62838093570716e-06, |
| "loss": 1.1974, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.5609263365360926, |
| "grad_norm": 0.6334362558009554, |
| "learning_rate": 9.614623962573776e-06, |
| "loss": 1.1965, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.561320522296132, |
| "grad_norm": 0.6179635946785395, |
| "learning_rate": 9.600867719827507e-06, |
| "loss": 1.1606, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.5617147080561715, |
| "grad_norm": 0.675892965228182, |
| "learning_rate": 9.587112233539988e-06, |
| "loss": 1.2698, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5621088938162109, |
| "grad_norm": 0.6269199497256357, |
| "learning_rate": 9.573357529781413e-06, |
| "loss": 1.1738, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.5625030795762503, |
| "grad_norm": 0.6206668162899066, |
| "learning_rate": 9.559603634620505e-06, |
| "loss": 1.1545, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.5628972653362897, |
| "grad_norm": 0.6392518680745602, |
| "learning_rate": 9.545850574124444e-06, |
| "loss": 1.2394, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.5632914510963292, |
| "grad_norm": 0.6554357478989767, |
| "learning_rate": 9.532098374358828e-06, |
| "loss": 1.2056, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.5636856368563685, |
| "grad_norm": 0.6321993644191258, |
| "learning_rate": 9.518347061387629e-06, |
| "loss": 1.2424, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.564079822616408, |
| "grad_norm": 0.6342077276536365, |
| "learning_rate": 9.504596661273141e-06, |
| "loss": 1.216, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.5644740083764475, |
| "grad_norm": 0.655567194868911, |
| "learning_rate": 9.490847200075919e-06, |
| "loss": 1.2236, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.5648681941364868, |
| "grad_norm": 0.6452206424611665, |
| "learning_rate": 9.47709870385474e-06, |
| "loss": 1.1493, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.5652623798965263, |
| "grad_norm": 0.6551732071227462, |
| "learning_rate": 9.46335119866656e-06, |
| "loss": 1.2243, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.5656565656565656, |
| "grad_norm": 0.638292981830309, |
| "learning_rate": 9.449604710566452e-06, |
| "loss": 1.2154, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5660507514166051, |
| "grad_norm": 0.6434536189993397, |
| "learning_rate": 9.435859265607555e-06, |
| "loss": 1.2622, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.5664449371766445, |
| "grad_norm": 0.6235727133771496, |
| "learning_rate": 9.422114889841045e-06, |
| "loss": 1.2097, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.5668391229366839, |
| "grad_norm": 0.6380544846865114, |
| "learning_rate": 9.40837160931606e-06, |
| "loss": 1.1931, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.5672333086967233, |
| "grad_norm": 0.6070307134735536, |
| "learning_rate": 9.394629450079661e-06, |
| "loss": 1.1728, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.5676274944567627, |
| "grad_norm": 0.6261762404486911, |
| "learning_rate": 9.380888438176797e-06, |
| "loss": 1.2047, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5680216802168022, |
| "grad_norm": 0.6148402557876401, |
| "learning_rate": 9.367148599650231e-06, |
| "loss": 1.1782, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.5684158659768416, |
| "grad_norm": 0.6153367707877275, |
| "learning_rate": 9.353409960540506e-06, |
| "loss": 1.1333, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.568810051736881, |
| "grad_norm": 0.6401365387127351, |
| "learning_rate": 9.339672546885885e-06, |
| "loss": 1.2479, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.5692042374969204, |
| "grad_norm": 0.6301673949669812, |
| "learning_rate": 9.325936384722322e-06, |
| "loss": 1.2015, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.5695984232569599, |
| "grad_norm": 0.6286144736358145, |
| "learning_rate": 9.312201500083392e-06, |
| "loss": 1.2487, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5699926090169992, |
| "grad_norm": 0.6171822342295599, |
| "learning_rate": 9.29846791900024e-06, |
| "loss": 1.1904, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.5703867947770387, |
| "grad_norm": 0.6428565759737676, |
| "learning_rate": 9.284735667501558e-06, |
| "loss": 1.1679, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.5707809805370782, |
| "grad_norm": 0.6151703289847316, |
| "learning_rate": 9.271004771613509e-06, |
| "loss": 1.1246, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.5711751662971175, |
| "grad_norm": 0.6398686829564575, |
| "learning_rate": 9.257275257359679e-06, |
| "loss": 1.1657, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.571569352057157, |
| "grad_norm": 0.6243382952424049, |
| "learning_rate": 9.243547150761047e-06, |
| "loss": 1.1966, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5719635378171963, |
| "grad_norm": 0.6408741873334287, |
| "learning_rate": 9.229820477835926e-06, |
| "loss": 1.2205, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.5723577235772358, |
| "grad_norm": 0.633552764994025, |
| "learning_rate": 9.216095264599895e-06, |
| "loss": 1.2252, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.5727519093372752, |
| "grad_norm": 0.6511108996685305, |
| "learning_rate": 9.202371537065788e-06, |
| "loss": 1.2656, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.5731460950973146, |
| "grad_norm": 0.6529280803122515, |
| "learning_rate": 9.18864932124361e-06, |
| "loss": 1.2239, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.573540280857354, |
| "grad_norm": 0.647401441010935, |
| "learning_rate": 9.1749286431405e-06, |
| "loss": 1.2716, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5739344666173934, |
| "grad_norm": 0.642622817859945, |
| "learning_rate": 9.161209528760691e-06, |
| "loss": 1.2222, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.5743286523774329, |
| "grad_norm": 0.6320811079325271, |
| "learning_rate": 9.147492004105443e-06, |
| "loss": 1.2481, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.5747228381374723, |
| "grad_norm": 0.6326782165239981, |
| "learning_rate": 9.133776095173015e-06, |
| "loss": 1.2739, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.5751170238975117, |
| "grad_norm": 0.6625216988220546, |
| "learning_rate": 9.120061827958586e-06, |
| "loss": 1.2355, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.5755112096575511, |
| "grad_norm": 0.6213952483408215, |
| "learning_rate": 9.106349228454242e-06, |
| "loss": 1.1701, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.5759053954175906, |
| "grad_norm": 0.6158204977575528, |
| "learning_rate": 9.092638322648904e-06, |
| "loss": 1.2463, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.5762995811776299, |
| "grad_norm": 0.6128069866736511, |
| "learning_rate": 9.078929136528267e-06, |
| "loss": 1.1581, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.5766937669376694, |
| "grad_norm": 0.6618087745723823, |
| "learning_rate": 9.06522169607479e-06, |
| "loss": 1.1823, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.5770879526977089, |
| "grad_norm": 0.6783150244501504, |
| "learning_rate": 9.05151602726761e-06, |
| "loss": 1.2302, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.5774821384577482, |
| "grad_norm": 0.6503369713306525, |
| "learning_rate": 9.037812156082503e-06, |
| "loss": 1.2407, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5778763242177877, |
| "grad_norm": 0.6456712064826, |
| "learning_rate": 9.024110108491855e-06, |
| "loss": 1.1609, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.578270509977827, |
| "grad_norm": 0.6486197805925519, |
| "learning_rate": 9.010409910464575e-06, |
| "loss": 1.2222, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.5786646957378665, |
| "grad_norm": 0.7436596366499776, |
| "learning_rate": 8.996711587966079e-06, |
| "loss": 1.2581, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.5790588814979059, |
| "grad_norm": 0.6261635281880413, |
| "learning_rate": 8.983015166958228e-06, |
| "loss": 1.2161, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.5794530672579453, |
| "grad_norm": 0.6443605688870468, |
| "learning_rate": 8.969320673399276e-06, |
| "loss": 1.1791, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5798472530179847, |
| "grad_norm": 0.671825587927519, |
| "learning_rate": 8.955628133243828e-06, |
| "loss": 1.218, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.5802414387780241, |
| "grad_norm": 0.6434248476334178, |
| "learning_rate": 8.941937572442773e-06, |
| "loss": 1.1846, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.5806356245380636, |
| "grad_norm": 0.6254667200582976, |
| "learning_rate": 8.92824901694327e-06, |
| "loss": 1.2353, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.581029810298103, |
| "grad_norm": 0.6232654021330023, |
| "learning_rate": 8.914562492688667e-06, |
| "loss": 1.114, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.5814239960581424, |
| "grad_norm": 0.6299635353186261, |
| "learning_rate": 8.900878025618453e-06, |
| "loss": 1.2504, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5818181818181818, |
| "grad_norm": 0.6833411898307228, |
| "learning_rate": 8.887195641668235e-06, |
| "loss": 1.2404, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5822123675782213, |
| "grad_norm": 0.6669528413277209, |
| "learning_rate": 8.873515366769666e-06, |
| "loss": 1.1557, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5826065533382606, |
| "grad_norm": 0.6340389941502457, |
| "learning_rate": 8.85983722685039e-06, |
| "loss": 1.1978, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.5830007390983001, |
| "grad_norm": 0.6504266413875779, |
| "learning_rate": 8.846161247834024e-06, |
| "loss": 1.2026, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.5833949248583395, |
| "grad_norm": 0.623448080239467, |
| "learning_rate": 8.832487455640074e-06, |
| "loss": 1.1968, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5837891106183789, |
| "grad_norm": 0.6377332989581492, |
| "learning_rate": 8.81881587618391e-06, |
| "loss": 1.1794, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.5841832963784184, |
| "grad_norm": 0.6487050264881453, |
| "learning_rate": 8.805146535376709e-06, |
| "loss": 1.2329, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.5845774821384577, |
| "grad_norm": 0.6866850553685105, |
| "learning_rate": 8.791479459125396e-06, |
| "loss": 1.2786, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.5849716678984972, |
| "grad_norm": 0.6241541462965179, |
| "learning_rate": 8.777814673332615e-06, |
| "loss": 1.1997, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.5853658536585366, |
| "grad_norm": 0.6488269216574984, |
| "learning_rate": 8.764152203896658e-06, |
| "loss": 1.1873, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.585760039418576, |
| "grad_norm": 0.6518659909159534, |
| "learning_rate": 8.750492076711439e-06, |
| "loss": 1.1964, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.5861542251786154, |
| "grad_norm": 0.6379498327658182, |
| "learning_rate": 8.736834317666428e-06, |
| "loss": 1.19, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.5865484109386548, |
| "grad_norm": 0.6073862610155873, |
| "learning_rate": 8.723178952646597e-06, |
| "loss": 1.1497, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5869425966986943, |
| "grad_norm": 0.6335121996922907, |
| "learning_rate": 8.709526007532396e-06, |
| "loss": 1.1905, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5873367824587337, |
| "grad_norm": 0.6478757542846147, |
| "learning_rate": 8.695875508199683e-06, |
| "loss": 1.1726, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5877309682187731, |
| "grad_norm": 0.6801004693955225, |
| "learning_rate": 8.682227480519672e-06, |
| "loss": 1.1956, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5881251539788125, |
| "grad_norm": 0.6869506155016226, |
| "learning_rate": 8.66858195035891e-06, |
| "loss": 1.2158, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.588519339738852, |
| "grad_norm": 0.6328860065449554, |
| "learning_rate": 8.654938943579194e-06, |
| "loss": 1.1986, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5889135254988913, |
| "grad_norm": 0.6966371382556359, |
| "learning_rate": 8.641298486037543e-06, |
| "loss": 1.2219, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.5893077112589308, |
| "grad_norm": 0.6706456600510302, |
| "learning_rate": 8.627660603586157e-06, |
| "loss": 1.2992, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5897018970189702, |
| "grad_norm": 0.6634528939701451, |
| "learning_rate": 8.614025322072338e-06, |
| "loss": 1.2412, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.5900960827790096, |
| "grad_norm": 0.6101971245071337, |
| "learning_rate": 8.600392667338465e-06, |
| "loss": 1.1347, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5904902685390491, |
| "grad_norm": 0.640682969790413, |
| "learning_rate": 8.58676266522194e-06, |
| "loss": 1.2015, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.5908844542990884, |
| "grad_norm": 0.648892739773898, |
| "learning_rate": 8.573135341555138e-06, |
| "loss": 1.1751, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.5912786400591279, |
| "grad_norm": 0.6497240357012373, |
| "learning_rate": 8.55951072216536e-06, |
| "loss": 1.2231, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5916728258191672, |
| "grad_norm": 0.653343396545042, |
| "learning_rate": 8.54588883287477e-06, |
| "loss": 1.1746, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.5920670115792067, |
| "grad_norm": 0.6432488267867399, |
| "learning_rate": 8.532269699500377e-06, |
| "loss": 1.1574, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.5924611973392461, |
| "grad_norm": 0.6545865486299587, |
| "learning_rate": 8.518653347853948e-06, |
| "loss": 1.2443, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.5928553830992855, |
| "grad_norm": 0.7869569426495164, |
| "learning_rate": 8.505039803741985e-06, |
| "loss": 1.2115, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.593249568859325, |
| "grad_norm": 0.61279157223736, |
| "learning_rate": 8.491429092965677e-06, |
| "loss": 1.1301, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5936437546193644, |
| "grad_norm": 0.6584615054581199, |
| "learning_rate": 8.477821241320831e-06, |
| "loss": 1.1872, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5940379403794038, |
| "grad_norm": 0.6651037222509211, |
| "learning_rate": 8.464216274597839e-06, |
| "loss": 1.1699, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5944321261394432, |
| "grad_norm": 0.6192362295929023, |
| "learning_rate": 8.450614218581631e-06, |
| "loss": 1.2301, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.5948263118994827, |
| "grad_norm": 0.6063957302686086, |
| "learning_rate": 8.437015099051613e-06, |
| "loss": 1.1558, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.595220497659522, |
| "grad_norm": 0.6463493132821347, |
| "learning_rate": 8.42341894178163e-06, |
| "loss": 1.2595, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5956146834195615, |
| "grad_norm": 0.6177688405321609, |
| "learning_rate": 8.409825772539905e-06, |
| "loss": 1.174, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.5960088691796009, |
| "grad_norm": 0.6181575708603189, |
| "learning_rate": 8.396235617089013e-06, |
| "loss": 1.1953, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5964030549396403, |
| "grad_norm": 0.6232523590903218, |
| "learning_rate": 8.382648501185806e-06, |
| "loss": 1.2131, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5967972406996798, |
| "grad_norm": 0.6853964780387746, |
| "learning_rate": 8.369064450581374e-06, |
| "loss": 1.2397, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5971914264597191, |
| "grad_norm": 0.638261822593998, |
| "learning_rate": 8.355483491021007e-06, |
| "loss": 1.1697, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5975856122197586, |
| "grad_norm": 0.6345858720982844, |
| "learning_rate": 8.341905648244122e-06, |
| "loss": 1.198, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.597979797979798, |
| "grad_norm": 0.6205371649965156, |
| "learning_rate": 8.328330947984243e-06, |
| "loss": 1.1509, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5983739837398374, |
| "grad_norm": 0.6780688159415363, |
| "learning_rate": 8.314759415968936e-06, |
| "loss": 1.2359, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5987681694998768, |
| "grad_norm": 0.6375070575615467, |
| "learning_rate": 8.301191077919753e-06, |
| "loss": 1.2035, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5991623552599162, |
| "grad_norm": 0.622909906771207, |
| "learning_rate": 8.2876259595522e-06, |
| "loss": 1.2104, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5995565410199557, |
| "grad_norm": 0.6094392519833095, |
| "learning_rate": 8.274064086575682e-06, |
| "loss": 1.1475, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5999507267799951, |
| "grad_norm": 0.621252910798821, |
| "learning_rate": 8.260505484693449e-06, |
| "loss": 1.1864, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.6003449125400345, |
| "grad_norm": 0.6698438223208214, |
| "learning_rate": 8.246950179602554e-06, |
| "loss": 1.1991, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.6007390983000739, |
| "grad_norm": 0.6520795365380274, |
| "learning_rate": 8.2333981969938e-06, |
| "loss": 1.1769, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.6011332840601133, |
| "grad_norm": 0.6522360114294746, |
| "learning_rate": 8.219849562551695e-06, |
| "loss": 1.2025, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.6015274698201527, |
| "grad_norm": 0.6295823752577447, |
| "learning_rate": 8.206304301954397e-06, |
| "loss": 1.1339, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.6019216555801922, |
| "grad_norm": 0.6483586741712484, |
| "learning_rate": 8.192762440873675e-06, |
| "loss": 1.1893, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.6023158413402316, |
| "grad_norm": 0.6574976200875523, |
| "learning_rate": 8.179224004974857e-06, |
| "loss": 1.1948, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.602710027100271, |
| "grad_norm": 0.6592927070571326, |
| "learning_rate": 8.165689019916769e-06, |
| "loss": 1.1865, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.6031042128603105, |
| "grad_norm": 0.6602088196871608, |
| "learning_rate": 8.152157511351704e-06, |
| "loss": 1.2788, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6034983986203498, |
| "grad_norm": 0.5966682622148229, |
| "learning_rate": 8.138629504925372e-06, |
| "loss": 1.1035, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.6038925843803893, |
| "grad_norm": 0.6472735298836796, |
| "learning_rate": 8.125105026276832e-06, |
| "loss": 1.2211, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.6042867701404286, |
| "grad_norm": 0.647741738867434, |
| "learning_rate": 8.111584101038462e-06, |
| "loss": 1.2187, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.6046809559004681, |
| "grad_norm": 0.6404826084219543, |
| "learning_rate": 8.098066754835916e-06, |
| "loss": 1.1788, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.6050751416605075, |
| "grad_norm": 0.6124100298486728, |
| "learning_rate": 8.084553013288048e-06, |
| "loss": 1.1426, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.6054693274205469, |
| "grad_norm": 0.6344901181171149, |
| "learning_rate": 8.071042902006896e-06, |
| "loss": 1.2431, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.6058635131805864, |
| "grad_norm": 0.6328920930143503, |
| "learning_rate": 8.057536446597598e-06, |
| "loss": 1.2025, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.6062576989406258, |
| "grad_norm": 0.6519280491300705, |
| "learning_rate": 8.044033672658387e-06, |
| "loss": 1.2351, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.6066518847006652, |
| "grad_norm": 0.6725946251767152, |
| "learning_rate": 8.0305346057805e-06, |
| "loss": 1.2485, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.6070460704607046, |
| "grad_norm": 0.657229000221368, |
| "learning_rate": 8.017039271548154e-06, |
| "loss": 1.1958, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.607440256220744, |
| "grad_norm": 0.63930798917721, |
| "learning_rate": 8.0035476955385e-06, |
| "loss": 1.2539, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.6078344419807834, |
| "grad_norm": 0.6356269105691521, |
| "learning_rate": 7.990059903321554e-06, |
| "loss": 1.174, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.6082286277408229, |
| "grad_norm": 0.6421402197109457, |
| "learning_rate": 7.97657592046016e-06, |
| "loss": 1.2085, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.6086228135008623, |
| "grad_norm": 0.6489422328975518, |
| "learning_rate": 7.96309577250996e-06, |
| "loss": 1.2387, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.6090169992609017, |
| "grad_norm": 0.6530006388057895, |
| "learning_rate": 7.949619485019307e-06, |
| "loss": 1.2009, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.6094111850209412, |
| "grad_norm": 0.6416958127168939, |
| "learning_rate": 7.936147083529245e-06, |
| "loss": 1.2154, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.6098053707809805, |
| "grad_norm": 0.6337303333525649, |
| "learning_rate": 7.922678593573462e-06, |
| "loss": 1.1974, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.61019955654102, |
| "grad_norm": 0.6637031259257837, |
| "learning_rate": 7.90921404067822e-06, |
| "loss": 1.2052, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.6105937423010593, |
| "grad_norm": 0.6473009660413165, |
| "learning_rate": 7.89575345036232e-06, |
| "loss": 1.2473, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.6109879280610988, |
| "grad_norm": 0.6261555671205469, |
| "learning_rate": 7.882296848137063e-06, |
| "loss": 1.2066, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6113821138211382, |
| "grad_norm": 0.6177349103271258, |
| "learning_rate": 7.868844259506186e-06, |
| "loss": 1.1547, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.6117762995811776, |
| "grad_norm": 0.6264274304099752, |
| "learning_rate": 7.855395709965814e-06, |
| "loss": 1.2039, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.6121704853412171, |
| "grad_norm": 0.6208965372231373, |
| "learning_rate": 7.84195122500442e-06, |
| "loss": 1.1659, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.6125646711012565, |
| "grad_norm": 0.6182902432180839, |
| "learning_rate": 7.828510830102785e-06, |
| "loss": 1.1802, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.6129588568612959, |
| "grad_norm": 0.6010062493402437, |
| "learning_rate": 7.815074550733919e-06, |
| "loss": 1.1624, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.6133530426213353, |
| "grad_norm": 0.6100632398399762, |
| "learning_rate": 7.801642412363042e-06, |
| "loss": 1.1588, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.6137472283813747, |
| "grad_norm": 0.6244968785224004, |
| "learning_rate": 7.788214440447532e-06, |
| "loss": 1.16, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.6141414141414141, |
| "grad_norm": 0.6262394381187797, |
| "learning_rate": 7.774790660436857e-06, |
| "loss": 1.1379, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.6145355999014536, |
| "grad_norm": 0.6268360201286511, |
| "learning_rate": 7.761371097772548e-06, |
| "loss": 1.1632, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.614929785661493, |
| "grad_norm": 0.6450865669879012, |
| "learning_rate": 7.747955777888145e-06, |
| "loss": 1.1762, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.6153239714215324, |
| "grad_norm": 0.6424738031868468, |
| "learning_rate": 7.734544726209143e-06, |
| "loss": 1.1559, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.6157181571815719, |
| "grad_norm": 0.637950698301497, |
| "learning_rate": 7.721137968152944e-06, |
| "loss": 1.1831, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.6161123429416112, |
| "grad_norm": 0.6186538417807995, |
| "learning_rate": 7.707735529128819e-06, |
| "loss": 1.1962, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.6165065287016507, |
| "grad_norm": 0.6181805636977189, |
| "learning_rate": 7.694337434537856e-06, |
| "loss": 1.1768, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.61690071446169, |
| "grad_norm": 0.6254768111350152, |
| "learning_rate": 7.680943709772899e-06, |
| "loss": 1.1604, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.6172949002217295, |
| "grad_norm": 0.644104659671372, |
| "learning_rate": 7.667554380218513e-06, |
| "loss": 1.2107, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.6176890859817689, |
| "grad_norm": 0.6537180884599917, |
| "learning_rate": 7.654169471250945e-06, |
| "loss": 1.2834, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.6180832717418083, |
| "grad_norm": 0.6361808370235917, |
| "learning_rate": 7.640789008238044e-06, |
| "loss": 1.1062, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.6184774575018478, |
| "grad_norm": 0.6523288827402758, |
| "learning_rate": 7.627413016539247e-06, |
| "loss": 1.1986, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.6188716432618871, |
| "grad_norm": 0.6285054549406514, |
| "learning_rate": 7.614041521505517e-06, |
| "loss": 1.1758, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.6192658290219266, |
| "grad_norm": 0.6272952169331758, |
| "learning_rate": 7.6006745484792855e-06, |
| "loss": 1.1788, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.619660014781966, |
| "grad_norm": 0.6500656109205114, |
| "learning_rate": 7.587312122794414e-06, |
| "loss": 1.2231, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.6200542005420054, |
| "grad_norm": 0.6954118875061881, |
| "learning_rate": 7.5739542697761615e-06, |
| "loss": 1.2549, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.6204483863020448, |
| "grad_norm": 0.6226893727767379, |
| "learning_rate": 7.560601014741103e-06, |
| "loss": 1.1388, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.6208425720620843, |
| "grad_norm": 0.6505634755873115, |
| "learning_rate": 7.547252382997101e-06, |
| "loss": 1.2098, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.6212367578221237, |
| "grad_norm": 0.6498328807173522, |
| "learning_rate": 7.533908399843266e-06, |
| "loss": 1.1734, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.6216309435821631, |
| "grad_norm": 0.6761129099478455, |
| "learning_rate": 7.520569090569894e-06, |
| "loss": 1.1757, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.6220251293422026, |
| "grad_norm": 0.6971630762485974, |
| "learning_rate": 7.507234480458414e-06, |
| "loss": 1.2566, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.6224193151022419, |
| "grad_norm": 0.6237942794960373, |
| "learning_rate": 7.493904594781358e-06, |
| "loss": 1.1296, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.6228135008622814, |
| "grad_norm": 0.6295586177215396, |
| "learning_rate": 7.4805794588023086e-06, |
| "loss": 1.1169, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.6232076866223207, |
| "grad_norm": 0.6408732189903159, |
| "learning_rate": 7.4672590977758295e-06, |
| "loss": 1.1301, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.6236018723823602, |
| "grad_norm": 0.6771354689742808, |
| "learning_rate": 7.45394353694745e-06, |
| "loss": 1.2348, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.6239960581423996, |
| "grad_norm": 0.640613127950835, |
| "learning_rate": 7.4406328015536e-06, |
| "loss": 1.196, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.624390243902439, |
| "grad_norm": 0.650879151108994, |
| "learning_rate": 7.427326916821557e-06, |
| "loss": 1.1784, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.6247844296624785, |
| "grad_norm": 0.6596072847031024, |
| "learning_rate": 7.414025907969404e-06, |
| "loss": 1.2214, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.6251786154225178, |
| "grad_norm": 0.6278635059421687, |
| "learning_rate": 7.4007298002059965e-06, |
| "loss": 1.1567, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.6255728011825573, |
| "grad_norm": 0.6225891858209661, |
| "learning_rate": 7.387438618730891e-06, |
| "loss": 1.1644, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.6259669869425967, |
| "grad_norm": 0.6387712671736495, |
| "learning_rate": 7.3741523887343015e-06, |
| "loss": 1.1932, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.6263611727026361, |
| "grad_norm": 0.6731157388955487, |
| "learning_rate": 7.360871135397072e-06, |
| "loss": 1.2878, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.6267553584626755, |
| "grad_norm": 0.6067881423807671, |
| "learning_rate": 7.347594883890608e-06, |
| "loss": 1.1341, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.627149544222715, |
| "grad_norm": 0.6315807367438574, |
| "learning_rate": 7.3343236593768295e-06, |
| "loss": 1.15, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.6275437299827544, |
| "grad_norm": 0.6828787333827238, |
| "learning_rate": 7.321057487008136e-06, |
| "loss": 1.2797, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.6279379157427938, |
| "grad_norm": 0.636378285588495, |
| "learning_rate": 7.307796391927356e-06, |
| "loss": 1.2114, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.6283321015028333, |
| "grad_norm": 0.6227706869499603, |
| "learning_rate": 7.294540399267682e-06, |
| "loss": 1.2107, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.6287262872628726, |
| "grad_norm": 0.6542527940502086, |
| "learning_rate": 7.281289534152644e-06, |
| "loss": 1.1301, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.6291204730229121, |
| "grad_norm": 0.6481496871980028, |
| "learning_rate": 7.268043821696062e-06, |
| "loss": 1.2319, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.6295146587829514, |
| "grad_norm": 0.6445223927771241, |
| "learning_rate": 7.254803287001975e-06, |
| "loss": 1.2334, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.6299088445429909, |
| "grad_norm": 0.6329838727914758, |
| "learning_rate": 7.24156795516461e-06, |
| "loss": 1.1496, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.6303030303030303, |
| "grad_norm": 0.6299335180741068, |
| "learning_rate": 7.22833785126835e-06, |
| "loss": 1.184, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.6306972160630697, |
| "grad_norm": 0.6284096678702693, |
| "learning_rate": 7.215113000387654e-06, |
| "loss": 1.254, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.6310914018231092, |
| "grad_norm": 0.6324689716112708, |
| "learning_rate": 7.201893427587026e-06, |
| "loss": 1.1721, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.6314855875831485, |
| "grad_norm": 0.6858753419716495, |
| "learning_rate": 7.188679157920977e-06, |
| "loss": 1.1898, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.631879773343188, |
| "grad_norm": 0.6556988105872994, |
| "learning_rate": 7.1754702164339575e-06, |
| "loss": 1.2545, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.6322739591032274, |
| "grad_norm": 0.6195080831875678, |
| "learning_rate": 7.1622666281603235e-06, |
| "loss": 1.2272, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.6326681448632668, |
| "grad_norm": 0.6586395858980946, |
| "learning_rate": 7.149068418124281e-06, |
| "loss": 1.2194, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.6330623306233062, |
| "grad_norm": 0.6447888871223056, |
| "learning_rate": 7.1358756113398545e-06, |
| "loss": 1.2575, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.6334565163833457, |
| "grad_norm": 0.60959438103777, |
| "learning_rate": 7.122688232810815e-06, |
| "loss": 1.2215, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.6338507021433851, |
| "grad_norm": 0.6336168777241095, |
| "learning_rate": 7.109506307530646e-06, |
| "loss": 1.2274, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.6342448879034245, |
| "grad_norm": 0.6166032302997211, |
| "learning_rate": 7.096329860482507e-06, |
| "loss": 1.2061, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.634639073663464, |
| "grad_norm": 0.6674971360893448, |
| "learning_rate": 7.083158916639169e-06, |
| "loss": 1.3014, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.6350332594235033, |
| "grad_norm": 0.6542997563204203, |
| "learning_rate": 7.069993500962964e-06, |
| "loss": 1.139, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.6354274451835428, |
| "grad_norm": 0.6233870945052585, |
| "learning_rate": 7.056833638405762e-06, |
| "loss": 1.1705, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.6358216309435821, |
| "grad_norm": 0.6532480222627909, |
| "learning_rate": 7.043679353908901e-06, |
| "loss": 1.2109, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.6362158167036216, |
| "grad_norm": 0.6249185015676082, |
| "learning_rate": 7.0305306724031396e-06, |
| "loss": 1.1821, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.636610002463661, |
| "grad_norm": 0.6218410031542252, |
| "learning_rate": 7.017387618808634e-06, |
| "loss": 1.1483, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.6370041882237004, |
| "grad_norm": 0.6490684142962722, |
| "learning_rate": 7.0042502180348635e-06, |
| "loss": 1.2157, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.6373983739837399, |
| "grad_norm": 0.6034827634471542, |
| "learning_rate": 6.991118494980591e-06, |
| "loss": 1.1842, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.6377925597437792, |
| "grad_norm": 0.6274462711346118, |
| "learning_rate": 6.977992474533823e-06, |
| "loss": 1.2361, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.6381867455038187, |
| "grad_norm": 0.6760850255550227, |
| "learning_rate": 6.964872181571765e-06, |
| "loss": 1.1862, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.6385809312638581, |
| "grad_norm": 0.6396402151072694, |
| "learning_rate": 6.9517576409607545e-06, |
| "loss": 1.2231, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.6389751170238975, |
| "grad_norm": 0.6338829150069218, |
| "learning_rate": 6.938648877556231e-06, |
| "loss": 1.2246, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.6393693027839369, |
| "grad_norm": 0.6473593135129597, |
| "learning_rate": 6.925545916202692e-06, |
| "loss": 1.2431, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.6397634885439764, |
| "grad_norm": 0.6401312934763702, |
| "learning_rate": 6.912448781733633e-06, |
| "loss": 1.2157, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.6401576743040158, |
| "grad_norm": 0.6399148681302655, |
| "learning_rate": 6.8993574989714995e-06, |
| "loss": 1.1838, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.6405518600640552, |
| "grad_norm": 0.5966358662573188, |
| "learning_rate": 6.88627209272766e-06, |
| "loss": 1.1593, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.6409460458240946, |
| "grad_norm": 0.6516019968106155, |
| "learning_rate": 6.87319258780234e-06, |
| "loss": 1.1743, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.641340231584134, |
| "grad_norm": 0.623888477031532, |
| "learning_rate": 6.860119008984569e-06, |
| "loss": 1.2352, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.6417344173441735, |
| "grad_norm": 0.6462585435255515, |
| "learning_rate": 6.847051381052165e-06, |
| "loss": 1.1955, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.6421286031042128, |
| "grad_norm": 0.6285337684977241, |
| "learning_rate": 6.833989728771657e-06, |
| "loss": 1.2102, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.6425227888642523, |
| "grad_norm": 0.6313390139589669, |
| "learning_rate": 6.820934076898247e-06, |
| "loss": 1.209, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.6429169746242916, |
| "grad_norm": 0.6219389731857671, |
| "learning_rate": 6.8078844501757625e-06, |
| "loss": 1.1647, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.6433111603843311, |
| "grad_norm": 0.6255385020113866, |
| "learning_rate": 6.794840873336622e-06, |
| "loss": 1.2185, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.6437053461443706, |
| "grad_norm": 0.6214536562298445, |
| "learning_rate": 6.781803371101774e-06, |
| "loss": 1.2235, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.6440995319044099, |
| "grad_norm": 0.6520907124359351, |
| "learning_rate": 6.768771968180643e-06, |
| "loss": 1.2638, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.6444937176644494, |
| "grad_norm": 0.6349696744735929, |
| "learning_rate": 6.755746689271112e-06, |
| "loss": 1.2064, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.6448879034244888, |
| "grad_norm": 0.6202351218573725, |
| "learning_rate": 6.742727559059448e-06, |
| "loss": 1.2017, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.6452820891845282, |
| "grad_norm": 0.6114039580216786, |
| "learning_rate": 6.729714602220256e-06, |
| "loss": 1.1862, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.6456762749445676, |
| "grad_norm": 0.6747317843915315, |
| "learning_rate": 6.71670784341646e-06, |
| "loss": 1.2687, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.646070460704607, |
| "grad_norm": 0.6221379676750881, |
| "learning_rate": 6.703707307299224e-06, |
| "loss": 1.1739, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.6464646464646465, |
| "grad_norm": 0.6067484985660325, |
| "learning_rate": 6.690713018507917e-06, |
| "loss": 1.1716, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.6468588322246859, |
| "grad_norm": 0.6646806120765326, |
| "learning_rate": 6.677725001670078e-06, |
| "loss": 1.2563, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.6472530179847253, |
| "grad_norm": 0.6381676236429237, |
| "learning_rate": 6.664743281401351e-06, |
| "loss": 1.2079, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.6476472037447647, |
| "grad_norm": 0.6325821061959688, |
| "learning_rate": 6.651767882305447e-06, |
| "loss": 1.1695, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.6480413895048042, |
| "grad_norm": 0.6475669717517898, |
| "learning_rate": 6.6387988289741e-06, |
| "loss": 1.2316, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.6484355752648435, |
| "grad_norm": 0.6328642670845832, |
| "learning_rate": 6.625836145987015e-06, |
| "loss": 1.187, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.648829761024883, |
| "grad_norm": 0.6356937277383269, |
| "learning_rate": 6.612879857911825e-06, |
| "loss": 1.1713, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.6492239467849223, |
| "grad_norm": 0.6286143776886958, |
| "learning_rate": 6.599929989304034e-06, |
| "loss": 1.1949, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.6496181325449618, |
| "grad_norm": 0.6251531191060387, |
| "learning_rate": 6.5869865647069995e-06, |
| "loss": 1.1918, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.6500123183050013, |
| "grad_norm": 0.6111849191258597, |
| "learning_rate": 6.574049608651849e-06, |
| "loss": 1.1922, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.6504065040650406, |
| "grad_norm": 0.6172328892977227, |
| "learning_rate": 6.561119145657451e-06, |
| "loss": 1.2013, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.6508006898250801, |
| "grad_norm": 0.6563068727145971, |
| "learning_rate": 6.548195200230376e-06, |
| "loss": 1.1936, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.6511948755851195, |
| "grad_norm": 0.6451511184566149, |
| "learning_rate": 6.535277796864842e-06, |
| "loss": 1.1765, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.6515890613451589, |
| "grad_norm": 0.6148495858039739, |
| "learning_rate": 6.522366960042654e-06, |
| "loss": 1.1506, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.6519832471051983, |
| "grad_norm": 0.6125300863917666, |
| "learning_rate": 6.509462714233194e-06, |
| "loss": 1.1669, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.6523774328652377, |
| "grad_norm": 0.630309988193399, |
| "learning_rate": 6.496565083893333e-06, |
| "loss": 1.1889, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.6527716186252772, |
| "grad_norm": 0.6634157824387188, |
| "learning_rate": 6.483674093467409e-06, |
| "loss": 1.2278, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.6531658043853166, |
| "grad_norm": 0.631045534805432, |
| "learning_rate": 6.470789767387188e-06, |
| "loss": 1.1569, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.653559990145356, |
| "grad_norm": 0.6445024253655253, |
| "learning_rate": 6.457912130071786e-06, |
| "loss": 1.2291, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.6539541759053954, |
| "grad_norm": 0.6295685120939664, |
| "learning_rate": 6.445041205927658e-06, |
| "loss": 1.1953, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.6543483616654349, |
| "grad_norm": 0.6095510411838025, |
| "learning_rate": 6.432177019348521e-06, |
| "loss": 1.2001, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6547425474254742, |
| "grad_norm": 0.6444146297988372, |
| "learning_rate": 6.419319594715338e-06, |
| "loss": 1.244, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.6551367331855137, |
| "grad_norm": 0.6104207832263667, |
| "learning_rate": 6.4064689563962505e-06, |
| "loss": 1.1556, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.655530918945553, |
| "grad_norm": 0.6326952360287978, |
| "learning_rate": 6.393625128746527e-06, |
| "loss": 1.1521, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.6559251047055925, |
| "grad_norm": 0.640334858610275, |
| "learning_rate": 6.3807881361085465e-06, |
| "loss": 1.181, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.656319290465632, |
| "grad_norm": 0.6504217808929613, |
| "learning_rate": 6.367958002811726e-06, |
| "loss": 1.1974, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.6567134762256713, |
| "grad_norm": 0.6529534715347126, |
| "learning_rate": 6.355134753172474e-06, |
| "loss": 1.1889, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.6571076619857108, |
| "grad_norm": 0.6654769765183821, |
| "learning_rate": 6.3423184114941686e-06, |
| "loss": 1.1865, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.6575018477457502, |
| "grad_norm": 0.6436155169730803, |
| "learning_rate": 6.32950900206708e-06, |
| "loss": 1.1647, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.6578960335057896, |
| "grad_norm": 0.6503660356165931, |
| "learning_rate": 6.31670654916835e-06, |
| "loss": 1.1674, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.658290219265829, |
| "grad_norm": 0.6608765081904892, |
| "learning_rate": 6.303911077061937e-06, |
| "loss": 1.2069, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6586844050258684, |
| "grad_norm": 0.6417814536413016, |
| "learning_rate": 6.291122609998559e-06, |
| "loss": 1.2464, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.6590785907859079, |
| "grad_norm": 0.6676289218023853, |
| "learning_rate": 6.278341172215669e-06, |
| "loss": 1.2228, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.6594727765459473, |
| "grad_norm": 0.6280886790009287, |
| "learning_rate": 6.265566787937386e-06, |
| "loss": 1.1968, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.6598669623059867, |
| "grad_norm": 0.6483564238116941, |
| "learning_rate": 6.252799481374472e-06, |
| "loss": 1.2109, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.6602611480660261, |
| "grad_norm": 0.6189215649081374, |
| "learning_rate": 6.240039276724273e-06, |
| "loss": 1.196, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.6606553338260656, |
| "grad_norm": 0.6496483405660746, |
| "learning_rate": 6.227286198170663e-06, |
| "loss": 1.2246, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.6610495195861049, |
| "grad_norm": 0.6436584140179482, |
| "learning_rate": 6.214540269884026e-06, |
| "loss": 1.2284, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.6614437053461444, |
| "grad_norm": 0.6076777270904066, |
| "learning_rate": 6.20180151602119e-06, |
| "loss": 1.1942, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.6618378911061837, |
| "grad_norm": 0.636033416189757, |
| "learning_rate": 6.189069960725375e-06, |
| "loss": 1.1675, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.6622320768662232, |
| "grad_norm": 0.6396164730580286, |
| "learning_rate": 6.176345628126176e-06, |
| "loss": 1.1487, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6626262626262627, |
| "grad_norm": 0.6015028228353986, |
| "learning_rate": 6.163628542339482e-06, |
| "loss": 1.1619, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.663020448386302, |
| "grad_norm": 0.6749292049019211, |
| "learning_rate": 6.150918727467455e-06, |
| "loss": 1.254, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.6634146341463415, |
| "grad_norm": 0.6328636162023467, |
| "learning_rate": 6.138216207598484e-06, |
| "loss": 1.2299, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.6638088199063809, |
| "grad_norm": 0.6214587756005278, |
| "learning_rate": 6.125521006807116e-06, |
| "loss": 1.2219, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.6642030056664203, |
| "grad_norm": 0.6537286104808447, |
| "learning_rate": 6.112833149154042e-06, |
| "loss": 1.2113, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.6645971914264597, |
| "grad_norm": 0.609872538457475, |
| "learning_rate": 6.10015265868602e-06, |
| "loss": 1.1715, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.6649913771864991, |
| "grad_norm": 0.6494731629680189, |
| "learning_rate": 6.0874795594358635e-06, |
| "loss": 1.2314, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.6653855629465386, |
| "grad_norm": 0.632923311793017, |
| "learning_rate": 6.0748138754223665e-06, |
| "loss": 1.1768, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.665779748706578, |
| "grad_norm": 0.6247202140755514, |
| "learning_rate": 6.062155630650265e-06, |
| "loss": 1.1812, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.6661739344666174, |
| "grad_norm": 0.631382377815529, |
| "learning_rate": 6.04950484911021e-06, |
| "loss": 1.1885, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.6665681202266568, |
| "grad_norm": 0.6138459038575285, |
| "learning_rate": 6.036861554778695e-06, |
| "loss": 1.1024, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.6669623059866963, |
| "grad_norm": 0.6265529929087996, |
| "learning_rate": 6.024225771618024e-06, |
| "loss": 1.1803, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.6673564917467356, |
| "grad_norm": 0.6227616940366973, |
| "learning_rate": 6.01159752357628e-06, |
| "loss": 1.2006, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.6677506775067751, |
| "grad_norm": 0.6558790947502295, |
| "learning_rate": 5.998976834587246e-06, |
| "loss": 1.2862, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.6681448632668144, |
| "grad_norm": 0.6304744900349945, |
| "learning_rate": 5.98636372857039e-06, |
| "loss": 1.1633, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.6685390490268539, |
| "grad_norm": 0.6318297859034908, |
| "learning_rate": 5.973758229430806e-06, |
| "loss": 1.2295, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.6689332347868934, |
| "grad_norm": 0.5988437549278761, |
| "learning_rate": 5.961160361059168e-06, |
| "loss": 1.1157, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.6693274205469327, |
| "grad_norm": 0.6137920151619946, |
| "learning_rate": 5.9485701473316925e-06, |
| "loss": 1.1448, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.6697216063069722, |
| "grad_norm": 0.6329970134758367, |
| "learning_rate": 5.935987612110081e-06, |
| "loss": 1.1792, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.6701157920670116, |
| "grad_norm": 0.6102586025760833, |
| "learning_rate": 5.923412779241493e-06, |
| "loss": 1.1214, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.670509977827051, |
| "grad_norm": 0.6016261422928656, |
| "learning_rate": 5.910845672558483e-06, |
| "loss": 1.1718, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.6709041635870904, |
| "grad_norm": 0.6144263728280865, |
| "learning_rate": 5.8982863158789605e-06, |
| "loss": 1.1613, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.6712983493471298, |
| "grad_norm": 0.621741539871381, |
| "learning_rate": 5.8857347330061545e-06, |
| "loss": 1.2034, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.6716925351071693, |
| "grad_norm": 0.6395204468391608, |
| "learning_rate": 5.873190947728552e-06, |
| "loss": 1.2198, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.6720867208672087, |
| "grad_norm": 0.606550147222352, |
| "learning_rate": 5.860654983819865e-06, |
| "loss": 1.1776, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6724809066272481, |
| "grad_norm": 0.61755989526117, |
| "learning_rate": 5.84812686503899e-06, |
| "loss": 1.2269, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.6728750923872875, |
| "grad_norm": 0.7087998957119107, |
| "learning_rate": 5.83560661512994e-06, |
| "loss": 1.2204, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.673269278147327, |
| "grad_norm": 0.6413367764373633, |
| "learning_rate": 5.823094257821822e-06, |
| "loss": 1.1834, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.6736634639073663, |
| "grad_norm": 0.6157486461013707, |
| "learning_rate": 5.810589816828786e-06, |
| "loss": 1.1602, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.6740576496674058, |
| "grad_norm": 0.6342496529809019, |
| "learning_rate": 5.798093315849984e-06, |
| "loss": 1.2135, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6744518354274451, |
| "grad_norm": 0.6117339478605194, |
| "learning_rate": 5.785604778569505e-06, |
| "loss": 1.177, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.6748460211874846, |
| "grad_norm": 0.6360723349056584, |
| "learning_rate": 5.773124228656348e-06, |
| "loss": 1.2873, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.6752402069475241, |
| "grad_norm": 0.6302819005649393, |
| "learning_rate": 5.76065168976439e-06, |
| "loss": 1.1972, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.6756343927075634, |
| "grad_norm": 0.6224162266525995, |
| "learning_rate": 5.748187185532306e-06, |
| "loss": 1.1855, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.6760285784676029, |
| "grad_norm": 0.6281722704464516, |
| "learning_rate": 5.73573073958355e-06, |
| "loss": 1.1815, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6764227642276422, |
| "grad_norm": 0.6081887852352087, |
| "learning_rate": 5.723282375526302e-06, |
| "loss": 1.1804, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.6768169499876817, |
| "grad_norm": 0.6352236721472015, |
| "learning_rate": 5.7108421169534376e-06, |
| "loss": 1.1534, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.6772111357477211, |
| "grad_norm": 0.5979382590678716, |
| "learning_rate": 5.698409987442448e-06, |
| "loss": 1.1452, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.6776053215077605, |
| "grad_norm": 0.6036448112025448, |
| "learning_rate": 5.685986010555437e-06, |
| "loss": 1.1876, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.6779995072678, |
| "grad_norm": 0.6219506058018258, |
| "learning_rate": 5.6735702098390454e-06, |
| "loss": 1.2324, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6783936930278394, |
| "grad_norm": 0.6263654931652052, |
| "learning_rate": 5.66116260882442e-06, |
| "loss": 1.1572, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.6787878787878788, |
| "grad_norm": 0.6278411193914041, |
| "learning_rate": 5.648763231027171e-06, |
| "loss": 1.1307, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.6791820645479182, |
| "grad_norm": 0.6294069087185388, |
| "learning_rate": 5.636372099947327e-06, |
| "loss": 1.2278, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.6795762503079577, |
| "grad_norm": 0.6296558801771532, |
| "learning_rate": 5.623989239069275e-06, |
| "loss": 1.1627, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.679970436067997, |
| "grad_norm": 0.6385637803835064, |
| "learning_rate": 5.611614671861733e-06, |
| "loss": 1.1481, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6803646218280365, |
| "grad_norm": 0.6307923826155407, |
| "learning_rate": 5.5992484217777074e-06, |
| "loss": 1.2114, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.6807588075880758, |
| "grad_norm": 0.6040246463542289, |
| "learning_rate": 5.5868905122544344e-06, |
| "loss": 1.2137, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.6811529933481153, |
| "grad_norm": 0.6139446753066389, |
| "learning_rate": 5.574540966713338e-06, |
| "loss": 1.1472, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.6815471791081548, |
| "grad_norm": 0.6430020863098516, |
| "learning_rate": 5.562199808560001e-06, |
| "loss": 1.2109, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.6819413648681941, |
| "grad_norm": 0.6061201727927807, |
| "learning_rate": 5.549867061184108e-06, |
| "loss": 1.1718, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6823355506282336, |
| "grad_norm": 0.6422178072097416, |
| "learning_rate": 5.5375427479593945e-06, |
| "loss": 1.1794, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.682729736388273, |
| "grad_norm": 0.6458731861630423, |
| "learning_rate": 5.525226892243623e-06, |
| "loss": 1.2502, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.6831239221483124, |
| "grad_norm": 0.631975611730984, |
| "learning_rate": 5.5129195173785184e-06, |
| "loss": 1.224, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.6835181079083518, |
| "grad_norm": 0.639062643993908, |
| "learning_rate": 5.50062064668973e-06, |
| "loss": 1.2374, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.6839122936683912, |
| "grad_norm": 0.6153286588995233, |
| "learning_rate": 5.488330303486795e-06, |
| "loss": 1.1532, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6843064794284307, |
| "grad_norm": 0.6095750520956184, |
| "learning_rate": 5.4760485110630956e-06, |
| "loss": 1.1539, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.6847006651884701, |
| "grad_norm": 0.6242095926386367, |
| "learning_rate": 5.46377529269579e-06, |
| "loss": 1.1842, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.6850948509485095, |
| "grad_norm": 0.6373500217851757, |
| "learning_rate": 5.451510671645806e-06, |
| "loss": 1.2564, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.6854890367085489, |
| "grad_norm": 0.6528326441972604, |
| "learning_rate": 5.439254671157764e-06, |
| "loss": 1.2031, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.6858832224685883, |
| "grad_norm": 0.6265646534423697, |
| "learning_rate": 5.427007314459949e-06, |
| "loss": 1.2276, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6862774082286277, |
| "grad_norm": 0.6155975267249686, |
| "learning_rate": 5.414768624764262e-06, |
| "loss": 1.168, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.6866715939886672, |
| "grad_norm": 0.6407827075088298, |
| "learning_rate": 5.402538625266184e-06, |
| "loss": 1.2118, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.6870657797487065, |
| "grad_norm": 0.6203929435962302, |
| "learning_rate": 5.390317339144726e-06, |
| "loss": 1.1711, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.687459965508746, |
| "grad_norm": 0.6296758413992221, |
| "learning_rate": 5.378104789562373e-06, |
| "loss": 1.1671, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.6878541512687855, |
| "grad_norm": 0.6402560327012314, |
| "learning_rate": 5.3659009996650704e-06, |
| "loss": 1.2331, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6882483370288248, |
| "grad_norm": 0.6352813958888808, |
| "learning_rate": 5.353705992582147e-06, |
| "loss": 1.171, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.6886425227888643, |
| "grad_norm": 0.6173013307650468, |
| "learning_rate": 5.341519791426285e-06, |
| "loss": 1.1872, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.6890367085489036, |
| "grad_norm": 0.6300579221159313, |
| "learning_rate": 5.329342419293488e-06, |
| "loss": 1.1538, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.6894308943089431, |
| "grad_norm": 0.6452484286067051, |
| "learning_rate": 5.3171738992630266e-06, |
| "loss": 1.1983, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.6898250800689825, |
| "grad_norm": 0.6351697766210709, |
| "learning_rate": 5.305014254397378e-06, |
| "loss": 1.2099, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6902192658290219, |
| "grad_norm": 0.6059437488402356, |
| "learning_rate": 5.292863507742218e-06, |
| "loss": 1.1429, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.6906134515890614, |
| "grad_norm": 0.6375500404238919, |
| "learning_rate": 5.280721682326349e-06, |
| "loss": 1.195, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.6910076373491008, |
| "grad_norm": 0.6214302914583397, |
| "learning_rate": 5.268588801161661e-06, |
| "loss": 1.1562, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.6914018231091402, |
| "grad_norm": 0.6233573649742591, |
| "learning_rate": 5.256464887243095e-06, |
| "loss": 1.1784, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.6917960088691796, |
| "grad_norm": 0.6057486309866048, |
| "learning_rate": 5.244349963548603e-06, |
| "loss": 1.1841, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.692190194629219, |
| "grad_norm": 0.6262495769486762, |
| "learning_rate": 5.232244053039099e-06, |
| "loss": 1.2069, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.6925843803892584, |
| "grad_norm": 0.6244256499974958, |
| "learning_rate": 5.220147178658401e-06, |
| "loss": 1.2099, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.6929785661492979, |
| "grad_norm": 0.5987132658245882, |
| "learning_rate": 5.208059363333218e-06, |
| "loss": 1.1172, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.6933727519093372, |
| "grad_norm": 0.6204462023553633, |
| "learning_rate": 5.195980629973077e-06, |
| "loss": 1.1287, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.6937669376693767, |
| "grad_norm": 0.616887618107624, |
| "learning_rate": 5.183911001470296e-06, |
| "loss": 1.1707, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6941611234294162, |
| "grad_norm": 0.6131588350689924, |
| "learning_rate": 5.171850500699942e-06, |
| "loss": 1.1913, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.6945553091894555, |
| "grad_norm": 0.6220240105240659, |
| "learning_rate": 5.159799150519773e-06, |
| "loss": 1.1752, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.694949494949495, |
| "grad_norm": 0.6474411617934912, |
| "learning_rate": 5.147756973770215e-06, |
| "loss": 1.1685, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.6953436807095343, |
| "grad_norm": 0.6074241395347293, |
| "learning_rate": 5.135723993274304e-06, |
| "loss": 1.1274, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.6957378664695738, |
| "grad_norm": 0.6257258438943853, |
| "learning_rate": 5.123700231837643e-06, |
| "loss": 1.1876, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.6961320522296132, |
| "grad_norm": 0.6240327119384406, |
| "learning_rate": 5.111685712248364e-06, |
| "loss": 1.1356, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.6965262379896526, |
| "grad_norm": 0.6058794807211466, |
| "learning_rate": 5.099680457277083e-06, |
| "loss": 1.1859, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.6969204237496921, |
| "grad_norm": 0.6130830438069458, |
| "learning_rate": 5.087684489676862e-06, |
| "loss": 1.1917, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.6973146095097315, |
| "grad_norm": 0.6307417343281665, |
| "learning_rate": 5.07569783218316e-06, |
| "loss": 1.2297, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.6977087952697709, |
| "grad_norm": 0.6127737313603762, |
| "learning_rate": 5.063720507513781e-06, |
| "loss": 1.1673, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6981029810298103, |
| "grad_norm": 0.624666994089622, |
| "learning_rate": 5.051752538368855e-06, |
| "loss": 1.2133, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.6984971667898497, |
| "grad_norm": 0.612192851855714, |
| "learning_rate": 5.039793947430774e-06, |
| "loss": 1.1894, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.6988913525498891, |
| "grad_norm": 0.6163484499307348, |
| "learning_rate": 5.02784475736415e-06, |
| "loss": 1.1901, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6992855383099286, |
| "grad_norm": 0.6189253804729046, |
| "learning_rate": 5.015904990815792e-06, |
| "loss": 1.1852, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6996797240699679, |
| "grad_norm": 0.6315133839229915, |
| "learning_rate": 5.003974670414633e-06, |
| "loss": 1.2218, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.7000739098300074, |
| "grad_norm": 0.6143569728327692, |
| "learning_rate": 4.992053818771715e-06, |
| "loss": 1.1698, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.7004680955900469, |
| "grad_norm": 0.6023568254933535, |
| "learning_rate": 4.980142458480136e-06, |
| "loss": 1.1618, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.7008622813500862, |
| "grad_norm": 0.620427287297367, |
| "learning_rate": 4.968240612114995e-06, |
| "loss": 1.1812, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.7012564671101257, |
| "grad_norm": 0.6169377500547716, |
| "learning_rate": 4.956348302233364e-06, |
| "loss": 1.1729, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.701650652870165, |
| "grad_norm": 0.6119581164148135, |
| "learning_rate": 4.944465551374238e-06, |
| "loss": 1.1942, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.7020448386302045, |
| "grad_norm": 0.6207029111041957, |
| "learning_rate": 4.932592382058503e-06, |
| "loss": 1.1841, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.7024390243902439, |
| "grad_norm": 0.6274557767427725, |
| "learning_rate": 4.920728816788885e-06, |
| "loss": 1.2241, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.7028332101502833, |
| "grad_norm": 0.6251490097972446, |
| "learning_rate": 4.908874878049894e-06, |
| "loss": 1.1746, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.7032273959103228, |
| "grad_norm": 0.6421558996903795, |
| "learning_rate": 4.897030588307816e-06, |
| "loss": 1.1599, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.7036215816703622, |
| "grad_norm": 0.6580529776636076, |
| "learning_rate": 4.885195970010634e-06, |
| "loss": 1.1876, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.7040157674304016, |
| "grad_norm": 0.7799716182595261, |
| "learning_rate": 4.873371045588002e-06, |
| "loss": 1.1619, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.704409953190441, |
| "grad_norm": 0.6034015555793384, |
| "learning_rate": 4.861555837451213e-06, |
| "loss": 1.1339, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.7048041389504804, |
| "grad_norm": 0.6354298706812905, |
| "learning_rate": 4.84975036799313e-06, |
| "loss": 1.1904, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.7051983247105198, |
| "grad_norm": 0.656808882761667, |
| "learning_rate": 4.837954659588172e-06, |
| "loss": 1.2118, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.7055925104705593, |
| "grad_norm": 0.6354068123945864, |
| "learning_rate": 4.826168734592254e-06, |
| "loss": 1.2657, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.7059866962305986, |
| "grad_norm": 0.6135559463093657, |
| "learning_rate": 4.814392615342746e-06, |
| "loss": 1.218, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.7063808819906381, |
| "grad_norm": 0.6190332303953764, |
| "learning_rate": 4.802626324158432e-06, |
| "loss": 1.1298, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.7067750677506776, |
| "grad_norm": 0.6261895312898496, |
| "learning_rate": 4.790869883339473e-06, |
| "loss": 1.2229, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.7071692535107169, |
| "grad_norm": 0.6499346687616555, |
| "learning_rate": 4.779123315167362e-06, |
| "loss": 1.2436, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.7075634392707564, |
| "grad_norm": 0.7112549120650247, |
| "learning_rate": 4.767386641904883e-06, |
| "loss": 1.1948, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.7079576250307957, |
| "grad_norm": 0.6187195781334022, |
| "learning_rate": 4.755659885796054e-06, |
| "loss": 1.2253, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.7083518107908352, |
| "grad_norm": 0.616576163504054, |
| "learning_rate": 4.743943069066118e-06, |
| "loss": 1.1448, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.7087459965508746, |
| "grad_norm": 0.614300702515973, |
| "learning_rate": 4.73223621392146e-06, |
| "loss": 1.181, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.709140182310914, |
| "grad_norm": 0.6141034301455051, |
| "learning_rate": 4.720539342549594e-06, |
| "loss": 1.1788, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.7095343680709535, |
| "grad_norm": 0.6073756603898747, |
| "learning_rate": 4.708852477119117e-06, |
| "loss": 1.1848, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.7099285538309928, |
| "grad_norm": 0.6344185849187683, |
| "learning_rate": 4.6971756397796506e-06, |
| "loss": 1.1721, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.7103227395910323, |
| "grad_norm": 0.6248360198993864, |
| "learning_rate": 4.6855088526618205e-06, |
| "loss": 1.1565, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.7107169253510717, |
| "grad_norm": 0.6152420860002373, |
| "learning_rate": 4.6738521378772066e-06, |
| "loss": 1.1702, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.6168160579182377, |
| "learning_rate": 4.662205517518286e-06, |
| "loss": 1.1988, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.7115052968711505, |
| "grad_norm": 0.6199790217466414, |
| "learning_rate": 4.650569013658417e-06, |
| "loss": 1.2058, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.71189948263119, |
| "grad_norm": 0.6176228890841313, |
| "learning_rate": 4.638942648351774e-06, |
| "loss": 1.1612, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.7122936683912293, |
| "grad_norm": 0.5959975381441662, |
| "learning_rate": 4.627326443633327e-06, |
| "loss": 1.1628, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.7126878541512688, |
| "grad_norm": 0.6189398958365385, |
| "learning_rate": 4.61572042151878e-06, |
| "loss": 1.1928, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.7130820399113083, |
| "grad_norm": 0.6271163010563219, |
| "learning_rate": 4.604124604004544e-06, |
| "loss": 1.2124, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.7134762256713476, |
| "grad_norm": 0.6000046568229123, |
| "learning_rate": 4.592539013067692e-06, |
| "loss": 1.153, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.7138704114313871, |
| "grad_norm": 0.5989067172216591, |
| "learning_rate": 4.580963670665906e-06, |
| "loss": 1.1537, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.7142645971914264, |
| "grad_norm": 0.65003150237445, |
| "learning_rate": 4.569398598737448e-06, |
| "loss": 1.2302, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.7146587829514659, |
| "grad_norm": 0.6224236372159876, |
| "learning_rate": 4.557843819201121e-06, |
| "loss": 1.2191, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.7150529687115053, |
| "grad_norm": 0.6360681967059407, |
| "learning_rate": 4.546299353956211e-06, |
| "loss": 1.1782, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.7154471544715447, |
| "grad_norm": 0.6134230197484926, |
| "learning_rate": 4.534765224882463e-06, |
| "loss": 1.2106, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.7158413402315842, |
| "grad_norm": 0.6176737002203802, |
| "learning_rate": 4.5232414538400336e-06, |
| "loss": 1.2175, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.7162355259916235, |
| "grad_norm": 0.6202906864487361, |
| "learning_rate": 4.511728062669443e-06, |
| "loss": 1.1807, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.716629711751663, |
| "grad_norm": 0.6212585444516489, |
| "learning_rate": 4.50022507319154e-06, |
| "loss": 1.1958, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.7170238975117024, |
| "grad_norm": 0.6142126146314887, |
| "learning_rate": 4.488732507207457e-06, |
| "loss": 1.189, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.7174180832717418, |
| "grad_norm": 0.6301160963451029, |
| "learning_rate": 4.477250386498582e-06, |
| "loss": 1.2383, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.7178122690317812, |
| "grad_norm": 0.6238993246895916, |
| "learning_rate": 4.46577873282649e-06, |
| "loss": 1.1642, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.7182064547918207, |
| "grad_norm": 0.5954902888936976, |
| "learning_rate": 4.4543175679329345e-06, |
| "loss": 1.1319, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.71860064055186, |
| "grad_norm": 0.5975113333384684, |
| "learning_rate": 4.442866913539783e-06, |
| "loss": 1.1692, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.7189948263118995, |
| "grad_norm": 0.6361387072646193, |
| "learning_rate": 4.431426791348981e-06, |
| "loss": 1.2058, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.719389012071939, |
| "grad_norm": 0.6206879841575946, |
| "learning_rate": 4.419997223042509e-06, |
| "loss": 1.1892, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.7197831978319783, |
| "grad_norm": 0.6187188924722868, |
| "learning_rate": 4.408578230282361e-06, |
| "loss": 1.2343, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.7201773835920178, |
| "grad_norm": 0.6099133549608606, |
| "learning_rate": 4.397169834710467e-06, |
| "loss": 1.1874, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.7205715693520571, |
| "grad_norm": 0.6218762750404337, |
| "learning_rate": 4.38577205794869e-06, |
| "loss": 1.2522, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.7209657551120966, |
| "grad_norm": 0.6122795104171647, |
| "learning_rate": 4.37438492159876e-06, |
| "loss": 1.1989, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.721359940872136, |
| "grad_norm": 0.6015290594639533, |
| "learning_rate": 4.36300844724224e-06, |
| "loss": 1.1714, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.7217541266321754, |
| "grad_norm": 0.6252355128162509, |
| "learning_rate": 4.351642656440482e-06, |
| "loss": 1.1703, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.7221483123922149, |
| "grad_norm": 0.6111637339804932, |
| "learning_rate": 4.340287570734604e-06, |
| "loss": 1.152, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.7225424981522542, |
| "grad_norm": 0.6101267108124663, |
| "learning_rate": 4.32894321164542e-06, |
| "loss": 1.184, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.7229366839122937, |
| "grad_norm": 0.6424270287758459, |
| "learning_rate": 4.317609600673418e-06, |
| "loss": 1.1703, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.7233308696723331, |
| "grad_norm": 0.6224326912866733, |
| "learning_rate": 4.306286759298721e-06, |
| "loss": 1.1925, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.7237250554323725, |
| "grad_norm": 0.5990540447824775, |
| "learning_rate": 4.294974708981041e-06, |
| "loss": 1.1549, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.7241192411924119, |
| "grad_norm": 0.6304187409365657, |
| "learning_rate": 4.283673471159632e-06, |
| "loss": 1.1974, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.7245134269524514, |
| "grad_norm": 0.6236344446716869, |
| "learning_rate": 4.272383067253254e-06, |
| "loss": 1.1704, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.7249076127124907, |
| "grad_norm": 0.6183536446735383, |
| "learning_rate": 4.2611035186601445e-06, |
| "loss": 1.2539, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.7253017984725302, |
| "grad_norm": 0.6381015795817223, |
| "learning_rate": 4.2498348467579555e-06, |
| "loss": 1.1772, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.7256959842325696, |
| "grad_norm": 0.6196633330398633, |
| "learning_rate": 4.2385770729037336e-06, |
| "loss": 1.1597, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.726090169992609, |
| "grad_norm": 0.6402144565991683, |
| "learning_rate": 4.22733021843387e-06, |
| "loss": 1.2207, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.7264843557526485, |
| "grad_norm": 0.6134635440909342, |
| "learning_rate": 4.216094304664056e-06, |
| "loss": 1.2303, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.7268785415126878, |
| "grad_norm": 0.6170474770272091, |
| "learning_rate": 4.204869352889246e-06, |
| "loss": 1.1897, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.625150589347141, |
| "learning_rate": 4.193655384383631e-06, |
| "loss": 1.1273, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.7276669130327666, |
| "grad_norm": 0.6702486495437785, |
| "learning_rate": 4.182452420400571e-06, |
| "loss": 1.2604, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.7280610987928061, |
| "grad_norm": 1.1398019367962655, |
| "learning_rate": 4.171260482172574e-06, |
| "loss": 1.151, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.7284552845528456, |
| "grad_norm": 0.6232712417738132, |
| "learning_rate": 4.160079590911257e-06, |
| "loss": 1.1928, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.7288494703128849, |
| "grad_norm": 0.6346597753210788, |
| "learning_rate": 4.1489097678073e-06, |
| "loss": 1.2134, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.7292436560729244, |
| "grad_norm": 0.622479343337929, |
| "learning_rate": 4.1377510340304e-06, |
| "loss": 1.1351, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.7296378418329638, |
| "grad_norm": 0.6095396783729989, |
| "learning_rate": 4.126603410729232e-06, |
| "loss": 1.1835, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.7300320275930032, |
| "grad_norm": 0.6007947259934253, |
| "learning_rate": 4.1154669190314315e-06, |
| "loss": 1.1361, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.7304262133530426, |
| "grad_norm": 0.6392450529455237, |
| "learning_rate": 4.104341580043518e-06, |
| "loss": 1.2352, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.730820399113082, |
| "grad_norm": 0.6088170301748977, |
| "learning_rate": 4.093227414850887e-06, |
| "loss": 1.1555, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.7312145848731214, |
| "grad_norm": 0.611940955223257, |
| "learning_rate": 4.0821244445177535e-06, |
| "loss": 1.1035, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.7316087706331609, |
| "grad_norm": 0.6429334370137534, |
| "learning_rate": 4.071032690087111e-06, |
| "loss": 1.2077, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.7320029563932003, |
| "grad_norm": 0.6199867856316763, |
| "learning_rate": 4.059952172580694e-06, |
| "loss": 1.1898, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.7323971421532397, |
| "grad_norm": 0.682925719480743, |
| "learning_rate": 4.0488829129989536e-06, |
| "loss": 1.1796, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.7327913279132792, |
| "grad_norm": 0.6300326280908697, |
| "learning_rate": 4.0378249323209915e-06, |
| "loss": 1.1821, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.7331855136733185, |
| "grad_norm": 0.6188854368428854, |
| "learning_rate": 4.026778251504533e-06, |
| "loss": 1.212, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.733579699433358, |
| "grad_norm": 0.7209116321064022, |
| "learning_rate": 4.015742891485893e-06, |
| "loss": 1.2115, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.7339738851933973, |
| "grad_norm": 0.6377551509793858, |
| "learning_rate": 4.0047188731799345e-06, |
| "loss": 1.2223, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.7343680709534368, |
| "grad_norm": 0.6709121309342012, |
| "learning_rate": 3.993706217480015e-06, |
| "loss": 1.2369, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.7347622567134763, |
| "grad_norm": 0.6610392131221031, |
| "learning_rate": 3.982704945257957e-06, |
| "loss": 1.238, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.7351564424735156, |
| "grad_norm": 0.6314301850508148, |
| "learning_rate": 3.97171507736402e-06, |
| "loss": 1.1694, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.7355506282335551, |
| "grad_norm": 0.6075680590520474, |
| "learning_rate": 3.960736634626838e-06, |
| "loss": 1.1627, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.7359448139935945, |
| "grad_norm": 0.6341926480920811, |
| "learning_rate": 3.949769637853393e-06, |
| "loss": 1.1434, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.7363389997536339, |
| "grad_norm": 0.621486685123361, |
| "learning_rate": 3.9388141078289775e-06, |
| "loss": 1.1946, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.7367331855136733, |
| "grad_norm": 0.6464204738071503, |
| "learning_rate": 3.927870065317156e-06, |
| "loss": 1.1774, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.7371273712737128, |
| "grad_norm": 0.6718388040792097, |
| "learning_rate": 3.916937531059706e-06, |
| "loss": 1.161, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.7375215570337521, |
| "grad_norm": 0.6323822736177052, |
| "learning_rate": 3.9060165257766116e-06, |
| "loss": 1.2166, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.7379157427937916, |
| "grad_norm": 0.6289704307488232, |
| "learning_rate": 3.895107070165995e-06, |
| "loss": 1.1657, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.738309928553831, |
| "grad_norm": 0.6262746372052379, |
| "learning_rate": 3.884209184904088e-06, |
| "loss": 1.2249, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.7387041143138704, |
| "grad_norm": 0.6184529013832247, |
| "learning_rate": 3.873322890645202e-06, |
| "loss": 1.1515, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.7390983000739099, |
| "grad_norm": 0.6290711060233826, |
| "learning_rate": 3.862448208021677e-06, |
| "loss": 1.1834, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.7394924858339492, |
| "grad_norm": 0.5895476413662796, |
| "learning_rate": 3.851585157643845e-06, |
| "loss": 1.1234, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.7398866715939887, |
| "grad_norm": 0.6107335830258855, |
| "learning_rate": 3.840733760099985e-06, |
| "loss": 1.1639, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.740280857354028, |
| "grad_norm": 0.6322945602429125, |
| "learning_rate": 3.829894035956306e-06, |
| "loss": 1.2427, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.7406750431140675, |
| "grad_norm": 0.6323335943798655, |
| "learning_rate": 3.819066005756883e-06, |
| "loss": 1.2223, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.741069228874107, |
| "grad_norm": 0.6078450616507315, |
| "learning_rate": 3.8082496900236244e-06, |
| "loss": 1.1706, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.7414634146341463, |
| "grad_norm": 0.6221466682968542, |
| "learning_rate": 3.7974451092562447e-06, |
| "loss": 1.2046, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.7418576003941858, |
| "grad_norm": 0.6049678464198069, |
| "learning_rate": 3.7866522839322207e-06, |
| "loss": 1.1767, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.7422517861542252, |
| "grad_norm": 0.6295952461868448, |
| "learning_rate": 3.775871234506734e-06, |
| "loss": 1.2225, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.7426459719142646, |
| "grad_norm": 0.6394412262692781, |
| "learning_rate": 3.7651019814126656e-06, |
| "loss": 1.214, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.743040157674304, |
| "grad_norm": 0.610513027873533, |
| "learning_rate": 3.754344545060529e-06, |
| "loss": 1.1537, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.7434343434343434, |
| "grad_norm": 0.5956769595890598, |
| "learning_rate": 3.743598945838438e-06, |
| "loss": 1.1758, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.7438285291943828, |
| "grad_norm": 0.6417078515489372, |
| "learning_rate": 3.732865204112084e-06, |
| "loss": 1.1991, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.7442227149544223, |
| "grad_norm": 0.6291270205503651, |
| "learning_rate": 3.722143340224682e-06, |
| "loss": 1.2203, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.7446169007144617, |
| "grad_norm": 0.6143214199994612, |
| "learning_rate": 3.7114333744969312e-06, |
| "loss": 1.2053, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.7450110864745011, |
| "grad_norm": 0.6247493772614575, |
| "learning_rate": 3.7007353272269764e-06, |
| "loss": 1.187, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.7454052722345406, |
| "grad_norm": 0.6280559082279741, |
| "learning_rate": 3.6900492186903893e-06, |
| "loss": 1.2001, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.7457994579945799, |
| "grad_norm": 0.6656868801405882, |
| "learning_rate": 3.6793750691400996e-06, |
| "loss": 1.2266, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.7461936437546194, |
| "grad_norm": 0.6290134544837587, |
| "learning_rate": 3.6687128988063768e-06, |
| "loss": 1.2643, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.7465878295146587, |
| "grad_norm": 0.6046720210188277, |
| "learning_rate": 3.6580627278967883e-06, |
| "loss": 1.1329, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.7469820152746982, |
| "grad_norm": 0.6132109677638092, |
| "learning_rate": 3.6474245765961623e-06, |
| "loss": 1.1802, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.7473762010347377, |
| "grad_norm": 0.6215636460183582, |
| "learning_rate": 3.636798465066537e-06, |
| "loss": 1.161, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.747770386794777, |
| "grad_norm": 0.6324476045738789, |
| "learning_rate": 3.6261844134471434e-06, |
| "loss": 1.2743, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.7481645725548165, |
| "grad_norm": 0.6229098227690751, |
| "learning_rate": 3.6155824418543482e-06, |
| "loss": 1.1813, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.7485587583148559, |
| "grad_norm": 0.6090812575135249, |
| "learning_rate": 3.604992570381621e-06, |
| "loss": 1.1345, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.7489529440748953, |
| "grad_norm": 0.6175559157353252, |
| "learning_rate": 3.5944148190995077e-06, |
| "loss": 1.2318, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.7493471298349347, |
| "grad_norm": 0.6151430132474782, |
| "learning_rate": 3.583849208055582e-06, |
| "loss": 1.1515, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.7497413155949741, |
| "grad_norm": 0.6150817757122007, |
| "learning_rate": 3.573295757274401e-06, |
| "loss": 1.1709, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.7501355013550135, |
| "grad_norm": 0.6206530860937504, |
| "learning_rate": 3.562754486757477e-06, |
| "loss": 1.2368, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.750529687115053, |
| "grad_norm": 0.6187559303708384, |
| "learning_rate": 3.5522254164832458e-06, |
| "loss": 1.166, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.7509238728750924, |
| "grad_norm": 0.6050479857846883, |
| "learning_rate": 3.5417085664070127e-06, |
| "loss": 1.1884, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.7513180586351318, |
| "grad_norm": 0.6168601224584902, |
| "learning_rate": 3.5312039564609203e-06, |
| "loss": 1.179, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.7517122443951713, |
| "grad_norm": 0.6626157674267323, |
| "learning_rate": 3.5207116065539214e-06, |
| "loss": 1.2784, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.7521064301552106, |
| "grad_norm": 0.6204622203986804, |
| "learning_rate": 3.510231536571731e-06, |
| "loss": 1.1545, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.7525006159152501, |
| "grad_norm": 0.6025298592606017, |
| "learning_rate": 3.4997637663767827e-06, |
| "loss": 1.1623, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.7528948016752894, |
| "grad_norm": 0.6686746729115949, |
| "learning_rate": 3.4893083158082096e-06, |
| "loss": 1.225, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.7532889874353289, |
| "grad_norm": 0.6770303268213698, |
| "learning_rate": 3.4788652046817885e-06, |
| "loss": 1.1987, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.7536831731953684, |
| "grad_norm": 0.6169292952669728, |
| "learning_rate": 3.4684344527899117e-06, |
| "loss": 1.1413, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.7540773589554077, |
| "grad_norm": 0.6485841260675642, |
| "learning_rate": 3.458016079901544e-06, |
| "loss": 1.1747, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.7544715447154472, |
| "grad_norm": 0.644634311279479, |
| "learning_rate": 3.447610105762197e-06, |
| "loss": 1.1688, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.7548657304754866, |
| "grad_norm": 0.5954331888752692, |
| "learning_rate": 3.4372165500938813e-06, |
| "loss": 1.1999, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.755259916235526, |
| "grad_norm": 0.617923959960479, |
| "learning_rate": 3.4268354325950637e-06, |
| "loss": 1.2101, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.7556541019955654, |
| "grad_norm": 0.6202978534151761, |
| "learning_rate": 3.4164667729406487e-06, |
| "loss": 1.1168, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.7560482877556048, |
| "grad_norm": 0.6139453726018187, |
| "learning_rate": 3.4061105907819202e-06, |
| "loss": 1.107, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.7564424735156442, |
| "grad_norm": 0.6199465940139608, |
| "learning_rate": 3.395766905746515e-06, |
| "loss": 1.2331, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.7568366592756837, |
| "grad_norm": 0.6121258940736186, |
| "learning_rate": 3.3854357374383905e-06, |
| "loss": 1.1512, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.7572308450357231, |
| "grad_norm": 0.6192952901355329, |
| "learning_rate": 3.375117105437784e-06, |
| "loss": 1.1992, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.7576250307957625, |
| "grad_norm": 0.6428452093914235, |
| "learning_rate": 3.3648110293011592e-06, |
| "loss": 1.2009, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.758019216555802, |
| "grad_norm": 0.632857445152661, |
| "learning_rate": 3.3545175285611986e-06, |
| "loss": 1.2031, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.7584134023158413, |
| "grad_norm": 0.61203461189701, |
| "learning_rate": 3.344236622726743e-06, |
| "loss": 1.128, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.7588075880758808, |
| "grad_norm": 0.5940930582433119, |
| "learning_rate": 3.333968331282759e-06, |
| "loss": 1.1638, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.7592017738359201, |
| "grad_norm": 0.6128730590023086, |
| "learning_rate": 3.3237126736903168e-06, |
| "loss": 1.1636, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.7595959595959596, |
| "grad_norm": 0.6453501409856305, |
| "learning_rate": 3.313469669386532e-06, |
| "loss": 1.2196, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.7599901453559991, |
| "grad_norm": 0.6462479993428716, |
| "learning_rate": 3.303239337784547e-06, |
| "loss": 1.1757, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.7603843311160384, |
| "grad_norm": 0.6223443320198161, |
| "learning_rate": 3.2930216982734775e-06, |
| "loss": 1.2022, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.7607785168760779, |
| "grad_norm": 0.6012467834584495, |
| "learning_rate": 3.2828167702183945e-06, |
| "loss": 1.1624, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7611727026361172, |
| "grad_norm": 0.6212867293615743, |
| "learning_rate": 3.272624572960269e-06, |
| "loss": 1.1469, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.7615668883961567, |
| "grad_norm": 0.623426678936357, |
| "learning_rate": 3.262445125815945e-06, |
| "loss": 1.2142, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.7619610741561961, |
| "grad_norm": 0.6174911641351716, |
| "learning_rate": 3.2522784480781057e-06, |
| "loss": 1.229, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.7623552599162355, |
| "grad_norm": 0.6458478147860737, |
| "learning_rate": 3.242124559015234e-06, |
| "loss": 1.2307, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.7627494456762749, |
| "grad_norm": 0.6139695821784812, |
| "learning_rate": 3.2319834778715662e-06, |
| "loss": 1.1993, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.7631436314363144, |
| "grad_norm": 0.6244967897448498, |
| "learning_rate": 3.221855223867076e-06, |
| "loss": 1.1983, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.7635378171963538, |
| "grad_norm": 0.6167092879774253, |
| "learning_rate": 3.211739816197419e-06, |
| "loss": 1.139, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.7639320029563932, |
| "grad_norm": 0.6253757235990433, |
| "learning_rate": 3.2016372740339e-06, |
| "loss": 1.2246, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.7643261887164327, |
| "grad_norm": 0.625945816934853, |
| "learning_rate": 3.1915476165234505e-06, |
| "loss": 1.1534, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.764720374476472, |
| "grad_norm": 0.6294175091707643, |
| "learning_rate": 3.1814708627885736e-06, |
| "loss": 1.2087, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7651145602365115, |
| "grad_norm": 0.6174964988395791, |
| "learning_rate": 3.171407031927325e-06, |
| "loss": 1.2108, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.7655087459965508, |
| "grad_norm": 0.6692493984724502, |
| "learning_rate": 3.161356143013258e-06, |
| "loss": 1.2602, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.7659029317565903, |
| "grad_norm": 0.6049874736921799, |
| "learning_rate": 3.1513182150954067e-06, |
| "loss": 1.1283, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.7662971175166298, |
| "grad_norm": 0.6170567402312764, |
| "learning_rate": 3.1412932671982368e-06, |
| "loss": 1.1787, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.7666913032766691, |
| "grad_norm": 0.5939532563374448, |
| "learning_rate": 3.131281318321607e-06, |
| "loss": 1.1134, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.7670854890367086, |
| "grad_norm": 0.6073844909969783, |
| "learning_rate": 3.1212823874407517e-06, |
| "loss": 1.1714, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.767479674796748, |
| "grad_norm": 0.6102814200245192, |
| "learning_rate": 3.1112964935062297e-06, |
| "loss": 1.172, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.7678738605567874, |
| "grad_norm": 0.6156593525633267, |
| "learning_rate": 3.101323655443882e-06, |
| "loss": 1.2028, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.7682680463168268, |
| "grad_norm": 0.630439880503606, |
| "learning_rate": 3.0913638921548195e-06, |
| "loss": 1.1547, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.7686622320768662, |
| "grad_norm": 0.596623146889128, |
| "learning_rate": 3.0814172225153626e-06, |
| "loss": 1.1191, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7690564178369056, |
| "grad_norm": 0.6035005020079766, |
| "learning_rate": 3.0714836653770153e-06, |
| "loss": 1.1602, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.7694506035969451, |
| "grad_norm": 0.6229719405653049, |
| "learning_rate": 3.0615632395664395e-06, |
| "loss": 1.2358, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.7698447893569845, |
| "grad_norm": 0.6172825849164519, |
| "learning_rate": 3.051655963885398e-06, |
| "loss": 1.1966, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.7702389751170239, |
| "grad_norm": 0.6286383648446865, |
| "learning_rate": 3.0417618571107443e-06, |
| "loss": 1.1964, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.7706331608770634, |
| "grad_norm": 0.6108360343185555, |
| "learning_rate": 3.0318809379943594e-06, |
| "loss": 1.1728, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.7710273466371027, |
| "grad_norm": 0.6362153250389974, |
| "learning_rate": 3.022013225263142e-06, |
| "loss": 1.2236, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.7714215323971422, |
| "grad_norm": 0.6344908938517139, |
| "learning_rate": 3.0121587376189544e-06, |
| "loss": 1.2053, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.7718157181571815, |
| "grad_norm": 0.6201739659408967, |
| "learning_rate": 3.00231749373859e-06, |
| "loss": 1.1537, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.772209903917221, |
| "grad_norm": 0.6100774811460168, |
| "learning_rate": 2.992489512273754e-06, |
| "loss": 1.1984, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.7726040896772605, |
| "grad_norm": 0.6232200126606358, |
| "learning_rate": 2.9826748118510107e-06, |
| "loss": 1.2338, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7729982754372998, |
| "grad_norm": 0.6325714051449248, |
| "learning_rate": 2.972873411071745e-06, |
| "loss": 1.1917, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.7733924611973393, |
| "grad_norm": 0.6152245310127229, |
| "learning_rate": 2.9630853285121506e-06, |
| "loss": 1.2181, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.7737866469573786, |
| "grad_norm": 0.6382727314998073, |
| "learning_rate": 2.9533105827231677e-06, |
| "loss": 1.2374, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.7741808327174181, |
| "grad_norm": 0.6093019906684419, |
| "learning_rate": 2.9435491922304603e-06, |
| "loss": 1.2039, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.7745750184774575, |
| "grad_norm": 0.6466162600658065, |
| "learning_rate": 2.933801175534392e-06, |
| "loss": 1.2507, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7749692042374969, |
| "grad_norm": 0.6172944871295347, |
| "learning_rate": 2.9240665511099643e-06, |
| "loss": 1.1777, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.7753633899975363, |
| "grad_norm": 0.6025058965161826, |
| "learning_rate": 2.914345337406812e-06, |
| "loss": 1.1488, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.7757575757575758, |
| "grad_norm": 0.6283140418676793, |
| "learning_rate": 2.9046375528491378e-06, |
| "loss": 1.2246, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.7761517615176152, |
| "grad_norm": 0.6174686412053484, |
| "learning_rate": 2.8949432158357083e-06, |
| "loss": 1.1603, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.7765459472776546, |
| "grad_norm": 0.6249876696519094, |
| "learning_rate": 2.885262344739792e-06, |
| "loss": 1.2378, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.776940133037694, |
| "grad_norm": 0.6155008238993236, |
| "learning_rate": 2.875594957909136e-06, |
| "loss": 1.1734, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.7773343187977334, |
| "grad_norm": 0.6070997737354649, |
| "learning_rate": 2.865941073665942e-06, |
| "loss": 1.1533, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.7777285045577729, |
| "grad_norm": 0.6285112446428368, |
| "learning_rate": 2.8563007103068075e-06, |
| "loss": 1.2374, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.7781226903178122, |
| "grad_norm": 0.6292319074803627, |
| "learning_rate": 2.8466738861027143e-06, |
| "loss": 1.1764, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.7785168760778517, |
| "grad_norm": 0.6280895354859987, |
| "learning_rate": 2.8370606192989826e-06, |
| "loss": 1.2332, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.7789110618378912, |
| "grad_norm": 0.6392848234961054, |
| "learning_rate": 2.8274609281152322e-06, |
| "loss": 1.1681, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.7793052475979305, |
| "grad_norm": 0.6422553395733501, |
| "learning_rate": 2.8178748307453552e-06, |
| "loss": 1.1967, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.77969943335797, |
| "grad_norm": 0.6448664268947002, |
| "learning_rate": 2.8083023453574867e-06, |
| "loss": 1.1637, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.7800936191180093, |
| "grad_norm": 0.6268688830101503, |
| "learning_rate": 2.7987434900939537e-06, |
| "loss": 1.1992, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.7804878048780488, |
| "grad_norm": 0.6270584497214332, |
| "learning_rate": 2.7891982830712614e-06, |
| "loss": 1.215, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7808819906380882, |
| "grad_norm": 0.6136390949207409, |
| "learning_rate": 2.779666742380035e-06, |
| "loss": 1.1842, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.7812761763981276, |
| "grad_norm": 0.6160721779555592, |
| "learning_rate": 2.7701488860850134e-06, |
| "loss": 1.1465, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.781670362158167, |
| "grad_norm": 0.6229572690437215, |
| "learning_rate": 2.7606447322249876e-06, |
| "loss": 1.1872, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.7820645479182065, |
| "grad_norm": 0.6120891016882081, |
| "learning_rate": 2.7511542988127815e-06, |
| "loss": 1.1933, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.7824587336782459, |
| "grad_norm": 0.6396299966743912, |
| "learning_rate": 2.7416776038352246e-06, |
| "loss": 1.2268, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.7828529194382853, |
| "grad_norm": 0.620606681831229, |
| "learning_rate": 2.732214665253092e-06, |
| "loss": 1.18, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.7832471051983247, |
| "grad_norm": 0.6172045847652757, |
| "learning_rate": 2.7227655010011034e-06, |
| "loss": 1.2072, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.7836412909583641, |
| "grad_norm": 0.6174655713344509, |
| "learning_rate": 2.7133301289878644e-06, |
| "loss": 1.1981, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.7840354767184036, |
| "grad_norm": 0.6453151721553436, |
| "learning_rate": 2.703908567095841e-06, |
| "loss": 1.2319, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.7844296624784429, |
| "grad_norm": 0.6143239403662212, |
| "learning_rate": 2.694500833181323e-06, |
| "loss": 1.1539, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7848238482384824, |
| "grad_norm": 0.6118518639087388, |
| "learning_rate": 2.6851069450743996e-06, |
| "loss": 1.136, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.7852180339985219, |
| "grad_norm": 0.621523302552173, |
| "learning_rate": 2.6757269205789118e-06, |
| "loss": 1.1884, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.7856122197585612, |
| "grad_norm": 0.6177501269477549, |
| "learning_rate": 2.666360777472432e-06, |
| "loss": 1.1697, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.7860064055186007, |
| "grad_norm": 0.6169578769905575, |
| "learning_rate": 2.6570085335062166e-06, |
| "loss": 1.149, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.78640059127864, |
| "grad_norm": 0.6384469724904461, |
| "learning_rate": 2.6476702064051873e-06, |
| "loss": 1.215, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.7867947770386795, |
| "grad_norm": 0.6526331509523849, |
| "learning_rate": 2.638345813867883e-06, |
| "loss": 1.1834, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.7871889627987189, |
| "grad_norm": 0.6384058053206544, |
| "learning_rate": 2.629035373566433e-06, |
| "loss": 1.2679, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.7875831485587583, |
| "grad_norm": 0.6173186289000027, |
| "learning_rate": 2.6197389031465328e-06, |
| "loss": 1.1497, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.7879773343187977, |
| "grad_norm": 0.6179494011323186, |
| "learning_rate": 2.610456420227386e-06, |
| "loss": 1.155, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.7883715200788372, |
| "grad_norm": 0.6495295068681656, |
| "learning_rate": 2.6011879424017006e-06, |
| "loss": 1.1627, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7887657058388766, |
| "grad_norm": 0.6124764762909571, |
| "learning_rate": 2.5919334872356384e-06, |
| "loss": 1.2092, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.789159891598916, |
| "grad_norm": 0.6267862591887654, |
| "learning_rate": 2.582693072268778e-06, |
| "loss": 1.2324, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.7895540773589554, |
| "grad_norm": 0.640938297681364, |
| "learning_rate": 2.573466715014089e-06, |
| "loss": 1.1638, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.7899482631189948, |
| "grad_norm": 0.6319357561158305, |
| "learning_rate": 2.5642544329579088e-06, |
| "loss": 1.1436, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.7903424488790343, |
| "grad_norm": 0.6599757389441551, |
| "learning_rate": 2.5550562435598834e-06, |
| "loss": 1.1859, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.7907366346390736, |
| "grad_norm": 0.6261460556185046, |
| "learning_rate": 2.5458721642529637e-06, |
| "loss": 1.2276, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.7911308203991131, |
| "grad_norm": 0.6368615447497923, |
| "learning_rate": 2.536702212443345e-06, |
| "loss": 1.126, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.7915250061591526, |
| "grad_norm": 0.6065232945787534, |
| "learning_rate": 2.5275464055104615e-06, |
| "loss": 1.1566, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.7919191919191919, |
| "grad_norm": 0.6260924052346492, |
| "learning_rate": 2.5184047608069283e-06, |
| "loss": 1.2301, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.7923133776792314, |
| "grad_norm": 0.5961679029421411, |
| "learning_rate": 2.509277295658521e-06, |
| "loss": 1.1195, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7927075634392707, |
| "grad_norm": 0.6880173744181591, |
| "learning_rate": 2.500164027364147e-06, |
| "loss": 1.1852, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.7931017491993102, |
| "grad_norm": 0.591725360802608, |
| "learning_rate": 2.491064973195798e-06, |
| "loss": 1.1237, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.7934959349593496, |
| "grad_norm": 0.5975825860792612, |
| "learning_rate": 2.4819801503985365e-06, |
| "loss": 1.1518, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.793890120719389, |
| "grad_norm": 0.6221206271257661, |
| "learning_rate": 2.4729095761904487e-06, |
| "loss": 1.1838, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.7942843064794284, |
| "grad_norm": 0.6271650798589434, |
| "learning_rate": 2.4638532677626124e-06, |
| "loss": 1.1672, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.7946784922394678, |
| "grad_norm": 0.6395665538753358, |
| "learning_rate": 2.4548112422790695e-06, |
| "loss": 1.2002, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.7950726779995073, |
| "grad_norm": 0.6087288790926827, |
| "learning_rate": 2.4457835168767975e-06, |
| "loss": 1.1194, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.7954668637595467, |
| "grad_norm": 0.6099991672736873, |
| "learning_rate": 2.4367701086656625e-06, |
| "loss": 1.141, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.7958610495195861, |
| "grad_norm": 0.6055519755469221, |
| "learning_rate": 2.4277710347284035e-06, |
| "loss": 1.1506, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.7962552352796255, |
| "grad_norm": 0.653125514461312, |
| "learning_rate": 2.4187863121205933e-06, |
| "loss": 1.1804, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.796649421039665, |
| "grad_norm": 0.6025409266602508, |
| "learning_rate": 2.409815957870597e-06, |
| "loss": 1.1893, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.7970436067997043, |
| "grad_norm": 0.6126866642525495, |
| "learning_rate": 2.400859988979555e-06, |
| "loss": 1.186, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.7974377925597438, |
| "grad_norm": 0.6286983033908643, |
| "learning_rate": 2.3919184224213354e-06, |
| "loss": 1.1655, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.7978319783197833, |
| "grad_norm": 0.5932553711308323, |
| "learning_rate": 2.3829912751425244e-06, |
| "loss": 1.1778, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.7982261640798226, |
| "grad_norm": 0.633166520052366, |
| "learning_rate": 2.374078564062364e-06, |
| "loss": 1.1589, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.7986203498398621, |
| "grad_norm": 0.6299341383892152, |
| "learning_rate": 2.3651803060727484e-06, |
| "loss": 1.1603, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.7990145355999014, |
| "grad_norm": 0.6223977799816698, |
| "learning_rate": 2.3562965180381746e-06, |
| "loss": 1.2036, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.7994087213599409, |
| "grad_norm": 0.6214882966307388, |
| "learning_rate": 2.3474272167957144e-06, |
| "loss": 1.1902, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.7998029071199803, |
| "grad_norm": 0.6261786382679704, |
| "learning_rate": 2.3385724191549807e-06, |
| "loss": 1.1596, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.8001970928800197, |
| "grad_norm": 0.6179261386167846, |
| "learning_rate": 2.3297321418981077e-06, |
| "loss": 1.1601, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.8005912786400591, |
| "grad_norm": 0.6067017257945441, |
| "learning_rate": 2.3209064017797014e-06, |
| "loss": 1.1052, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.8009854644000985, |
| "grad_norm": 0.6030346397003117, |
| "learning_rate": 2.312095215526814e-06, |
| "loss": 1.1272, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.801379650160138, |
| "grad_norm": 0.6187228819182855, |
| "learning_rate": 2.3032985998389236e-06, |
| "loss": 1.2039, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.8017738359201774, |
| "grad_norm": 0.6190809264452526, |
| "learning_rate": 2.29451657138789e-06, |
| "loss": 1.2414, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.8021680216802168, |
| "grad_norm": 0.6083179570546223, |
| "learning_rate": 2.285749146817924e-06, |
| "loss": 1.1508, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.8025622074402562, |
| "grad_norm": 0.5937926599332075, |
| "learning_rate": 2.2769963427455555e-06, |
| "loss": 1.0988, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.8029563932002957, |
| "grad_norm": 0.6173897531116277, |
| "learning_rate": 2.2682581757596144e-06, |
| "loss": 1.1962, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.803350578960335, |
| "grad_norm": 0.5854683327803459, |
| "learning_rate": 2.259534662421179e-06, |
| "loss": 1.1119, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.8037447647203745, |
| "grad_norm": 0.6170817511105888, |
| "learning_rate": 2.2508258192635614e-06, |
| "loss": 1.1889, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.804138950480414, |
| "grad_norm": 0.6159894762027561, |
| "learning_rate": 2.242131662792272e-06, |
| "loss": 1.1667, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.8045331362404533, |
| "grad_norm": 0.6118649548400591, |
| "learning_rate": 2.2334522094849798e-06, |
| "loss": 1.1371, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.8049273220004928, |
| "grad_norm": 0.6392916794711796, |
| "learning_rate": 2.2247874757914865e-06, |
| "loss": 1.1846, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.8053215077605321, |
| "grad_norm": 0.5941927210409212, |
| "learning_rate": 2.2161374781337084e-06, |
| "loss": 1.1291, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.8057156935205716, |
| "grad_norm": 0.6294242082032777, |
| "learning_rate": 2.2075022329056193e-06, |
| "loss": 1.2009, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.806109879280611, |
| "grad_norm": 0.6422605646655121, |
| "learning_rate": 2.198881756473238e-06, |
| "loss": 1.2299, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.8065040650406504, |
| "grad_norm": 0.6563848866016602, |
| "learning_rate": 2.190276065174596e-06, |
| "loss": 1.2258, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.8068982508006898, |
| "grad_norm": 0.6448012423504815, |
| "learning_rate": 2.1816851753197023e-06, |
| "loss": 1.1881, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.8072924365607292, |
| "grad_norm": 0.597728406050263, |
| "learning_rate": 2.1731091031905118e-06, |
| "loss": 1.1688, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.8076866223207687, |
| "grad_norm": 0.5886841825944683, |
| "learning_rate": 2.164547865040889e-06, |
| "loss": 1.124, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.8080808080808081, |
| "grad_norm": 0.6142796262742458, |
| "learning_rate": 2.156001477096601e-06, |
| "loss": 1.2032, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.8084749938408475, |
| "grad_norm": 0.6175251461681956, |
| "learning_rate": 2.1474699555552527e-06, |
| "loss": 1.1787, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.8088691796008869, |
| "grad_norm": 0.6139100518824416, |
| "learning_rate": 2.138953316586283e-06, |
| "loss": 1.1953, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.8092633653609264, |
| "grad_norm": 0.6430044371047359, |
| "learning_rate": 2.130451576330925e-06, |
| "loss": 1.2208, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.8096575511209657, |
| "grad_norm": 0.6111371447533479, |
| "learning_rate": 2.12196475090217e-06, |
| "loss": 1.1537, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.8100517368810052, |
| "grad_norm": 0.6150669801063049, |
| "learning_rate": 2.113492856384741e-06, |
| "loss": 1.1211, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.8104459226410446, |
| "grad_norm": 0.6290841991274971, |
| "learning_rate": 2.1050359088350724e-06, |
| "loss": 1.2084, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.810840108401084, |
| "grad_norm": 0.6053161669582096, |
| "learning_rate": 2.0965939242812594e-06, |
| "loss": 1.1343, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.8112342941611235, |
| "grad_norm": 0.623034572056998, |
| "learning_rate": 2.0881669187230415e-06, |
| "loss": 1.1616, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.8116284799211628, |
| "grad_norm": 0.6122769163475099, |
| "learning_rate": 2.0797549081317724e-06, |
| "loss": 1.1639, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.8120226656812023, |
| "grad_norm": 0.6241014032007793, |
| "learning_rate": 2.0713579084503877e-06, |
| "loss": 1.2213, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.8124168514412416, |
| "grad_norm": 0.6054665326241209, |
| "learning_rate": 2.0629759355933665e-06, |
| "loss": 1.183, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.8128110372012811, |
| "grad_norm": 0.6131850542325953, |
| "learning_rate": 2.0546090054467118e-06, |
| "loss": 1.1867, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.8132052229613205, |
| "grad_norm": 0.5905612318597147, |
| "learning_rate": 2.0462571338679204e-06, |
| "loss": 1.1652, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.8135994087213599, |
| "grad_norm": 0.6086745867605593, |
| "learning_rate": 2.0379203366859413e-06, |
| "loss": 1.1749, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.8139935944813994, |
| "grad_norm": 0.6547726012282458, |
| "learning_rate": 2.0295986297011603e-06, |
| "loss": 1.2606, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.8143877802414388, |
| "grad_norm": 0.6176365863473255, |
| "learning_rate": 2.0212920286853656e-06, |
| "loss": 1.1631, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.8147819660014782, |
| "grad_norm": 0.5969133841837041, |
| "learning_rate": 2.0130005493817063e-06, |
| "loss": 1.1818, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.8151761517615176, |
| "grad_norm": 0.6095137689005168, |
| "learning_rate": 2.004724207504675e-06, |
| "loss": 1.1147, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.815570337521557, |
| "grad_norm": 0.6149824366682144, |
| "learning_rate": 1.9964630187400834e-06, |
| "loss": 1.1667, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.8159645232815964, |
| "grad_norm": 0.6076416587106072, |
| "learning_rate": 1.988216998745014e-06, |
| "loss": 1.1657, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.8163587090416359, |
| "grad_norm": 0.6378102035141168, |
| "learning_rate": 1.9799861631478013e-06, |
| "loss": 1.1748, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.8167528948016753, |
| "grad_norm": 0.6018846786576992, |
| "learning_rate": 1.971770527548008e-06, |
| "loss": 1.1243, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.8171470805617147, |
| "grad_norm": 0.6072693290996355, |
| "learning_rate": 1.9635701075163884e-06, |
| "loss": 1.1456, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.8175412663217542, |
| "grad_norm": 0.6188901773945752, |
| "learning_rate": 1.9553849185948514e-06, |
| "loss": 1.2303, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.8179354520817935, |
| "grad_norm": 0.6652688896175301, |
| "learning_rate": 1.947214976296443e-06, |
| "loss": 1.2502, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.818329637841833, |
| "grad_norm": 0.6180903878734494, |
| "learning_rate": 1.9390602961053194e-06, |
| "loss": 1.156, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.8187238236018723, |
| "grad_norm": 0.6125254270472376, |
| "learning_rate": 1.930920893476701e-06, |
| "loss": 1.1941, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.8191180093619118, |
| "grad_norm": 0.623138908331946, |
| "learning_rate": 1.9227967838368566e-06, |
| "loss": 1.1965, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.8195121951219512, |
| "grad_norm": 0.615972707734638, |
| "learning_rate": 1.9146879825830753e-06, |
| "loss": 1.1691, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.8199063808819906, |
| "grad_norm": 0.6000820870368339, |
| "learning_rate": 1.9065945050836299e-06, |
| "loss": 1.1169, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.8203005666420301, |
| "grad_norm": 0.609742615231763, |
| "learning_rate": 1.8985163666777473e-06, |
| "loss": 1.1694, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.8206947524020695, |
| "grad_norm": 0.6200332366286192, |
| "learning_rate": 1.890453582675591e-06, |
| "loss": 1.1225, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.8210889381621089, |
| "grad_norm": 0.6145307042295974, |
| "learning_rate": 1.882406168358215e-06, |
| "loss": 1.1893, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.8214831239221483, |
| "grad_norm": 0.613663996359055, |
| "learning_rate": 1.8743741389775472e-06, |
| "loss": 1.2003, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.8218773096821878, |
| "grad_norm": 0.6163140729383925, |
| "learning_rate": 1.866357509756358e-06, |
| "loss": 1.1625, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.8222714954422271, |
| "grad_norm": 0.6093496583736225, |
| "learning_rate": 1.8583562958882329e-06, |
| "loss": 1.1604, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.8226656812022666, |
| "grad_norm": 0.6112581505765976, |
| "learning_rate": 1.8503705125375382e-06, |
| "loss": 1.12, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.823059866962306, |
| "grad_norm": 0.6187957102380715, |
| "learning_rate": 1.8424001748393905e-06, |
| "loss": 1.2006, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.8234540527223454, |
| "grad_norm": 0.6131303613972927, |
| "learning_rate": 1.8344452978996441e-06, |
| "loss": 1.1182, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.8238482384823849, |
| "grad_norm": 0.6096435231696508, |
| "learning_rate": 1.8265058967948434e-06, |
| "loss": 1.0993, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.8242424242424242, |
| "grad_norm": 0.6188414868551905, |
| "learning_rate": 1.818581986572201e-06, |
| "loss": 1.2266, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.8246366100024637, |
| "grad_norm": 0.6187428595993414, |
| "learning_rate": 1.8106735822495746e-06, |
| "loss": 1.2269, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.825030795762503, |
| "grad_norm": 0.6158407049072168, |
| "learning_rate": 1.8027806988154373e-06, |
| "loss": 1.1678, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.8254249815225425, |
| "grad_norm": 0.6274441437082312, |
| "learning_rate": 1.794903351228835e-06, |
| "loss": 1.2211, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.8258191672825819, |
| "grad_norm": 0.6161979943389017, |
| "learning_rate": 1.7870415544193808e-06, |
| "loss": 1.1381, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.8262133530426213, |
| "grad_norm": 0.6192811967277538, |
| "learning_rate": 1.7791953232872083e-06, |
| "loss": 1.1739, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.8266075388026608, |
| "grad_norm": 0.6261988603055474, |
| "learning_rate": 1.7713646727029476e-06, |
| "loss": 1.1864, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.8270017245627002, |
| "grad_norm": 0.6383885993657525, |
| "learning_rate": 1.7635496175077082e-06, |
| "loss": 1.1576, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.8273959103227396, |
| "grad_norm": 0.6401328645174053, |
| "learning_rate": 1.755750172513041e-06, |
| "loss": 1.1973, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.827790096082779, |
| "grad_norm": 0.6520856363314526, |
| "learning_rate": 1.747966352500904e-06, |
| "loss": 1.2282, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.8281842818428184, |
| "grad_norm": 0.6338910603246662, |
| "learning_rate": 1.7401981722236438e-06, |
| "loss": 1.175, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.8285784676028578, |
| "grad_norm": 0.614780711742896, |
| "learning_rate": 1.7324456464039751e-06, |
| "loss": 1.219, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.8289726533628973, |
| "grad_norm": 0.6320193678396515, |
| "learning_rate": 1.7247087897349334e-06, |
| "loss": 1.234, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.8293668391229367, |
| "grad_norm": 0.6148462845714023, |
| "learning_rate": 1.7169876168798561e-06, |
| "loss": 1.207, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.8297610248829761, |
| "grad_norm": 0.6183637939087829, |
| "learning_rate": 1.7092821424723637e-06, |
| "loss": 1.191, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.8301552106430156, |
| "grad_norm": 0.6242963838055702, |
| "learning_rate": 1.7015923811163225e-06, |
| "loss": 1.2022, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.8305493964030549, |
| "grad_norm": 0.5988324551990205, |
| "learning_rate": 1.6939183473858101e-06, |
| "loss": 1.1113, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.8309435821630944, |
| "grad_norm": 0.6110399627678608, |
| "learning_rate": 1.6862600558251097e-06, |
| "loss": 1.14, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.8313377679231337, |
| "grad_norm": 0.6048300072512719, |
| "learning_rate": 1.6786175209486565e-06, |
| "loss": 1.1364, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.8317319536831732, |
| "grad_norm": 0.6191088800533002, |
| "learning_rate": 1.6709907572410266e-06, |
| "loss": 1.1591, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.8321261394432126, |
| "grad_norm": 0.6374165341976098, |
| "learning_rate": 1.6633797791569085e-06, |
| "loss": 1.1927, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.832520325203252, |
| "grad_norm": 0.6047378641330573, |
| "learning_rate": 1.6557846011210753e-06, |
| "loss": 1.1895, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.8329145109632915, |
| "grad_norm": 0.6180978122031335, |
| "learning_rate": 1.6482052375283442e-06, |
| "loss": 1.1932, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.8333086967233309, |
| "grad_norm": 0.6187193373594739, |
| "learning_rate": 1.6406417027435728e-06, |
| "loss": 1.2001, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.8337028824833703, |
| "grad_norm": 0.6055455770427833, |
| "learning_rate": 1.6330940111016103e-06, |
| "loss": 1.2135, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.8340970682434097, |
| "grad_norm": 0.6226585371397162, |
| "learning_rate": 1.6255621769072805e-06, |
| "loss": 1.2023, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.8344912540034491, |
| "grad_norm": 0.5949274417124252, |
| "learning_rate": 1.6180462144353526e-06, |
| "loss": 1.1744, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.8348854397634885, |
| "grad_norm": 0.6339414631453146, |
| "learning_rate": 1.6105461379305187e-06, |
| "loss": 1.1836, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.835279625523528, |
| "grad_norm": 0.6095339519814128, |
| "learning_rate": 1.6030619616073628e-06, |
| "loss": 1.1468, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.8356738112835674, |
| "grad_norm": 0.6227699723957059, |
| "learning_rate": 1.5955936996503285e-06, |
| "loss": 1.1617, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.8360679970436068, |
| "grad_norm": 0.6058636715701863, |
| "learning_rate": 1.5881413662137047e-06, |
| "loss": 1.2089, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.8364621828036463, |
| "grad_norm": 0.6345005146493108, |
| "learning_rate": 1.580704975421584e-06, |
| "loss": 1.2159, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.8368563685636856, |
| "grad_norm": 0.6425121234333704, |
| "learning_rate": 1.5732845413678477e-06, |
| "loss": 1.1546, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.8372505543237251, |
| "grad_norm": 0.6217776321143101, |
| "learning_rate": 1.5658800781161365e-06, |
| "loss": 1.1201, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.8376447400837644, |
| "grad_norm": 0.6291793073582329, |
| "learning_rate": 1.5584915996998217e-06, |
| "loss": 1.2199, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.8380389258438039, |
| "grad_norm": 0.6413491306262445, |
| "learning_rate": 1.5511191201219733e-06, |
| "loss": 1.1387, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.8384331116038433, |
| "grad_norm": 0.5968787571090911, |
| "learning_rate": 1.5437626533553497e-06, |
| "loss": 1.1677, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.8388272973638827, |
| "grad_norm": 0.6266812335989616, |
| "learning_rate": 1.5364222133423523e-06, |
| "loss": 1.1488, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.8392214831239222, |
| "grad_norm": 0.6179499573451991, |
| "learning_rate": 1.5290978139950108e-06, |
| "loss": 1.1462, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.8396156688839616, |
| "grad_norm": 0.6020456787105313, |
| "learning_rate": 1.521789469194952e-06, |
| "loss": 1.1895, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.840009854644001, |
| "grad_norm": 0.6142152475528356, |
| "learning_rate": 1.514497192793377e-06, |
| "loss": 1.1928, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.8404040404040404, |
| "grad_norm": 0.6418120903036971, |
| "learning_rate": 1.5072209986110376e-06, |
| "loss": 1.1873, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.8407982261640798, |
| "grad_norm": 0.6022912765250543, |
| "learning_rate": 1.4999609004381944e-06, |
| "loss": 1.1693, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.8411924119241192, |
| "grad_norm": 0.6241117050709148, |
| "learning_rate": 1.492716912034614e-06, |
| "loss": 1.1556, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.8415865976841587, |
| "grad_norm": 0.6088366197098409, |
| "learning_rate": 1.4854890471295225e-06, |
| "loss": 1.2307, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.8419807834441981, |
| "grad_norm": 0.626345154331026, |
| "learning_rate": 1.4782773194215883e-06, |
| "loss": 1.1245, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.8423749692042375, |
| "grad_norm": 0.6214268575987325, |
| "learning_rate": 1.4710817425789015e-06, |
| "loss": 1.1974, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.842769154964277, |
| "grad_norm": 0.6157509713525812, |
| "learning_rate": 1.4639023302389366e-06, |
| "loss": 1.1889, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.8431633407243163, |
| "grad_norm": 0.6351261747898632, |
| "learning_rate": 1.4567390960085325e-06, |
| "loss": 1.1981, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.8435575264843558, |
| "grad_norm": 0.6067571512713051, |
| "learning_rate": 1.4495920534638741e-06, |
| "loss": 1.1582, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.8439517122443951, |
| "grad_norm": 0.607006794382876, |
| "learning_rate": 1.4424612161504482e-06, |
| "loss": 1.1623, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.8443458980044346, |
| "grad_norm": 0.5784739791881964, |
| "learning_rate": 1.435346597583034e-06, |
| "loss": 1.116, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.844740083764474, |
| "grad_norm": 0.6124576542474655, |
| "learning_rate": 1.4282482112456686e-06, |
| "loss": 1.1986, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.8451342695245134, |
| "grad_norm": 0.6311729127767527, |
| "learning_rate": 1.4211660705916286e-06, |
| "loss": 1.2564, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.8455284552845529, |
| "grad_norm": 0.6337920894968637, |
| "learning_rate": 1.4141001890434035e-06, |
| "loss": 1.2245, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.8459226410445922, |
| "grad_norm": 0.5962616813895122, |
| "learning_rate": 1.407050579992658e-06, |
| "loss": 1.1572, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.8463168268046317, |
| "grad_norm": 0.6077208562957639, |
| "learning_rate": 1.4000172568002268e-06, |
| "loss": 1.1588, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.8467110125646711, |
| "grad_norm": 0.6206827599971425, |
| "learning_rate": 1.3930002327960702e-06, |
| "loss": 1.2329, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.8471051983247105, |
| "grad_norm": 0.6031727874430762, |
| "learning_rate": 1.385999521279261e-06, |
| "loss": 1.1409, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.8474993840847499, |
| "grad_norm": 0.6034983041379499, |
| "learning_rate": 1.3790151355179581e-06, |
| "loss": 1.2088, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.8478935698447894, |
| "grad_norm": 0.5944921464470333, |
| "learning_rate": 1.372047088749372e-06, |
| "loss": 1.1279, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.8482877556048288, |
| "grad_norm": 0.6214516653434409, |
| "learning_rate": 1.365095394179754e-06, |
| "loss": 1.2763, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.8486819413648682, |
| "grad_norm": 0.6442848968344648, |
| "learning_rate": 1.3581600649843617e-06, |
| "loss": 1.2047, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.8490761271249077, |
| "grad_norm": 0.6069453066470716, |
| "learning_rate": 1.3512411143074333e-06, |
| "loss": 1.1663, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.849470312884947, |
| "grad_norm": 0.632212528850588, |
| "learning_rate": 1.344338555262168e-06, |
| "loss": 1.1797, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.8498644986449865, |
| "grad_norm": 0.6551418490552343, |
| "learning_rate": 1.3374524009306944e-06, |
| "loss": 1.2136, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.8502586844050258, |
| "grad_norm": 0.6182185289441392, |
| "learning_rate": 1.3305826643640552e-06, |
| "loss": 1.1878, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.8506528701650653, |
| "grad_norm": 0.6177346028571237, |
| "learning_rate": 1.3237293585821786e-06, |
| "loss": 1.1659, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.8510470559251047, |
| "grad_norm": 0.6174374468477092, |
| "learning_rate": 1.316892496573845e-06, |
| "loss": 1.1553, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.8514412416851441, |
| "grad_norm": 0.6130949007768408, |
| "learning_rate": 1.310072091296677e-06, |
| "loss": 1.1732, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.8518354274451836, |
| "grad_norm": 0.6061989244208447, |
| "learning_rate": 1.303268155677101e-06, |
| "loss": 1.1714, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.852229613205223, |
| "grad_norm": 0.6088152483466427, |
| "learning_rate": 1.296480702610332e-06, |
| "loss": 1.1614, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.8526237989652624, |
| "grad_norm": 0.6410096353876902, |
| "learning_rate": 1.2897097449603491e-06, |
| "loss": 1.243, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.8530179847253018, |
| "grad_norm": 0.6215005861175246, |
| "learning_rate": 1.2829552955598623e-06, |
| "loss": 1.2266, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.8534121704853412, |
| "grad_norm": 0.6308618646844184, |
| "learning_rate": 1.2762173672102996e-06, |
| "loss": 1.2355, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.8538063562453806, |
| "grad_norm": 0.611573077552191, |
| "learning_rate": 1.269495972681777e-06, |
| "loss": 1.1797, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.8542005420054201, |
| "grad_norm": 0.6275131772886295, |
| "learning_rate": 1.2627911247130709e-06, |
| "loss": 1.1919, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.8545947277654595, |
| "grad_norm": 0.5993315352532911, |
| "learning_rate": 1.2561028360116002e-06, |
| "loss": 1.1554, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.8549889135254989, |
| "grad_norm": 0.6007090422412275, |
| "learning_rate": 1.2494311192533958e-06, |
| "loss": 1.1593, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.8553830992855384, |
| "grad_norm": 0.6260215764887312, |
| "learning_rate": 1.242775987083088e-06, |
| "loss": 1.1785, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.8557772850455777, |
| "grad_norm": 0.6072634488679926, |
| "learning_rate": 1.2361374521138724e-06, |
| "loss": 1.1744, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.8561714708056172, |
| "grad_norm": 0.6121816712097319, |
| "learning_rate": 1.2295155269274827e-06, |
| "loss": 1.1959, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.8565656565656565, |
| "grad_norm": 0.60232884933228, |
| "learning_rate": 1.2229102240741819e-06, |
| "loss": 1.1909, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.856959842325696, |
| "grad_norm": 0.6219022324990678, |
| "learning_rate": 1.2163215560727215e-06, |
| "loss": 1.2573, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.8573540280857354, |
| "grad_norm": 0.6432583376483387, |
| "learning_rate": 1.2097495354103284e-06, |
| "loss": 1.153, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.8577482138457748, |
| "grad_norm": 0.6057914024761237, |
| "learning_rate": 1.2031941745426824e-06, |
| "loss": 1.1835, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.8581423996058143, |
| "grad_norm": 0.5896128109103955, |
| "learning_rate": 1.1966554858938805e-06, |
| "loss": 1.1695, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.8585365853658536, |
| "grad_norm": 0.611114769313689, |
| "learning_rate": 1.1901334818564291e-06, |
| "loss": 1.1891, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.8589307711258931, |
| "grad_norm": 0.6057440341466516, |
| "learning_rate": 1.1836281747912125e-06, |
| "loss": 1.1829, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.8593249568859325, |
| "grad_norm": 0.6070873449171827, |
| "learning_rate": 1.1771395770274653e-06, |
| "loss": 1.1444, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.8597191426459719, |
| "grad_norm": 0.6173928300019214, |
| "learning_rate": 1.1706677008627564e-06, |
| "loss": 1.1758, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.8601133284060113, |
| "grad_norm": 0.620761797942304, |
| "learning_rate": 1.1642125585629593e-06, |
| "loss": 1.2022, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.8605075141660508, |
| "grad_norm": 0.6296457077216101, |
| "learning_rate": 1.1577741623622407e-06, |
| "loss": 1.1907, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.8609016999260902, |
| "grad_norm": 0.6203549213795299, |
| "learning_rate": 1.1513525244630198e-06, |
| "loss": 1.2293, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.8612958856861296, |
| "grad_norm": 0.6120086583589758, |
| "learning_rate": 1.1449476570359608e-06, |
| "loss": 1.118, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.861690071446169, |
| "grad_norm": 0.6044150524885432, |
| "learning_rate": 1.1385595722199438e-06, |
| "loss": 1.1275, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.8620842572062084, |
| "grad_norm": 0.6216834948320731, |
| "learning_rate": 1.1321882821220375e-06, |
| "loss": 1.2583, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.8624784429662479, |
| "grad_norm": 0.6314861381611362, |
| "learning_rate": 1.1258337988174794e-06, |
| "loss": 1.1917, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.8628726287262872, |
| "grad_norm": 0.6086856686806165, |
| "learning_rate": 1.1194961343496603e-06, |
| "loss": 1.2272, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.8632668144863267, |
| "grad_norm": 0.5983542589167679, |
| "learning_rate": 1.1131753007300884e-06, |
| "loss": 1.1747, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.863661000246366, |
| "grad_norm": 0.6196216583286488, |
| "learning_rate": 1.1068713099383754e-06, |
| "loss": 1.1563, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.8640551860064055, |
| "grad_norm": 0.622973730967306, |
| "learning_rate": 1.1005841739222166e-06, |
| "loss": 1.1721, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.864449371766445, |
| "grad_norm": 0.6084922385739949, |
| "learning_rate": 1.094313904597355e-06, |
| "loss": 1.2149, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.8648435575264843, |
| "grad_norm": 0.6017658686517071, |
| "learning_rate": 1.0880605138475708e-06, |
| "loss": 1.1582, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.8652377432865238, |
| "grad_norm": 0.6242920242129635, |
| "learning_rate": 1.0818240135246528e-06, |
| "loss": 1.2032, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.8656319290465632, |
| "grad_norm": 0.627892199233753, |
| "learning_rate": 1.0756044154483813e-06, |
| "loss": 1.2027, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.8660261148066026, |
| "grad_norm": 0.630460438152927, |
| "learning_rate": 1.0694017314064997e-06, |
| "loss": 1.2043, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.866420300566642, |
| "grad_norm": 0.5912369379567544, |
| "learning_rate": 1.0632159731546965e-06, |
| "loss": 1.1947, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.8668144863266815, |
| "grad_norm": 0.6032500593156851, |
| "learning_rate": 1.057047152416585e-06, |
| "loss": 1.229, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.8672086720867209, |
| "grad_norm": 0.6224700658910649, |
| "learning_rate": 1.0508952808836682e-06, |
| "loss": 1.1966, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.8676028578467603, |
| "grad_norm": 0.5995356945189887, |
| "learning_rate": 1.044760370215333e-06, |
| "loss": 1.1371, |
| "step": 2201 |
| }, |
| { |
| "epoch": 0.8679970436067997, |
| "grad_norm": 0.6264831422167915, |
| "learning_rate": 1.038642432038821e-06, |
| "loss": 1.1853, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.8683912293668391, |
| "grad_norm": 0.6112624994424279, |
| "learning_rate": 1.0325414779492028e-06, |
| "loss": 1.1631, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.8687854151268786, |
| "grad_norm": 0.6028695555356325, |
| "learning_rate": 1.0264575195093628e-06, |
| "loss": 1.1203, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.8691796008869179, |
| "grad_norm": 0.5908979194467311, |
| "learning_rate": 1.020390568249976e-06, |
| "loss": 1.1464, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.8695737866469574, |
| "grad_norm": 0.6020405748750884, |
| "learning_rate": 1.0143406356694797e-06, |
| "loss": 1.1964, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.8699679724069967, |
| "grad_norm": 0.5976257450496796, |
| "learning_rate": 1.0083077332340563e-06, |
| "loss": 1.1588, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.8703621581670362, |
| "grad_norm": 0.5924445023992051, |
| "learning_rate": 1.0022918723776175e-06, |
| "loss": 1.1257, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.8707563439270757, |
| "grad_norm": 0.6344444392731119, |
| "learning_rate": 9.962930645017731e-07, |
| "loss": 1.1801, |
| "step": 2209 |
| }, |
| { |
| "epoch": 0.871150529687115, |
| "grad_norm": 0.6241397033723098, |
| "learning_rate": 9.903113209758098e-07, |
| "loss": 1.1347, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.8715447154471545, |
| "grad_norm": 0.6092226491641914, |
| "learning_rate": 9.843466531366774e-07, |
| "loss": 1.0919, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.8719389012071939, |
| "grad_norm": 0.6236635571749678, |
| "learning_rate": 9.783990722889658e-07, |
| "loss": 1.231, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.8723330869672333, |
| "grad_norm": 0.6136904911563315, |
| "learning_rate": 9.724685897048747e-07, |
| "loss": 1.2087, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.8727272727272727, |
| "grad_norm": 0.6091769703428004, |
| "learning_rate": 9.665552166241965e-07, |
| "loss": 1.1516, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.8731214584873122, |
| "grad_norm": 0.5777422885075877, |
| "learning_rate": 9.606589642543064e-07, |
| "loss": 1.1211, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.8735156442473516, |
| "grad_norm": 0.6279241245367188, |
| "learning_rate": 9.547798437701194e-07, |
| "loss": 1.1701, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.873909830007391, |
| "grad_norm": 0.6055169158607546, |
| "learning_rate": 9.489178663140897e-07, |
| "loss": 1.1508, |
| "step": 2217 |
| }, |
| { |
| "epoch": 0.8743040157674304, |
| "grad_norm": 0.6227455138805572, |
| "learning_rate": 9.43073042996181e-07, |
| "loss": 1.1853, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.8746982015274698, |
| "grad_norm": 0.6205644720521007, |
| "learning_rate": 9.372453848938401e-07, |
| "loss": 1.1604, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.8750923872875093, |
| "grad_norm": 0.5946939883094988, |
| "learning_rate": 9.314349030519843e-07, |
| "loss": 1.1243, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.8754865730475486, |
| "grad_norm": 0.6057397264443781, |
| "learning_rate": 9.256416084829778e-07, |
| "loss": 1.141, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.8758807588075881, |
| "grad_norm": 0.6080411686477221, |
| "learning_rate": 9.198655121666111e-07, |
| "loss": 1.1783, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.8762749445676274, |
| "grad_norm": 0.6005470900378805, |
| "learning_rate": 9.141066250500741e-07, |
| "loss": 1.147, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.8766691303276669, |
| "grad_norm": 0.5945362980985712, |
| "learning_rate": 9.083649580479493e-07, |
| "loss": 1.1036, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.8770633160877064, |
| "grad_norm": 0.6099070701658922, |
| "learning_rate": 9.026405220421785e-07, |
| "loss": 1.155, |
| "step": 2225 |
| }, |
| { |
| "epoch": 0.8774575018477457, |
| "grad_norm": 0.6137077181265143, |
| "learning_rate": 8.969333278820447e-07, |
| "loss": 1.1849, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.8778516876077852, |
| "grad_norm": 0.6082519323627844, |
| "learning_rate": 8.912433863841541e-07, |
| "loss": 1.1608, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.8782458733678246, |
| "grad_norm": 0.604418332046713, |
| "learning_rate": 8.855707083324183e-07, |
| "loss": 1.1366, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.878640059127864, |
| "grad_norm": 0.58974397331068, |
| "learning_rate": 8.799153044780229e-07, |
| "loss": 1.1366, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.8790342448879034, |
| "grad_norm": 0.652855576695134, |
| "learning_rate": 8.742771855394205e-07, |
| "loss": 1.2052, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.8794284306479428, |
| "grad_norm": 0.606150321404692, |
| "learning_rate": 8.686563622023059e-07, |
| "loss": 1.1637, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.8798226164079823, |
| "grad_norm": 0.5985881774469998, |
| "learning_rate": 8.630528451195874e-07, |
| "loss": 1.1659, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.8802168021680217, |
| "grad_norm": 0.6204340076356355, |
| "learning_rate": 8.574666449113766e-07, |
| "loss": 1.1584, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.8806109879280611, |
| "grad_norm": 0.6270054382615008, |
| "learning_rate": 8.518977721649679e-07, |
| "loss": 1.2141, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.8810051736881005, |
| "grad_norm": 0.6090284700913406, |
| "learning_rate": 8.46346237434813e-07, |
| "loss": 1.1922, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.88139935944814, |
| "grad_norm": 0.6667233953406846, |
| "learning_rate": 8.408120512425e-07, |
| "loss": 1.267, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.8817935452081793, |
| "grad_norm": 0.6099197043950569, |
| "learning_rate": 8.352952240767453e-07, |
| "loss": 1.1661, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.8821877309682188, |
| "grad_norm": 0.6353214535694008, |
| "learning_rate": 8.297957663933609e-07, |
| "loss": 1.2521, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.8825819167282581, |
| "grad_norm": 0.5822802452492017, |
| "learning_rate": 8.243136886152381e-07, |
| "loss": 1.1051, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.8829761024882976, |
| "grad_norm": 0.6024284924891233, |
| "learning_rate": 8.188490011323291e-07, |
| "loss": 1.1844, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.8833702882483371, |
| "grad_norm": 0.6218166801091192, |
| "learning_rate": 8.134017143016304e-07, |
| "loss": 1.2239, |
| "step": 2241 |
| }, |
| { |
| "epoch": 0.8837644740083764, |
| "grad_norm": 0.5982021682698988, |
| "learning_rate": 8.079718384471557e-07, |
| "loss": 1.1807, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.8841586597684159, |
| "grad_norm": 0.6167445078039492, |
| "learning_rate": 8.025593838599221e-07, |
| "loss": 1.1514, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.8845528455284553, |
| "grad_norm": 0.6267698758553212, |
| "learning_rate": 7.971643607979273e-07, |
| "loss": 1.1775, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.8849470312884947, |
| "grad_norm": 0.6007524051589882, |
| "learning_rate": 7.917867794861378e-07, |
| "loss": 1.1715, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.8853412170485341, |
| "grad_norm": 0.5867075125001983, |
| "learning_rate": 7.864266501164541e-07, |
| "loss": 1.142, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.8857354028085735, |
| "grad_norm": 0.6117682983819526, |
| "learning_rate": 7.810839828477101e-07, |
| "loss": 1.1969, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.886129588568613, |
| "grad_norm": 0.6205037469861255, |
| "learning_rate": 7.757587878056372e-07, |
| "loss": 1.2472, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.8865237743286524, |
| "grad_norm": 0.6737180765038134, |
| "learning_rate": 7.704510750828542e-07, |
| "loss": 1.2256, |
| "step": 2249 |
| }, |
| { |
| "epoch": 0.8869179600886918, |
| "grad_norm": 0.5977988152478557, |
| "learning_rate": 7.651608547388489e-07, |
| "loss": 1.2092, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.8873121458487312, |
| "grad_norm": 0.5870543672858427, |
| "learning_rate": 7.598881367999566e-07, |
| "loss": 1.1694, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.8877063316087707, |
| "grad_norm": 0.604325426385001, |
| "learning_rate": 7.546329312593382e-07, |
| "loss": 1.2068, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.88810051736881, |
| "grad_norm": 0.5858794646282535, |
| "learning_rate": 7.49395248076964e-07, |
| "loss": 1.1019, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.8884947031288495, |
| "grad_norm": 0.6284533960586269, |
| "learning_rate": 7.441750971795991e-07, |
| "loss": 1.1827, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.6073835508191624, |
| "learning_rate": 7.389724884607763e-07, |
| "loss": 1.1928, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.8892830746489283, |
| "grad_norm": 0.6052990860959455, |
| "learning_rate": 7.337874317807803e-07, |
| "loss": 1.1328, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.8896772604089678, |
| "grad_norm": 0.6426366735903185, |
| "learning_rate": 7.286199369666346e-07, |
| "loss": 1.184, |
| "step": 2257 |
| }, |
| { |
| "epoch": 0.8900714461690071, |
| "grad_norm": 0.6135398558082623, |
| "learning_rate": 7.234700138120776e-07, |
| "loss": 1.1567, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.8904656319290466, |
| "grad_norm": 0.611697930631017, |
| "learning_rate": 7.183376720775415e-07, |
| "loss": 1.1767, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.890859817689086, |
| "grad_norm": 0.6047260944980717, |
| "learning_rate": 7.13222921490142e-07, |
| "loss": 1.142, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.8912540034491254, |
| "grad_norm": 0.6137118230173922, |
| "learning_rate": 7.081257717436507e-07, |
| "loss": 1.2122, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.8916481892091648, |
| "grad_norm": 0.5862917774633897, |
| "learning_rate": 7.030462324984821e-07, |
| "loss": 1.1464, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.8920423749692042, |
| "grad_norm": 0.5985229585325247, |
| "learning_rate": 6.979843133816744e-07, |
| "loss": 1.1538, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.8924365607292437, |
| "grad_norm": 0.6215683066351476, |
| "learning_rate": 6.929400239868745e-07, |
| "loss": 1.2066, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.8928307464892831, |
| "grad_norm": 0.6090688114535339, |
| "learning_rate": 6.879133738743116e-07, |
| "loss": 1.1246, |
| "step": 2265 |
| }, |
| { |
| "epoch": 0.8932249322493225, |
| "grad_norm": 0.6182971626634737, |
| "learning_rate": 6.829043725707852e-07, |
| "loss": 1.1638, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.8936191180093619, |
| "grad_norm": 0.6028753226292936, |
| "learning_rate": 6.779130295696479e-07, |
| "loss": 1.15, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.8940133037694014, |
| "grad_norm": 0.6138311069551523, |
| "learning_rate": 6.729393543307838e-07, |
| "loss": 1.1561, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.8944074895294407, |
| "grad_norm": 0.5975576303249758, |
| "learning_rate": 6.679833562805882e-07, |
| "loss": 1.1286, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.8948016752894802, |
| "grad_norm": 0.6059495772680955, |
| "learning_rate": 6.630450448119618e-07, |
| "loss": 1.1959, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.8951958610495195, |
| "grad_norm": 0.6797523524629732, |
| "learning_rate": 6.581244292842792e-07, |
| "loss": 1.1897, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.895590046809559, |
| "grad_norm": 0.6011492268276885, |
| "learning_rate": 6.532215190233748e-07, |
| "loss": 1.1667, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.8959842325695985, |
| "grad_norm": 0.6084758528762907, |
| "learning_rate": 6.483363233215345e-07, |
| "loss": 1.1592, |
| "step": 2273 |
| }, |
| { |
| "epoch": 0.8963784183296378, |
| "grad_norm": 0.6140227903857725, |
| "learning_rate": 6.434688514374632e-07, |
| "loss": 1.1743, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.8967726040896773, |
| "grad_norm": 0.6351263385074363, |
| "learning_rate": 6.386191125962749e-07, |
| "loss": 1.1728, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.8971667898497167, |
| "grad_norm": 0.6076012012521917, |
| "learning_rate": 6.337871159894804e-07, |
| "loss": 1.1741, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.8975609756097561, |
| "grad_norm": 0.6197330976983585, |
| "learning_rate": 6.289728707749609e-07, |
| "loss": 1.1687, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.8979551613697955, |
| "grad_norm": 0.6046508571791555, |
| "learning_rate": 6.241763860769535e-07, |
| "loss": 1.1977, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.8983493471298349, |
| "grad_norm": 0.6131957231184164, |
| "learning_rate": 6.193976709860339e-07, |
| "loss": 1.2021, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.8987435328898744, |
| "grad_norm": 0.6067811323591332, |
| "learning_rate": 6.146367345591053e-07, |
| "loss": 1.1561, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.8991377186499138, |
| "grad_norm": 0.5867337322284969, |
| "learning_rate": 6.098935858193688e-07, |
| "loss": 1.1449, |
| "step": 2281 |
| }, |
| { |
| "epoch": 0.8995319044099532, |
| "grad_norm": 0.6098231620125031, |
| "learning_rate": 6.051682337563158e-07, |
| "loss": 1.1462, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.8999260901699926, |
| "grad_norm": 0.6230967996752504, |
| "learning_rate": 6.004606873257101e-07, |
| "loss": 1.1426, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.900320275930032, |
| "grad_norm": 0.5958004624605026, |
| "learning_rate": 5.957709554495683e-07, |
| "loss": 1.1797, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.9007144616900714, |
| "grad_norm": 0.6079824292132843, |
| "learning_rate": 5.910990470161416e-07, |
| "loss": 1.2281, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.9011086474501109, |
| "grad_norm": 0.5984385548123256, |
| "learning_rate": 5.864449708799059e-07, |
| "loss": 1.1619, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.9015028332101502, |
| "grad_norm": 0.591664056006518, |
| "learning_rate": 5.818087358615354e-07, |
| "loss": 1.139, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.9018970189701897, |
| "grad_norm": 0.6275372827109235, |
| "learning_rate": 5.771903507478915e-07, |
| "loss": 1.2364, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.9022912047302292, |
| "grad_norm": 0.5975540870267736, |
| "learning_rate": 5.725898242920092e-07, |
| "loss": 1.1527, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.9026853904902685, |
| "grad_norm": 0.6050375583531165, |
| "learning_rate": 5.680071652130736e-07, |
| "loss": 1.1666, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.903079576250308, |
| "grad_norm": 0.6259743502880166, |
| "learning_rate": 5.634423821964074e-07, |
| "loss": 1.2275, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.9034737620103473, |
| "grad_norm": 0.6231031649083622, |
| "learning_rate": 5.588954838934523e-07, |
| "loss": 1.1716, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.9038679477703868, |
| "grad_norm": 0.6216418043768527, |
| "learning_rate": 5.543664789217562e-07, |
| "loss": 1.1871, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.9042621335304262, |
| "grad_norm": 0.583945627934862, |
| "learning_rate": 5.498553758649516e-07, |
| "loss": 1.1614, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.9046563192904656, |
| "grad_norm": 0.5974644710894348, |
| "learning_rate": 5.45362183272743e-07, |
| "loss": 1.1295, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.9050505050505051, |
| "grad_norm": 0.579085452767809, |
| "learning_rate": 5.408869096608926e-07, |
| "loss": 1.1105, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.9054446908105445, |
| "grad_norm": 0.5929251833508978, |
| "learning_rate": 5.364295635112016e-07, |
| "loss": 1.1386, |
| "step": 2297 |
| }, |
| { |
| "epoch": 0.9058388765705839, |
| "grad_norm": 0.5974271999517115, |
| "learning_rate": 5.319901532714877e-07, |
| "loss": 1.142, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.9062330623306233, |
| "grad_norm": 0.6188389973115496, |
| "learning_rate": 5.27568687355583e-07, |
| "loss": 1.2045, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.9066272480906628, |
| "grad_norm": 0.6234466396061988, |
| "learning_rate": 5.231651741433063e-07, |
| "loss": 1.1656, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.9070214338507021, |
| "grad_norm": 0.6316349387146205, |
| "learning_rate": 5.187796219804508e-07, |
| "loss": 1.1759, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.9074156196107416, |
| "grad_norm": 0.6119904812276791, |
| "learning_rate": 5.144120391787732e-07, |
| "loss": 1.1648, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.9078098053707809, |
| "grad_norm": 0.5992707761677788, |
| "learning_rate": 5.100624340159676e-07, |
| "loss": 1.1705, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.9082039911308204, |
| "grad_norm": 0.6125355457119835, |
| "learning_rate": 5.057308147356632e-07, |
| "loss": 1.1878, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.9085981768908599, |
| "grad_norm": 0.5987001014690438, |
| "learning_rate": 5.014171895473929e-07, |
| "loss": 1.1728, |
| "step": 2305 |
| }, |
| { |
| "epoch": 0.9089923626508992, |
| "grad_norm": 0.6233596220905993, |
| "learning_rate": 4.971215666265939e-07, |
| "loss": 1.1682, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.9093865484109387, |
| "grad_norm": 0.6120680988346603, |
| "learning_rate": 4.928439541145802e-07, |
| "loss": 1.154, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.909780734170978, |
| "grad_norm": 0.6159172688282434, |
| "learning_rate": 4.885843601185291e-07, |
| "loss": 1.1545, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.9101749199310175, |
| "grad_norm": 0.6561541537105161, |
| "learning_rate": 4.843427927114752e-07, |
| "loss": 1.2581, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.9105691056910569, |
| "grad_norm": 0.6397314727277476, |
| "learning_rate": 4.801192599322835e-07, |
| "loss": 1.2649, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.9109632914510963, |
| "grad_norm": 0.5968063081167863, |
| "learning_rate": 4.759137697856364e-07, |
| "loss": 1.1411, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.9113574772111358, |
| "grad_norm": 0.6046846431473332, |
| "learning_rate": 4.717263302420283e-07, |
| "loss": 1.2202, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.9117516629711752, |
| "grad_norm": 0.6213044733495849, |
| "learning_rate": 4.675569492377363e-07, |
| "loss": 1.1844, |
| "step": 2313 |
| }, |
| { |
| "epoch": 0.9121458487312146, |
| "grad_norm": 0.6145028852257042, |
| "learning_rate": 4.634056346748117e-07, |
| "loss": 1.2235, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.912540034491254, |
| "grad_norm": 0.6041076227153636, |
| "learning_rate": 4.5927239442107306e-07, |
| "loss": 1.1794, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.9129342202512934, |
| "grad_norm": 0.5917377858853244, |
| "learning_rate": 4.551572363100731e-07, |
| "loss": 1.1421, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.9133284060113328, |
| "grad_norm": 0.57962701939227, |
| "learning_rate": 4.5106016814110197e-07, |
| "loss": 1.1574, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.9137225917713723, |
| "grad_norm": 0.6010271614392757, |
| "learning_rate": 4.469811976791605e-07, |
| "loss": 1.1287, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.9141167775314116, |
| "grad_norm": 0.6304038957433044, |
| "learning_rate": 4.429203326549525e-07, |
| "loss": 1.1971, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.9145109632914511, |
| "grad_norm": 0.6078465285882131, |
| "learning_rate": 4.3887758076486597e-07, |
| "loss": 1.175, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.9149051490514906, |
| "grad_norm": 0.6058022551406895, |
| "learning_rate": 4.3485294967095747e-07, |
| "loss": 1.1782, |
| "step": 2321 |
| }, |
| { |
| "epoch": 0.9152993348115299, |
| "grad_norm": 0.6222158541213707, |
| "learning_rate": 4.308464470009432e-07, |
| "loss": 1.2142, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.9156935205715694, |
| "grad_norm": 0.5967586046808354, |
| "learning_rate": 4.2685808034818366e-07, |
| "loss": 1.1787, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.9160877063316087, |
| "grad_norm": 0.6168581167404708, |
| "learning_rate": 4.228878572716588e-07, |
| "loss": 1.1771, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.9164818920916482, |
| "grad_norm": 0.6140349806295636, |
| "learning_rate": 4.189357852959708e-07, |
| "loss": 1.1865, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.9168760778516876, |
| "grad_norm": 0.616944566915736, |
| "learning_rate": 4.150018719113147e-07, |
| "loss": 1.0969, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.917270263611727, |
| "grad_norm": 0.6129659770559598, |
| "learning_rate": 4.110861245734721e-07, |
| "loss": 1.1765, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.9176644493717665, |
| "grad_norm": 0.6033445957652277, |
| "learning_rate": 4.0718855070379535e-07, |
| "loss": 1.2008, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.9180586351318059, |
| "grad_norm": 0.6190874106262034, |
| "learning_rate": 4.0330915768919454e-07, |
| "loss": 1.2122, |
| "step": 2329 |
| }, |
| { |
| "epoch": 0.9184528208918453, |
| "grad_norm": 0.6012965614913941, |
| "learning_rate": 3.9944795288212047e-07, |
| "loss": 1.1824, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.9188470066518847, |
| "grad_norm": 0.5999458716930699, |
| "learning_rate": 3.956049436005538e-07, |
| "loss": 1.1437, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.9192411924119241, |
| "grad_norm": 0.6010551580255399, |
| "learning_rate": 3.917801371279895e-07, |
| "loss": 1.1636, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.9196353781719635, |
| "grad_norm": 0.6265717559201462, |
| "learning_rate": 3.8797354071342443e-07, |
| "loss": 1.1524, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.920029563932003, |
| "grad_norm": 0.5933108670825852, |
| "learning_rate": 3.841851615713399e-07, |
| "loss": 1.1646, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.9204237496920423, |
| "grad_norm": 0.6057802305576383, |
| "learning_rate": 3.8041500688169253e-07, |
| "loss": 1.1538, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.9208179354520818, |
| "grad_norm": 0.6237793034270526, |
| "learning_rate": 3.766630837899032e-07, |
| "loss": 1.1886, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.9212121212121213, |
| "grad_norm": 0.6198812448884538, |
| "learning_rate": 3.729293994068306e-07, |
| "loss": 1.1955, |
| "step": 2337 |
| }, |
| { |
| "epoch": 0.9216063069721606, |
| "grad_norm": 0.6247300075084717, |
| "learning_rate": 3.6921396080877414e-07, |
| "loss": 1.2292, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.9220004927322001, |
| "grad_norm": 0.6062053891469021, |
| "learning_rate": 3.6551677503744776e-07, |
| "loss": 1.1789, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.9223946784922394, |
| "grad_norm": 0.6105135332217473, |
| "learning_rate": 3.618378490999719e-07, |
| "loss": 1.1439, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.9227888642522789, |
| "grad_norm": 0.5768948920273077, |
| "learning_rate": 3.581771899688646e-07, |
| "loss": 1.1398, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.9231830500123183, |
| "grad_norm": 0.6233702760949931, |
| "learning_rate": 3.545348045820174e-07, |
| "loss": 1.2338, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.9235772357723577, |
| "grad_norm": 0.6293178839378355, |
| "learning_rate": 3.5091069984269366e-07, |
| "loss": 1.284, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.9239714215323972, |
| "grad_norm": 0.6012639840259887, |
| "learning_rate": 3.473048826195058e-07, |
| "loss": 1.1688, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.9243656072924366, |
| "grad_norm": 0.6260153598558462, |
| "learning_rate": 3.4371735974641053e-07, |
| "loss": 1.2185, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.924759793052476, |
| "grad_norm": 0.6268091346400951, |
| "learning_rate": 3.40148138022689e-07, |
| "loss": 1.2058, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.9251539788125154, |
| "grad_norm": 0.6019494923660028, |
| "learning_rate": 3.365972242129378e-07, |
| "loss": 1.1248, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.9255481645725548, |
| "grad_norm": 0.6127790785927769, |
| "learning_rate": 3.3306462504705706e-07, |
| "loss": 1.1704, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.9259423503325942, |
| "grad_norm": 0.6434642793289438, |
| "learning_rate": 3.2955034722023214e-07, |
| "loss": 1.1639, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.9263365360926337, |
| "grad_norm": 0.6160741690407769, |
| "learning_rate": 3.2605439739292863e-07, |
| "loss": 1.181, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.926730721852673, |
| "grad_norm": 0.6040626337033564, |
| "learning_rate": 3.2257678219087543e-07, |
| "loss": 1.1359, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.9271249076127125, |
| "grad_norm": 0.6569697201075794, |
| "learning_rate": 3.191175082050502e-07, |
| "loss": 1.1525, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.927519093372752, |
| "grad_norm": 0.600173226578904, |
| "learning_rate": 3.156765819916696e-07, |
| "loss": 1.1436, |
| "step": 2353 |
| }, |
| { |
| "epoch": 0.9279132791327913, |
| "grad_norm": 0.5975529599005833, |
| "learning_rate": 3.122540100721794e-07, |
| "loss": 1.1291, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.9283074648928308, |
| "grad_norm": 0.6182461879570678, |
| "learning_rate": 3.088497989332351e-07, |
| "loss": 1.1686, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.9287016506528701, |
| "grad_norm": 0.6027606375147575, |
| "learning_rate": 3.05463955026698e-07, |
| "loss": 1.132, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.9290958364129096, |
| "grad_norm": 0.6211823263235605, |
| "learning_rate": 3.020964847696151e-07, |
| "loss": 1.2116, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.929490022172949, |
| "grad_norm": 0.8055569064292696, |
| "learning_rate": 2.987473945442143e-07, |
| "loss": 1.1802, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.9298842079329884, |
| "grad_norm": 0.63319663534154, |
| "learning_rate": 2.9541669069788505e-07, |
| "loss": 1.1735, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.9302783936930279, |
| "grad_norm": 0.6092240457871959, |
| "learning_rate": 2.9210437954316997e-07, |
| "loss": 1.1769, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.9306725794530673, |
| "grad_norm": 0.5994634449672462, |
| "learning_rate": 2.888104673577574e-07, |
| "loss": 1.1217, |
| "step": 2361 |
| }, |
| { |
| "epoch": 0.9310667652131067, |
| "grad_norm": 0.6129161824755393, |
| "learning_rate": 2.8553496038445707e-07, |
| "loss": 1.1949, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.9314609509731461, |
| "grad_norm": 0.5946581674891636, |
| "learning_rate": 2.8227786483120523e-07, |
| "loss": 1.1596, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.9318551367331855, |
| "grad_norm": 0.6220408417857064, |
| "learning_rate": 2.790391868710374e-07, |
| "loss": 1.1697, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.9322493224932249, |
| "grad_norm": 0.611301302747428, |
| "learning_rate": 2.7581893264208346e-07, |
| "loss": 1.1655, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.9326435082532644, |
| "grad_norm": 0.6303361299231326, |
| "learning_rate": 2.7261710824755814e-07, |
| "loss": 1.1762, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.9330376940133037, |
| "grad_norm": 0.613809194427214, |
| "learning_rate": 2.694337197557462e-07, |
| "loss": 1.217, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.9334318797733432, |
| "grad_norm": 0.5947788641950997, |
| "learning_rate": 2.66268773199988e-07, |
| "loss": 1.2082, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.9338260655333827, |
| "grad_norm": 0.6342184771933248, |
| "learning_rate": 2.631222745786788e-07, |
| "loss": 1.2426, |
| "step": 2369 |
| }, |
| { |
| "epoch": 0.934220251293422, |
| "grad_norm": 0.6238792637987063, |
| "learning_rate": 2.5999422985524157e-07, |
| "loss": 1.2235, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.9346144370534615, |
| "grad_norm": 0.6601808628731608, |
| "learning_rate": 2.5688464495813304e-07, |
| "loss": 1.2687, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.9350086228135008, |
| "grad_norm": 0.591780101499758, |
| "learning_rate": 2.537935257808177e-07, |
| "loss": 1.1727, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.9354028085735403, |
| "grad_norm": 0.6004908722208354, |
| "learning_rate": 2.507208781817638e-07, |
| "loss": 1.1644, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.9357969943335797, |
| "grad_norm": 0.6213717940339839, |
| "learning_rate": 2.4766670798443414e-07, |
| "loss": 1.1808, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.9361911800936191, |
| "grad_norm": 0.6088482849843166, |
| "learning_rate": 2.4463102097726843e-07, |
| "loss": 1.1679, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.9365853658536586, |
| "grad_norm": 0.5797582430181196, |
| "learning_rate": 2.4161382291367776e-07, |
| "loss": 1.1257, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.936979551613698, |
| "grad_norm": 0.619020334020193, |
| "learning_rate": 2.386151195120323e-07, |
| "loss": 1.1419, |
| "step": 2377 |
| }, |
| { |
| "epoch": 0.9373737373737374, |
| "grad_norm": 0.5946052196409608, |
| "learning_rate": 2.356349164556493e-07, |
| "loss": 1.1304, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.9377679231337768, |
| "grad_norm": 0.6091945754012382, |
| "learning_rate": 2.3267321939278277e-07, |
| "loss": 1.2201, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.9381621088938162, |
| "grad_norm": 0.6170932843567667, |
| "learning_rate": 2.2973003393661374e-07, |
| "loss": 1.2362, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.9385562946538556, |
| "grad_norm": 0.6012825687735323, |
| "learning_rate": 2.2680536566523802e-07, |
| "loss": 1.15, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.9389504804138951, |
| "grad_norm": 0.6255938234171833, |
| "learning_rate": 2.2389922012165944e-07, |
| "loss": 1.2223, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.9393446661739344, |
| "grad_norm": 0.5876733837374598, |
| "learning_rate": 2.2101160281377098e-07, |
| "loss": 1.141, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.9397388519339739, |
| "grad_norm": 0.6146179783064085, |
| "learning_rate": 2.1814251921435603e-07, |
| "loss": 1.1977, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.9401330376940134, |
| "grad_norm": 0.5988256998213285, |
| "learning_rate": 2.1529197476106821e-07, |
| "loss": 1.1755, |
| "step": 2385 |
| }, |
| { |
| "epoch": 0.9405272234540527, |
| "grad_norm": 0.619835334128145, |
| "learning_rate": 2.124599748564249e-07, |
| "loss": 1.1283, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.9409214092140922, |
| "grad_norm": 0.598162178135982, |
| "learning_rate": 2.0964652486779814e-07, |
| "loss": 1.1926, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.9413155949741315, |
| "grad_norm": 0.613362224923904, |
| "learning_rate": 2.0685163012740039e-07, |
| "loss": 1.1947, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.941709780734171, |
| "grad_norm": 0.5975727904035542, |
| "learning_rate": 2.0407529593228114e-07, |
| "loss": 1.1629, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.9421039664942104, |
| "grad_norm": 0.6139860108767166, |
| "learning_rate": 2.013175275443102e-07, |
| "loss": 1.2471, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.9424981522542498, |
| "grad_norm": 0.585425153613225, |
| "learning_rate": 1.9857833019017004e-07, |
| "loss": 1.0983, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.9428923380142893, |
| "grad_norm": 0.6118000826090201, |
| "learning_rate": 1.9585770906134671e-07, |
| "loss": 1.1331, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.9432865237743286, |
| "grad_norm": 0.5921590656780138, |
| "learning_rate": 1.9315566931412233e-07, |
| "loss": 1.1126, |
| "step": 2393 |
| }, |
| { |
| "epoch": 0.9436807095343681, |
| "grad_norm": 0.6165903484277372, |
| "learning_rate": 1.9047221606955713e-07, |
| "loss": 1.198, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.9440748952944075, |
| "grad_norm": 0.6368352242306206, |
| "learning_rate": 1.8780735441348842e-07, |
| "loss": 1.2699, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.9444690810544469, |
| "grad_norm": 0.6099076721349784, |
| "learning_rate": 1.8516108939651945e-07, |
| "loss": 1.2367, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.9448632668144863, |
| "grad_norm": 0.6085928656086841, |
| "learning_rate": 1.8253342603400503e-07, |
| "loss": 1.1395, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.9452574525745258, |
| "grad_norm": 0.6174687470746002, |
| "learning_rate": 1.7992436930604484e-07, |
| "loss": 1.1651, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.9456516383345651, |
| "grad_norm": 0.6129685190288655, |
| "learning_rate": 1.7733392415747452e-07, |
| "loss": 1.1806, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.9460458240946046, |
| "grad_norm": 0.5836621907525494, |
| "learning_rate": 1.7476209549785906e-07, |
| "loss": 1.1498, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.946440009854644, |
| "grad_norm": 0.5996938824902894, |
| "learning_rate": 1.7220888820147607e-07, |
| "loss": 1.1156, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.9468341956146834, |
| "grad_norm": 0.6162536454834876, |
| "learning_rate": 1.6967430710731258e-07, |
| "loss": 1.1963, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.9472283813747229, |
| "grad_norm": 0.6280127586386618, |
| "learning_rate": 1.6715835701905604e-07, |
| "loss": 1.2415, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.9476225671347622, |
| "grad_norm": 0.6202334141414314, |
| "learning_rate": 1.6466104270508099e-07, |
| "loss": 1.1966, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.9480167528948017, |
| "grad_norm": 0.6122489081297163, |
| "learning_rate": 1.6218236889844142e-07, |
| "loss": 1.1671, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.948410938654841, |
| "grad_norm": 0.6035232347033065, |
| "learning_rate": 1.5972234029686617e-07, |
| "loss": 1.0962, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.9488051244148805, |
| "grad_norm": 0.6496961489577563, |
| "learning_rate": 1.5728096156274353e-07, |
| "loss": 1.2318, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.94919931017492, |
| "grad_norm": 0.6147346192870907, |
| "learning_rate": 1.5485823732311777e-07, |
| "loss": 1.0982, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.9495934959349593, |
| "grad_norm": 0.6303713451636969, |
| "learning_rate": 1.5245417216967596e-07, |
| "loss": 1.2279, |
| "step": 2409 |
| }, |
| { |
| "epoch": 0.9499876816949988, |
| "grad_norm": 0.5889090939067558, |
| "learning_rate": 1.5006877065874338e-07, |
| "loss": 1.169, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.9503818674550382, |
| "grad_norm": 0.6019171279270943, |
| "learning_rate": 1.477020373112714e-07, |
| "loss": 1.1254, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.9507760532150776, |
| "grad_norm": 0.6157755932202649, |
| "learning_rate": 1.4535397661283092e-07, |
| "loss": 1.1132, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.951170238975117, |
| "grad_norm": 0.6132084756622929, |
| "learning_rate": 1.4302459301360428e-07, |
| "loss": 1.1932, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.9515644247351565, |
| "grad_norm": 0.6249158834646313, |
| "learning_rate": 1.4071389092837339e-07, |
| "loss": 1.2299, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.9519586104951958, |
| "grad_norm": 0.6183091225952251, |
| "learning_rate": 1.3842187473651626e-07, |
| "loss": 1.1556, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.9523527962552353, |
| "grad_norm": 0.5918073875966923, |
| "learning_rate": 1.3614854878199578e-07, |
| "loss": 1.1273, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.9527469820152747, |
| "grad_norm": 0.5982357040080991, |
| "learning_rate": 1.3389391737335112e-07, |
| "loss": 1.1114, |
| "step": 2417 |
| }, |
| { |
| "epoch": 0.9531411677753141, |
| "grad_norm": 0.5883507787023478, |
| "learning_rate": 1.3165798478369184e-07, |
| "loss": 1.1184, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.9535353535353536, |
| "grad_norm": 0.6182981301693431, |
| "learning_rate": 1.2944075525068712e-07, |
| "loss": 1.1803, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.9539295392953929, |
| "grad_norm": 0.6185455523897264, |
| "learning_rate": 1.272422329765588e-07, |
| "loss": 1.1795, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.9543237250554324, |
| "grad_norm": 0.6220883345091087, |
| "learning_rate": 1.2506242212807607e-07, |
| "loss": 1.2235, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.9547179108154717, |
| "grad_norm": 0.6098949505020008, |
| "learning_rate": 1.2290132683654087e-07, |
| "loss": 1.1566, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.9551120965755112, |
| "grad_norm": 0.6015695706886922, |
| "learning_rate": 1.2075895119779025e-07, |
| "loss": 1.1703, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.9555062823355507, |
| "grad_norm": 0.6332300803609152, |
| "learning_rate": 1.1863529927217731e-07, |
| "loss": 1.1943, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.95590046809559, |
| "grad_norm": 0.612260563852357, |
| "learning_rate": 1.1653037508457032e-07, |
| "loss": 1.1732, |
| "step": 2425 |
| }, |
| { |
| "epoch": 0.9562946538556295, |
| "grad_norm": 0.5999781512649874, |
| "learning_rate": 1.1444418262434587e-07, |
| "loss": 1.1752, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.9566888396156689, |
| "grad_norm": 0.6008667456915643, |
| "learning_rate": 1.1237672584537673e-07, |
| "loss": 1.1495, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.9570830253757083, |
| "grad_norm": 0.6153244050308969, |
| "learning_rate": 1.1032800866602633e-07, |
| "loss": 1.1937, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.9574772111357477, |
| "grad_norm": 0.5959829809552201, |
| "learning_rate": 1.0829803496914537e-07, |
| "loss": 1.1581, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.9578713968957872, |
| "grad_norm": 0.6077619966859046, |
| "learning_rate": 1.062868086020552e-07, |
| "loss": 1.1725, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.9582655826558265, |
| "grad_norm": 0.6047743581903363, |
| "learning_rate": 1.0429433337655115e-07, |
| "loss": 1.1331, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.958659768415866, |
| "grad_norm": 0.6201599918518463, |
| "learning_rate": 1.0232061306888918e-07, |
| "loss": 1.1858, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.9590539541759054, |
| "grad_norm": 0.6231710616869747, |
| "learning_rate": 1.0036565141977594e-07, |
| "loss": 1.2016, |
| "step": 2433 |
| }, |
| { |
| "epoch": 0.9594481399359448, |
| "grad_norm": 0.6448288343953715, |
| "learning_rate": 9.842945213437094e-08, |
| "loss": 1.2158, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.9598423256959843, |
| "grad_norm": 0.6167891303410092, |
| "learning_rate": 9.651201888227102e-08, |
| "loss": 1.1559, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.9602365114560236, |
| "grad_norm": 0.6038868590043498, |
| "learning_rate": 9.461335529750815e-08, |
| "loss": 1.1601, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.9606306972160631, |
| "grad_norm": 0.6077888775853522, |
| "learning_rate": 9.273346497854052e-08, |
| "loss": 1.1977, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.9610248829761024, |
| "grad_norm": 0.603082429453148, |
| "learning_rate": 9.08723514882437e-08, |
| "loss": 1.1205, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.9614190687361419, |
| "grad_norm": 0.6010255915248192, |
| "learning_rate": 8.903001835390946e-08, |
| "loss": 1.1565, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.9618132544961814, |
| "grad_norm": 0.5911163710697771, |
| "learning_rate": 8.720646906723585e-08, |
| "loss": 1.1529, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.9622074402562207, |
| "grad_norm": 0.6227655050280417, |
| "learning_rate": 8.540170708431716e-08, |
| "loss": 1.2165, |
| "step": 2441 |
| }, |
| { |
| "epoch": 0.9626016260162602, |
| "grad_norm": 0.626494521422824, |
| "learning_rate": 8.36157358256473e-08, |
| "loss": 1.2108, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.9629958117762996, |
| "grad_norm": 0.5903062085449574, |
| "learning_rate": 8.184855867609976e-08, |
| "loss": 1.1558, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.963389997536339, |
| "grad_norm": 0.6107447987815348, |
| "learning_rate": 8.010017898493316e-08, |
| "loss": 1.159, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.9637841832963784, |
| "grad_norm": 0.608930442078416, |
| "learning_rate": 7.837060006577801e-08, |
| "loss": 1.1968, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.9641783690564178, |
| "grad_norm": 0.594295975968586, |
| "learning_rate": 7.665982519663329e-08, |
| "loss": 1.1405, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.9645725548164572, |
| "grad_norm": 0.5973153401367114, |
| "learning_rate": 7.49678576198587e-08, |
| "loss": 1.1439, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.9649667405764967, |
| "grad_norm": 0.5985621492583797, |
| "learning_rate": 7.329470054217024e-08, |
| "loss": 1.1717, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.9653609263365361, |
| "grad_norm": 0.602845907873701, |
| "learning_rate": 7.164035713463358e-08, |
| "loss": 1.1579, |
| "step": 2449 |
| }, |
| { |
| "epoch": 0.9657551120965755, |
| "grad_norm": 0.6205834350913317, |
| "learning_rate": 7.000483053265506e-08, |
| "loss": 1.2058, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.966149297856615, |
| "grad_norm": 0.6363339379587928, |
| "learning_rate": 6.838812383597959e-08, |
| "loss": 1.2335, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.9665434836166543, |
| "grad_norm": 0.6717079440212176, |
| "learning_rate": 6.679024010868617e-08, |
| "loss": 1.1835, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.9669376693766938, |
| "grad_norm": 0.6013068431470037, |
| "learning_rate": 6.521118237917456e-08, |
| "loss": 1.1285, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.9673318551367331, |
| "grad_norm": 0.5951721146532576, |
| "learning_rate": 6.365095364016971e-08, |
| "loss": 1.1539, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.9677260408967726, |
| "grad_norm": 0.6658577073295611, |
| "learning_rate": 6.210955684870512e-08, |
| "loss": 1.2482, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.9681202266568121, |
| "grad_norm": 0.6300768133578355, |
| "learning_rate": 6.058699492612841e-08, |
| "loss": 1.2359, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.9685144124168514, |
| "grad_norm": 0.6082556264479969, |
| "learning_rate": 5.9083270758085733e-08, |
| "loss": 1.1134, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.9689085981768909, |
| "grad_norm": 0.6185300650907809, |
| "learning_rate": 5.759838719452404e-08, |
| "loss": 1.2206, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.9693027839369303, |
| "grad_norm": 0.6117970900606814, |
| "learning_rate": 5.6132347049679955e-08, |
| "loss": 1.1647, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.9696969696969697, |
| "grad_norm": 0.5976874867227856, |
| "learning_rate": 5.468515310207867e-08, |
| "loss": 1.1589, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.9700911554570091, |
| "grad_norm": 0.6304288708508361, |
| "learning_rate": 5.3256808094527266e-08, |
| "loss": 1.1898, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.9704853412170485, |
| "grad_norm": 0.6311672116169154, |
| "learning_rate": 5.184731473410698e-08, |
| "loss": 1.1659, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.9708795269770879, |
| "grad_norm": 0.58587149930154, |
| "learning_rate": 5.045667569217316e-08, |
| "loss": 1.1655, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.9712737127371274, |
| "grad_norm": 0.6010861112474221, |
| "learning_rate": 4.9084893604344205e-08, |
| "loss": 1.1392, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.9716678984971668, |
| "grad_norm": 0.6136708610607174, |
| "learning_rate": 4.7731971070503754e-08, |
| "loss": 1.1839, |
| "step": 2465 |
| }, |
| { |
| "epoch": 0.9720620842572062, |
| "grad_norm": 0.5941054001607767, |
| "learning_rate": 4.639791065478738e-08, |
| "loss": 1.1675, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.9724562700172457, |
| "grad_norm": 0.6082606108108427, |
| "learning_rate": 4.508271488558369e-08, |
| "loss": 1.1678, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.972850455777285, |
| "grad_norm": 0.63694043332642, |
| "learning_rate": 4.3786386255531e-08, |
| "loss": 1.2357, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.9732446415373245, |
| "grad_norm": 0.6218499921470892, |
| "learning_rate": 4.250892722150401e-08, |
| "loss": 1.1817, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.9736388272973638, |
| "grad_norm": 0.618351384803128, |
| "learning_rate": 4.1250340204619375e-08, |
| "loss": 1.1498, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.9740330130574033, |
| "grad_norm": 0.6221821265806511, |
| "learning_rate": 4.001062759022456e-08, |
| "loss": 1.1812, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.9744271988174428, |
| "grad_norm": 0.6350605796642136, |
| "learning_rate": 3.878979172789454e-08, |
| "loss": 1.2148, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.9748213845774821, |
| "grad_norm": 0.6203025166705224, |
| "learning_rate": 3.758783493142737e-08, |
| "loss": 1.1737, |
| "step": 2473 |
| }, |
| { |
| "epoch": 0.9752155703375216, |
| "grad_norm": 0.6008544551965036, |
| "learning_rate": 3.640475947884303e-08, |
| "loss": 1.1266, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.975609756097561, |
| "grad_norm": 0.6113341557887032, |
| "learning_rate": 3.5240567612375706e-08, |
| "loss": 1.2014, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.9760039418576004, |
| "grad_norm": 0.603617063644902, |
| "learning_rate": 3.4095261538468204e-08, |
| "loss": 1.166, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.9763981276176398, |
| "grad_norm": 0.6271623067160851, |
| "learning_rate": 3.2968843427770844e-08, |
| "loss": 1.201, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.9767923133776792, |
| "grad_norm": 0.5896479252918767, |
| "learning_rate": 3.186131541513926e-08, |
| "loss": 1.1689, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.9771864991377186, |
| "grad_norm": 0.6139597394243195, |
| "learning_rate": 3.0772679599623266e-08, |
| "loss": 1.1962, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.9775806848977581, |
| "grad_norm": 0.6298030226727921, |
| "learning_rate": 2.9702938044468e-08, |
| "loss": 1.1874, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.9779748706577975, |
| "grad_norm": 0.5794413704040846, |
| "learning_rate": 2.865209277711167e-08, |
| "loss": 1.1074, |
| "step": 2481 |
| }, |
| { |
| "epoch": 0.9783690564178369, |
| "grad_norm": 0.5885716516364036, |
| "learning_rate": 2.7620145789177823e-08, |
| "loss": 1.125, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.9787632421778764, |
| "grad_norm": 0.6320208790946613, |
| "learning_rate": 2.6607099036470853e-08, |
| "loss": 1.2337, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.9791574279379157, |
| "grad_norm": 0.6070406774043791, |
| "learning_rate": 2.5612954438977155e-08, |
| "loss": 1.1309, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.9795516136979552, |
| "grad_norm": 0.6061624110898025, |
| "learning_rate": 2.463771388085623e-08, |
| "loss": 1.161, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.9799457994579945, |
| "grad_norm": 0.6181129393801446, |
| "learning_rate": 2.368137921044289e-08, |
| "loss": 1.152, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.980339985218034, |
| "grad_norm": 0.6053023110866588, |
| "learning_rate": 2.274395224023618e-08, |
| "loss": 1.2039, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.9807341709780735, |
| "grad_norm": 0.611443540064316, |
| "learning_rate": 2.1825434746903794e-08, |
| "loss": 1.2308, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.9811283567381128, |
| "grad_norm": 0.5983940583235254, |
| "learning_rate": 2.0925828471272115e-08, |
| "loss": 1.1492, |
| "step": 2489 |
| }, |
| { |
| "epoch": 0.9815225424981523, |
| "grad_norm": 0.6070581145638013, |
| "learning_rate": 2.0045135118328397e-08, |
| "loss": 1.1946, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.9819167282581917, |
| "grad_norm": 0.6080141003498726, |
| "learning_rate": 1.9183356357215242e-08, |
| "loss": 1.1755, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.9823109140182311, |
| "grad_norm": 0.6183949984566449, |
| "learning_rate": 1.8340493821222827e-08, |
| "loss": 1.234, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.9827050997782705, |
| "grad_norm": 0.6158791546765815, |
| "learning_rate": 1.7516549107795543e-08, |
| "loss": 1.1807, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.9830992855383099, |
| "grad_norm": 0.6008031176354653, |
| "learning_rate": 1.671152377852092e-08, |
| "loss": 1.1555, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.9834934712983493, |
| "grad_norm": 0.6243823889960919, |
| "learning_rate": 1.5925419359130723e-08, |
| "loss": 1.1506, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.9838876570583888, |
| "grad_norm": 0.6092824290673818, |
| "learning_rate": 1.5158237339494285e-08, |
| "loss": 1.1245, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.9842818428184282, |
| "grad_norm": 0.6173876535957193, |
| "learning_rate": 1.4409979173620747e-08, |
| "loss": 1.1329, |
| "step": 2497 |
| }, |
| { |
| "epoch": 0.9846760285784676, |
| "grad_norm": 0.6081775209074783, |
| "learning_rate": 1.3680646279651266e-08, |
| "loss": 1.1479, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.985070214338507, |
| "grad_norm": 0.6202693029416111, |
| "learning_rate": 1.2970240039861248e-08, |
| "loss": 1.2072, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.9854644000985464, |
| "grad_norm": 0.6008870570624699, |
| "learning_rate": 1.2278761800653682e-08, |
| "loss": 1.1418, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.9858585858585859, |
| "grad_norm": 0.624028333998548, |
| "learning_rate": 1.1606212872559142e-08, |
| "loss": 1.2152, |
| "step": 2501 |
| }, |
| { |
| "epoch": 0.9862527716186252, |
| "grad_norm": 0.6239253652188765, |
| "learning_rate": 1.0952594530230232e-08, |
| "loss": 1.2422, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.9866469573786647, |
| "grad_norm": 0.6066337975290457, |
| "learning_rate": 1.0317908012442701e-08, |
| "loss": 1.1602, |
| "step": 2503 |
| }, |
| { |
| "epoch": 0.9870411431387042, |
| "grad_norm": 0.6377500814670377, |
| "learning_rate": 9.702154522092111e-09, |
| "loss": 1.2192, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.9874353288987435, |
| "grad_norm": 0.5987907515887436, |
| "learning_rate": 9.105335226190504e-09, |
| "loss": 1.1616, |
| "step": 2505 |
| }, |
| { |
| "epoch": 0.987829514658783, |
| "grad_norm": 0.6172014036158203, |
| "learning_rate": 8.527451255863073e-09, |
| "loss": 1.225, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.9882237004188223, |
| "grad_norm": 0.6077694286293223, |
| "learning_rate": 7.968503706350384e-09, |
| "loss": 1.1987, |
| "step": 2507 |
| }, |
| { |
| "epoch": 0.9886178861788618, |
| "grad_norm": 0.6138556064349517, |
| "learning_rate": 7.42849363700282e-09, |
| "loss": 1.1483, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.9890120719389012, |
| "grad_norm": 0.6120940708596503, |
| "learning_rate": 6.907422071278369e-09, |
| "loss": 1.1581, |
| "step": 2509 |
| }, |
| { |
| "epoch": 0.9894062576989406, |
| "grad_norm": 0.5962048270770236, |
| "learning_rate": 6.405289996741504e-09, |
| "loss": 1.1662, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.98980044345898, |
| "grad_norm": 0.6184599584147658, |
| "learning_rate": 5.922098365063189e-09, |
| "loss": 1.1495, |
| "step": 2511 |
| }, |
| { |
| "epoch": 0.9901946292190195, |
| "grad_norm": 0.6296776196488952, |
| "learning_rate": 5.457848092015328e-09, |
| "loss": 1.1905, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.9905888149790589, |
| "grad_norm": 0.6131588421344288, |
| "learning_rate": 5.012540057474091e-09, |
| "loss": 1.1818, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.9909830007390983, |
| "grad_norm": 0.5964517876857598, |
| "learning_rate": 4.586175105411039e-09, |
| "loss": 1.1824, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.9913771864991378, |
| "grad_norm": 0.6158891574168905, |
| "learning_rate": 4.178754043898669e-09, |
| "loss": 1.1601, |
| "step": 2515 |
| }, |
| { |
| "epoch": 0.9917713722591771, |
| "grad_norm": 0.6048009237523553, |
| "learning_rate": 3.790277645104867e-09, |
| "loss": 1.1299, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.9921655580192166, |
| "grad_norm": 0.6238556971612192, |
| "learning_rate": 3.420746645292905e-09, |
| "loss": 1.1244, |
| "step": 2517 |
| }, |
| { |
| "epoch": 0.9925597437792559, |
| "grad_norm": 0.6155143754125697, |
| "learning_rate": 3.0701617448203325e-09, |
| "loss": 1.1856, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.9929539295392954, |
| "grad_norm": 0.6156379383507039, |
| "learning_rate": 2.738523608135646e-09, |
| "loss": 1.1921, |
| "step": 2519 |
| }, |
| { |
| "epoch": 0.9933481152993349, |
| "grad_norm": 0.6287557362309201, |
| "learning_rate": 2.4258328637771776e-09, |
| "loss": 1.1696, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.9937423010593742, |
| "grad_norm": 0.6035984671210802, |
| "learning_rate": 2.1320901043764276e-09, |
| "loss": 1.1752, |
| "step": 2521 |
| }, |
| { |
| "epoch": 0.9941364868194137, |
| "grad_norm": 0.6095120389983935, |
| "learning_rate": 1.8572958866514e-09, |
| "loss": 1.1458, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.994530672579453, |
| "grad_norm": 0.7589305669134696, |
| "learning_rate": 1.6014507314077165e-09, |
| "loss": 1.1667, |
| "step": 2523 |
| }, |
| { |
| "epoch": 0.9949248583394925, |
| "grad_norm": 0.6114552923969634, |
| "learning_rate": 1.3645551235386134e-09, |
| "loss": 1.1621, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.9953190440995319, |
| "grad_norm": 0.6058392606138625, |
| "learning_rate": 1.1466095120216126e-09, |
| "loss": 1.1241, |
| "step": 2525 |
| }, |
| { |
| "epoch": 0.9957132298595713, |
| "grad_norm": 0.5936603980813377, |
| "learning_rate": 9.476143099207414e-10, |
| "loss": 1.1423, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.9961074156196107, |
| "grad_norm": 0.5977975525192136, |
| "learning_rate": 7.67569894382092e-10, |
| "loss": 1.1964, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.9965016013796502, |
| "grad_norm": 0.5957259774856952, |
| "learning_rate": 6.064766066382622e-10, |
| "loss": 1.1949, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.9968957871396896, |
| "grad_norm": 0.6094021396471523, |
| "learning_rate": 4.643347520005836e-10, |
| "loss": 1.2123, |
| "step": 2529 |
| }, |
| { |
| "epoch": 0.997289972899729, |
| "grad_norm": 0.6120542827325469, |
| "learning_rate": 3.4114459986689386e-10, |
| "loss": 1.1313, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.9976841586597684, |
| "grad_norm": 0.6151506851061069, |
| "learning_rate": 2.369063837115437e-10, |
| "loss": 1.2058, |
| "step": 2531 |
| }, |
| { |
| "epoch": 0.9980783444198078, |
| "grad_norm": 0.6008592003001969, |
| "learning_rate": 1.5162030109538982e-10, |
| "loss": 1.151, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.9984725301798473, |
| "grad_norm": 0.5857304461429403, |
| "learning_rate": 8.528651365580232e-11, |
| "loss": 1.1576, |
| "step": 2533 |
| }, |
| { |
| "epoch": 0.9988667159398866, |
| "grad_norm": 0.6021334182290597, |
| "learning_rate": 3.790514711332627e-11, |
| "loss": 1.195, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.9992609016999261, |
| "grad_norm": 0.6202010114249676, |
| "learning_rate": 9.476291268351035e-12, |
| "loss": 1.192, |
| "step": 2535 |
| }, |
| { |
| "epoch": 0.9996550874599656, |
| "grad_norm": 0.6201498827195971, |
| "learning_rate": 0.0, |
| "loss": 1.1993, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.9996550874599656, |
| "eval_loss": 1.168265700340271, |
| "eval_runtime": 2983.6589, |
| "eval_samples_per_second": 5.508, |
| "eval_steps_per_second": 0.689, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.9996550874599656, |
| "step": 2536, |
| "total_flos": 661690545340416.0, |
| "train_loss": 1.391600751820423, |
| "train_runtime": 151844.1268, |
| "train_samples_per_second": 1.069, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2536, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 661690545340416.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|