{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9902091677792613, "eval_steps": 500, "global_step": 4450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022251891410769915, "grad_norm": 14.805064169529773, "learning_rate": 0.0, "loss": 1.4534, "step": 1 }, { "epoch": 0.0004450378282153983, "grad_norm": 14.988575393904634, "learning_rate": 7.407407407407409e-08, "loss": 1.4736, "step": 2 }, { "epoch": 0.0006675567423230974, "grad_norm": 14.267864271044003, "learning_rate": 1.4814814814814817e-07, "loss": 1.4555, "step": 3 }, { "epoch": 0.0008900756564307966, "grad_norm": 14.709070637083665, "learning_rate": 2.2222222222222224e-07, "loss": 1.464, "step": 4 }, { "epoch": 0.0011125945705384957, "grad_norm": 14.205506795558225, "learning_rate": 2.9629629629629634e-07, "loss": 1.4299, "step": 5 }, { "epoch": 0.0013351134846461949, "grad_norm": 14.664722210770334, "learning_rate": 3.7037037037037036e-07, "loss": 1.4461, "step": 6 }, { "epoch": 0.001557632398753894, "grad_norm": 14.13434823981024, "learning_rate": 4.444444444444445e-07, "loss": 1.4422, "step": 7 }, { "epoch": 0.0017801513128615932, "grad_norm": 14.357121156479607, "learning_rate": 5.185185185185186e-07, "loss": 1.4416, "step": 8 }, { "epoch": 0.0020026702269692926, "grad_norm": 13.814937916125281, "learning_rate": 5.925925925925927e-07, "loss": 1.435, "step": 9 }, { "epoch": 0.0022251891410769915, "grad_norm": 13.2477529798729, "learning_rate": 6.666666666666667e-07, "loss": 1.3907, "step": 10 }, { "epoch": 0.002447708055184691, "grad_norm": 13.418891627935562, "learning_rate": 7.407407407407407e-07, "loss": 1.4224, "step": 11 }, { "epoch": 0.0026702269692923898, "grad_norm": 12.211150979740676, "learning_rate": 8.14814814814815e-07, "loss": 1.3658, "step": 12 }, { "epoch": 0.002892745883400089, "grad_norm": 11.62292747982299, "learning_rate": 8.88888888888889e-07, "loss": 1.3394, "step": 13 }, { "epoch": 0.003115264797507788, "grad_norm": 11.757895887015204, "learning_rate": 9.62962962962963e-07, "loss": 1.365, "step": 14 }, { "epoch": 0.0033377837116154874, "grad_norm": 11.263746152570533, "learning_rate": 1.0370370370370371e-06, "loss": 1.3536, "step": 15 }, { "epoch": 0.0035603026257231864, "grad_norm": 10.69633233672378, "learning_rate": 1.111111111111111e-06, "loss": 1.2885, "step": 16 }, { "epoch": 0.0037828215398308857, "grad_norm": 8.471956940521213, "learning_rate": 1.1851851851851854e-06, "loss": 1.178, "step": 17 }, { "epoch": 0.004005340453938585, "grad_norm": 8.36963300106361, "learning_rate": 1.2592592592592593e-06, "loss": 1.1499, "step": 18 }, { "epoch": 0.004227859368046284, "grad_norm": 8.579745296177526, "learning_rate": 1.3333333333333334e-06, "loss": 1.1906, "step": 19 }, { "epoch": 0.004450378282153983, "grad_norm": 8.228172526475296, "learning_rate": 1.4074074074074075e-06, "loss": 1.1268, "step": 20 }, { "epoch": 0.004672897196261682, "grad_norm": 8.142064746185051, "learning_rate": 1.4814814814814815e-06, "loss": 1.1505, "step": 21 }, { "epoch": 0.004895416110369382, "grad_norm": 7.629821382943853, "learning_rate": 1.5555555555555558e-06, "loss": 1.068, "step": 22 }, { "epoch": 0.005117935024477081, "grad_norm": 4.94929740755082, "learning_rate": 1.62962962962963e-06, "loss": 0.905, "step": 23 }, { "epoch": 0.0053404539385847796, "grad_norm": 4.494345693722879, "learning_rate": 1.7037037037037038e-06, "loss": 0.9001, "step": 24 }, { "epoch": 0.0055629728526924785, "grad_norm": 4.575135506750878, "learning_rate": 1.777777777777778e-06, "loss": 0.9215, "step": 25 }, { "epoch": 0.005785491766800178, "grad_norm": 4.235734120163825, "learning_rate": 1.8518518518518519e-06, "loss": 0.8891, "step": 26 }, { "epoch": 0.006008010680907877, "grad_norm": 3.9194046792504293, "learning_rate": 1.925925925925926e-06, "loss": 0.8462, "step": 27 }, { "epoch": 0.006230529595015576, "grad_norm": 3.5358457827053393, "learning_rate": 2.0000000000000003e-06, "loss": 0.8397, "step": 28 }, { "epoch": 0.006453048509123275, "grad_norm": 3.4711006216397866, "learning_rate": 2.0740740740740742e-06, "loss": 0.8257, "step": 29 }, { "epoch": 0.006675567423230975, "grad_norm": 2.793147863965787, "learning_rate": 2.148148148148148e-06, "loss": 0.7952, "step": 30 }, { "epoch": 0.006898086337338674, "grad_norm": 2.6212941701263275, "learning_rate": 2.222222222222222e-06, "loss": 0.7935, "step": 31 }, { "epoch": 0.007120605251446373, "grad_norm": 3.243288203471121, "learning_rate": 2.2962962962962964e-06, "loss": 0.7808, "step": 32 }, { "epoch": 0.007343124165554072, "grad_norm": 3.145072545717475, "learning_rate": 2.3703703703703707e-06, "loss": 0.7704, "step": 33 }, { "epoch": 0.0075656430796617715, "grad_norm": 2.7561596600854434, "learning_rate": 2.4444444444444447e-06, "loss": 0.7566, "step": 34 }, { "epoch": 0.00778816199376947, "grad_norm": 2.434418529059167, "learning_rate": 2.5185185185185186e-06, "loss": 0.7436, "step": 35 }, { "epoch": 0.00801068090787717, "grad_norm": 2.4907609149061014, "learning_rate": 2.5925925925925925e-06, "loss": 0.7566, "step": 36 }, { "epoch": 0.008233199821984869, "grad_norm": 2.0709711684468557, "learning_rate": 2.666666666666667e-06, "loss": 0.7369, "step": 37 }, { "epoch": 0.008455718736092568, "grad_norm": 1.987665363251119, "learning_rate": 2.740740740740741e-06, "loss": 0.6822, "step": 38 }, { "epoch": 0.008678237650200267, "grad_norm": 1.8237718289773917, "learning_rate": 2.814814814814815e-06, "loss": 0.7073, "step": 39 }, { "epoch": 0.008900756564307966, "grad_norm": 1.8418973945370947, "learning_rate": 2.888888888888889e-06, "loss": 0.7346, "step": 40 }, { "epoch": 0.009123275478415665, "grad_norm": 1.6666763028081504, "learning_rate": 2.962962962962963e-06, "loss": 0.7102, "step": 41 }, { "epoch": 0.009345794392523364, "grad_norm": 1.5548324380093217, "learning_rate": 3.0370370370370372e-06, "loss": 0.7176, "step": 42 }, { "epoch": 0.009568313306631064, "grad_norm": 1.5174084275671602, "learning_rate": 3.1111111111111116e-06, "loss": 0.6812, "step": 43 }, { "epoch": 0.009790832220738763, "grad_norm": 1.3627746057050303, "learning_rate": 3.1851851851851855e-06, "loss": 0.6916, "step": 44 }, { "epoch": 0.010013351134846462, "grad_norm": 1.3881926512066047, "learning_rate": 3.25925925925926e-06, "loss": 0.674, "step": 45 }, { "epoch": 0.010235870048954161, "grad_norm": 1.2815303407932483, "learning_rate": 3.3333333333333333e-06, "loss": 0.6593, "step": 46 }, { "epoch": 0.01045838896306186, "grad_norm": 1.272250324040584, "learning_rate": 3.4074074074074077e-06, "loss": 0.6548, "step": 47 }, { "epoch": 0.010680907877169559, "grad_norm": 1.274878741867685, "learning_rate": 3.481481481481482e-06, "loss": 0.6631, "step": 48 }, { "epoch": 0.010903426791277258, "grad_norm": 1.2923703859409101, "learning_rate": 3.555555555555556e-06, "loss": 0.6641, "step": 49 }, { "epoch": 0.011125945705384957, "grad_norm": 1.2015342749417977, "learning_rate": 3.6296296296296302e-06, "loss": 0.6592, "step": 50 }, { "epoch": 0.011348464619492658, "grad_norm": 1.2543481228245943, "learning_rate": 3.7037037037037037e-06, "loss": 0.6443, "step": 51 }, { "epoch": 0.011570983533600357, "grad_norm": 1.2434752563851466, "learning_rate": 3.777777777777778e-06, "loss": 0.6423, "step": 52 }, { "epoch": 0.011793502447708056, "grad_norm": 1.2923137178075796, "learning_rate": 3.851851851851852e-06, "loss": 0.6599, "step": 53 }, { "epoch": 0.012016021361815754, "grad_norm": 1.308092817016021, "learning_rate": 3.925925925925926e-06, "loss": 0.6625, "step": 54 }, { "epoch": 0.012238540275923453, "grad_norm": 1.2927547648994773, "learning_rate": 4.000000000000001e-06, "loss": 0.6621, "step": 55 }, { "epoch": 0.012461059190031152, "grad_norm": 1.2985094437653952, "learning_rate": 4.074074074074074e-06, "loss": 0.6421, "step": 56 }, { "epoch": 0.012683578104138851, "grad_norm": 1.2137222654583852, "learning_rate": 4.1481481481481485e-06, "loss": 0.6425, "step": 57 }, { "epoch": 0.01290609701824655, "grad_norm": 1.1643749020784366, "learning_rate": 4.222222222222223e-06, "loss": 0.6483, "step": 58 }, { "epoch": 0.01312861593235425, "grad_norm": 1.143799505122513, "learning_rate": 4.296296296296296e-06, "loss": 0.6456, "step": 59 }, { "epoch": 0.01335113484646195, "grad_norm": 1.2074384216374465, "learning_rate": 4.370370370370371e-06, "loss": 0.6229, "step": 60 }, { "epoch": 0.013573653760569649, "grad_norm": 1.1590361724586282, "learning_rate": 4.444444444444444e-06, "loss": 0.6389, "step": 61 }, { "epoch": 0.013796172674677348, "grad_norm": 1.2367908579523814, "learning_rate": 4.5185185185185185e-06, "loss": 0.6215, "step": 62 }, { "epoch": 0.014018691588785047, "grad_norm": 1.1217611148587108, "learning_rate": 4.592592592592593e-06, "loss": 0.6482, "step": 63 }, { "epoch": 0.014241210502892745, "grad_norm": 1.0767138315865972, "learning_rate": 4.666666666666667e-06, "loss": 0.616, "step": 64 }, { "epoch": 0.014463729417000444, "grad_norm": 1.03129383792538, "learning_rate": 4.7407407407407415e-06, "loss": 0.6519, "step": 65 }, { "epoch": 0.014686248331108143, "grad_norm": 1.0427846735386603, "learning_rate": 4.814814814814815e-06, "loss": 0.6311, "step": 66 }, { "epoch": 0.014908767245215844, "grad_norm": 1.0546467330147278, "learning_rate": 4.888888888888889e-06, "loss": 0.6274, "step": 67 }, { "epoch": 0.015131286159323543, "grad_norm": 1.1418401122838222, "learning_rate": 4.962962962962964e-06, "loss": 0.6295, "step": 68 }, { "epoch": 0.015353805073431242, "grad_norm": 1.0945879590018002, "learning_rate": 5.037037037037037e-06, "loss": 0.6223, "step": 69 }, { "epoch": 0.01557632398753894, "grad_norm": 1.02017311092877, "learning_rate": 5.1111111111111115e-06, "loss": 0.6195, "step": 70 }, { "epoch": 0.01579884290164664, "grad_norm": 1.0039566278962895, "learning_rate": 5.185185185185185e-06, "loss": 0.6294, "step": 71 }, { "epoch": 0.01602136181575434, "grad_norm": 1.0842206231476126, "learning_rate": 5.259259259259259e-06, "loss": 0.6199, "step": 72 }, { "epoch": 0.016243880729862038, "grad_norm": 0.9610003019997141, "learning_rate": 5.333333333333334e-06, "loss": 0.6242, "step": 73 }, { "epoch": 0.016466399643969738, "grad_norm": 0.9558077109511962, "learning_rate": 5.407407407407408e-06, "loss": 0.6311, "step": 74 }, { "epoch": 0.016688918558077435, "grad_norm": 0.9806642533333984, "learning_rate": 5.481481481481482e-06, "loss": 0.6194, "step": 75 }, { "epoch": 0.016911437472185136, "grad_norm": 1.0189848367834167, "learning_rate": 5.555555555555557e-06, "loss": 0.6031, "step": 76 }, { "epoch": 0.017133956386292833, "grad_norm": 1.0062385560975715, "learning_rate": 5.62962962962963e-06, "loss": 0.6145, "step": 77 }, { "epoch": 0.017356475300400534, "grad_norm": 1.0044397934917122, "learning_rate": 5.7037037037037045e-06, "loss": 0.5954, "step": 78 }, { "epoch": 0.017578994214508235, "grad_norm": 1.018599833939057, "learning_rate": 5.777777777777778e-06, "loss": 0.6173, "step": 79 }, { "epoch": 0.017801513128615932, "grad_norm": 0.9712443701388657, "learning_rate": 5.8518518518518515e-06, "loss": 0.6251, "step": 80 }, { "epoch": 0.018024032042723633, "grad_norm": 1.070584292512805, "learning_rate": 5.925925925925926e-06, "loss": 0.606, "step": 81 }, { "epoch": 0.01824655095683133, "grad_norm": 0.9380534816043444, "learning_rate": 6e-06, "loss": 0.609, "step": 82 }, { "epoch": 0.01846906987093903, "grad_norm": 1.0177385216564403, "learning_rate": 6.0740740740740745e-06, "loss": 0.6191, "step": 83 }, { "epoch": 0.018691588785046728, "grad_norm": 1.07150467280062, "learning_rate": 6.148148148148149e-06, "loss": 0.6114, "step": 84 }, { "epoch": 0.018914107699154428, "grad_norm": 1.1321205871379325, "learning_rate": 6.222222222222223e-06, "loss": 0.6154, "step": 85 }, { "epoch": 0.01913662661326213, "grad_norm": 1.150107073946682, "learning_rate": 6.296296296296297e-06, "loss": 0.6283, "step": 86 }, { "epoch": 0.019359145527369826, "grad_norm": 1.1274481464348334, "learning_rate": 6.370370370370371e-06, "loss": 0.6126, "step": 87 }, { "epoch": 0.019581664441477527, "grad_norm": 0.9968586490959516, "learning_rate": 6.444444444444445e-06, "loss": 0.6038, "step": 88 }, { "epoch": 0.019804183355585224, "grad_norm": 1.113502463527516, "learning_rate": 6.51851851851852e-06, "loss": 0.6, "step": 89 }, { "epoch": 0.020026702269692925, "grad_norm": 1.096650093857812, "learning_rate": 6.592592592592592e-06, "loss": 0.6064, "step": 90 }, { "epoch": 0.020249221183800622, "grad_norm": 1.0550857573325627, "learning_rate": 6.666666666666667e-06, "loss": 0.6212, "step": 91 }, { "epoch": 0.020471740097908322, "grad_norm": 1.0231097043172241, "learning_rate": 6.740740740740741e-06, "loss": 0.6005, "step": 92 }, { "epoch": 0.02069425901201602, "grad_norm": 1.067861405008909, "learning_rate": 6.814814814814815e-06, "loss": 0.5964, "step": 93 }, { "epoch": 0.02091677792612372, "grad_norm": 1.0053068637634606, "learning_rate": 6.88888888888889e-06, "loss": 0.6009, "step": 94 }, { "epoch": 0.02113929684023142, "grad_norm": 0.9537284979652322, "learning_rate": 6.962962962962964e-06, "loss": 0.5995, "step": 95 }, { "epoch": 0.021361815754339118, "grad_norm": 1.0737801742866517, "learning_rate": 7.0370370370370375e-06, "loss": 0.6202, "step": 96 }, { "epoch": 0.02158433466844682, "grad_norm": 1.0164265113792472, "learning_rate": 7.111111111111112e-06, "loss": 0.6028, "step": 97 }, { "epoch": 0.021806853582554516, "grad_norm": 1.0230237730016223, "learning_rate": 7.185185185185186e-06, "loss": 0.6073, "step": 98 }, { "epoch": 0.022029372496662217, "grad_norm": 1.0164563544542569, "learning_rate": 7.2592592592592605e-06, "loss": 0.6027, "step": 99 }, { "epoch": 0.022251891410769914, "grad_norm": 1.1682129966556503, "learning_rate": 7.333333333333333e-06, "loss": 0.5962, "step": 100 }, { "epoch": 0.022474410324877615, "grad_norm": 1.1020053931345173, "learning_rate": 7.4074074074074075e-06, "loss": 0.612, "step": 101 }, { "epoch": 0.022696929238985315, "grad_norm": 0.9603580981855548, "learning_rate": 7.481481481481482e-06, "loss": 0.5882, "step": 102 }, { "epoch": 0.022919448153093012, "grad_norm": 1.0155650581556304, "learning_rate": 7.555555555555556e-06, "loss": 0.592, "step": 103 }, { "epoch": 0.023141967067200713, "grad_norm": 1.0874809695546084, "learning_rate": 7.62962962962963e-06, "loss": 0.5943, "step": 104 }, { "epoch": 0.02336448598130841, "grad_norm": 1.0324885719473127, "learning_rate": 7.703703703703704e-06, "loss": 0.5909, "step": 105 }, { "epoch": 0.02358700489541611, "grad_norm": 1.04568287166136, "learning_rate": 7.77777777777778e-06, "loss": 0.5889, "step": 106 }, { "epoch": 0.023809523809523808, "grad_norm": 0.950991544982278, "learning_rate": 7.851851851851853e-06, "loss": 0.5892, "step": 107 }, { "epoch": 0.02403204272363151, "grad_norm": 1.0005060949308822, "learning_rate": 7.925925925925926e-06, "loss": 0.5716, "step": 108 }, { "epoch": 0.02425456163773921, "grad_norm": 0.9540144189759479, "learning_rate": 8.000000000000001e-06, "loss": 0.5823, "step": 109 }, { "epoch": 0.024477080551846907, "grad_norm": 1.0803969947343315, "learning_rate": 8.074074074074075e-06, "loss": 0.6006, "step": 110 }, { "epoch": 0.024699599465954607, "grad_norm": 1.0054306319981887, "learning_rate": 8.148148148148148e-06, "loss": 0.5949, "step": 111 }, { "epoch": 0.024922118380062305, "grad_norm": 1.0227306856843215, "learning_rate": 8.222222222222222e-06, "loss": 0.5669, "step": 112 }, { "epoch": 0.025144637294170005, "grad_norm": 0.9727556136558161, "learning_rate": 8.296296296296297e-06, "loss": 0.6001, "step": 113 }, { "epoch": 0.025367156208277702, "grad_norm": 1.0176524880125282, "learning_rate": 8.37037037037037e-06, "loss": 0.5891, "step": 114 }, { "epoch": 0.025589675122385403, "grad_norm": 0.9077666220594863, "learning_rate": 8.444444444444446e-06, "loss": 0.5727, "step": 115 }, { "epoch": 0.0258121940364931, "grad_norm": 0.9489689200007702, "learning_rate": 8.518518518518519e-06, "loss": 0.6116, "step": 116 }, { "epoch": 0.0260347129506008, "grad_norm": 0.9659919557468327, "learning_rate": 8.592592592592593e-06, "loss": 0.5936, "step": 117 }, { "epoch": 0.0262572318647085, "grad_norm": 1.0717450155944557, "learning_rate": 8.666666666666668e-06, "loss": 0.5844, "step": 118 }, { "epoch": 0.0264797507788162, "grad_norm": 1.0038701241609913, "learning_rate": 8.740740740740741e-06, "loss": 0.5976, "step": 119 }, { "epoch": 0.0267022696929239, "grad_norm": 0.9413135709373222, "learning_rate": 8.814814814814817e-06, "loss": 0.5995, "step": 120 }, { "epoch": 0.026924788607031597, "grad_norm": 1.0767036636994407, "learning_rate": 8.888888888888888e-06, "loss": 0.579, "step": 121 }, { "epoch": 0.027147307521139297, "grad_norm": 0.9820389667958538, "learning_rate": 8.962962962962963e-06, "loss": 0.5807, "step": 122 }, { "epoch": 0.027369826435246995, "grad_norm": 1.0426857989852758, "learning_rate": 9.037037037037037e-06, "loss": 0.5907, "step": 123 }, { "epoch": 0.027592345349354695, "grad_norm": 1.0657217409891804, "learning_rate": 9.111111111111112e-06, "loss": 0.5869, "step": 124 }, { "epoch": 0.027814864263462396, "grad_norm": 1.1881800138232768, "learning_rate": 9.185185185185186e-06, "loss": 0.5991, "step": 125 }, { "epoch": 0.028037383177570093, "grad_norm": 1.0178330534285855, "learning_rate": 9.25925925925926e-06, "loss": 0.5748, "step": 126 }, { "epoch": 0.028259902091677794, "grad_norm": 1.075880361806578, "learning_rate": 9.333333333333334e-06, "loss": 0.6057, "step": 127 }, { "epoch": 0.02848242100578549, "grad_norm": 1.0020510025420173, "learning_rate": 9.407407407407408e-06, "loss": 0.598, "step": 128 }, { "epoch": 0.02870493991989319, "grad_norm": 1.0858844813857609, "learning_rate": 9.481481481481483e-06, "loss": 0.5925, "step": 129 }, { "epoch": 0.02892745883400089, "grad_norm": 0.9967251243671496, "learning_rate": 9.555555555555556e-06, "loss": 0.5667, "step": 130 }, { "epoch": 0.02914997774810859, "grad_norm": 1.0950762438530965, "learning_rate": 9.62962962962963e-06, "loss": 0.6161, "step": 131 }, { "epoch": 0.029372496662216287, "grad_norm": 1.065524990407738, "learning_rate": 9.703703703703703e-06, "loss": 0.5899, "step": 132 }, { "epoch": 0.029595015576323987, "grad_norm": 1.0127490985818217, "learning_rate": 9.777777777777779e-06, "loss": 0.5644, "step": 133 }, { "epoch": 0.029817534490431688, "grad_norm": 1.127971216985517, "learning_rate": 9.851851851851852e-06, "loss": 0.5907, "step": 134 }, { "epoch": 0.030040053404539385, "grad_norm": 0.9952279783684734, "learning_rate": 9.925925925925927e-06, "loss": 0.5826, "step": 135 }, { "epoch": 0.030262572318647086, "grad_norm": 1.050899118835438, "learning_rate": 1e-05, "loss": 0.5777, "step": 136 }, { "epoch": 0.030485091232754783, "grad_norm": 1.0737404614461061, "learning_rate": 9.999998701428113e-06, "loss": 0.5748, "step": 137 }, { "epoch": 0.030707610146862484, "grad_norm": 1.0220646415411414, "learning_rate": 9.99999480571312e-06, "loss": 0.5824, "step": 138 }, { "epoch": 0.03093012906097018, "grad_norm": 1.051600798772922, "learning_rate": 9.999988312857046e-06, "loss": 0.5794, "step": 139 }, { "epoch": 0.03115264797507788, "grad_norm": 1.034685982236644, "learning_rate": 9.999979222863266e-06, "loss": 0.5834, "step": 140 }, { "epoch": 0.03137516688918558, "grad_norm": 1.077716917125049, "learning_rate": 9.999967535736498e-06, "loss": 0.6116, "step": 141 }, { "epoch": 0.03159768580329328, "grad_norm": 1.0386938754803965, "learning_rate": 9.999953251482817e-06, "loss": 0.5788, "step": 142 }, { "epoch": 0.03182020471740098, "grad_norm": 1.1065409024859802, "learning_rate": 9.99993637010964e-06, "loss": 0.5957, "step": 143 }, { "epoch": 0.03204272363150868, "grad_norm": 1.121412229095629, "learning_rate": 9.999916891625736e-06, "loss": 0.5903, "step": 144 }, { "epoch": 0.03226524254561638, "grad_norm": 1.0631385538479787, "learning_rate": 9.999894816041222e-06, "loss": 0.5863, "step": 145 }, { "epoch": 0.032487761459724075, "grad_norm": 1.0321814916461307, "learning_rate": 9.999870143367565e-06, "loss": 0.6047, "step": 146 }, { "epoch": 0.03271028037383177, "grad_norm": 1.1151416530834044, "learning_rate": 9.999842873617583e-06, "loss": 0.5756, "step": 147 }, { "epoch": 0.032932799287939477, "grad_norm": 1.0383345970578801, "learning_rate": 9.999813006805436e-06, "loss": 0.5806, "step": 148 }, { "epoch": 0.033155318202047174, "grad_norm": 1.0835595050679159, "learning_rate": 9.999780542946643e-06, "loss": 0.5912, "step": 149 }, { "epoch": 0.03337783711615487, "grad_norm": 0.9658058969998186, "learning_rate": 9.999745482058063e-06, "loss": 0.5845, "step": 150 }, { "epoch": 0.033600356030262575, "grad_norm": 1.0721319449450508, "learning_rate": 9.999707824157909e-06, "loss": 0.5708, "step": 151 }, { "epoch": 0.03382287494437027, "grad_norm": 1.1991682058372048, "learning_rate": 9.999667569265741e-06, "loss": 0.5796, "step": 152 }, { "epoch": 0.03404539385847797, "grad_norm": 1.1958079998447684, "learning_rate": 9.999624717402468e-06, "loss": 0.5993, "step": 153 }, { "epoch": 0.03426791277258567, "grad_norm": 1.0956798061495305, "learning_rate": 9.999579268590352e-06, "loss": 0.5972, "step": 154 }, { "epoch": 0.03449043168669337, "grad_norm": 1.144949354530655, "learning_rate": 9.999531222852996e-06, "loss": 0.5547, "step": 155 }, { "epoch": 0.03471295060080107, "grad_norm": 1.0271032911687206, "learning_rate": 9.999480580215356e-06, "loss": 0.581, "step": 156 }, { "epoch": 0.034935469514908765, "grad_norm": 0.9870581007913481, "learning_rate": 9.999427340703743e-06, "loss": 0.559, "step": 157 }, { "epoch": 0.03515798842901647, "grad_norm": 1.04500301714393, "learning_rate": 9.999371504345806e-06, "loss": 0.5786, "step": 158 }, { "epoch": 0.035380507343124167, "grad_norm": 1.0947186906172615, "learning_rate": 9.99931307117055e-06, "loss": 0.5767, "step": 159 }, { "epoch": 0.035603026257231864, "grad_norm": 1.0460409203869294, "learning_rate": 9.999252041208325e-06, "loss": 0.5914, "step": 160 }, { "epoch": 0.03582554517133956, "grad_norm": 1.0455905520202602, "learning_rate": 9.999188414490834e-06, "loss": 0.5515, "step": 161 }, { "epoch": 0.036048064085447265, "grad_norm": 0.9961105273706204, "learning_rate": 9.999122191051126e-06, "loss": 0.5895, "step": 162 }, { "epoch": 0.03627058299955496, "grad_norm": 1.0663650043157753, "learning_rate": 9.9990533709236e-06, "loss": 0.5815, "step": 163 }, { "epoch": 0.03649310191366266, "grad_norm": 1.0915654406675595, "learning_rate": 9.998981954144002e-06, "loss": 0.5866, "step": 164 }, { "epoch": 0.036715620827770364, "grad_norm": 1.0085347483902798, "learning_rate": 9.998907940749427e-06, "loss": 0.5712, "step": 165 }, { "epoch": 0.03693813974187806, "grad_norm": 1.1542636204825776, "learning_rate": 9.99883133077832e-06, "loss": 0.6101, "step": 166 }, { "epoch": 0.03716065865598576, "grad_norm": 0.935695834385868, "learning_rate": 9.998752124270477e-06, "loss": 0.5686, "step": 167 }, { "epoch": 0.037383177570093455, "grad_norm": 1.0081127358373718, "learning_rate": 9.998670321267036e-06, "loss": 0.5634, "step": 168 }, { "epoch": 0.03760569648420116, "grad_norm": 1.027463615030228, "learning_rate": 9.998585921810493e-06, "loss": 0.5787, "step": 169 }, { "epoch": 0.037828215398308856, "grad_norm": 1.0704605369686615, "learning_rate": 9.998498925944683e-06, "loss": 0.5707, "step": 170 }, { "epoch": 0.038050734312416554, "grad_norm": 1.1412405473941971, "learning_rate": 9.998409333714796e-06, "loss": 0.5943, "step": 171 }, { "epoch": 0.03827325322652426, "grad_norm": 1.0519147229496455, "learning_rate": 9.998317145167368e-06, "loss": 0.5821, "step": 172 }, { "epoch": 0.038495772140631955, "grad_norm": 1.0087033790052895, "learning_rate": 9.998222360350286e-06, "loss": 0.5672, "step": 173 }, { "epoch": 0.03871829105473965, "grad_norm": 1.1363432387920345, "learning_rate": 9.998124979312784e-06, "loss": 0.5922, "step": 174 }, { "epoch": 0.03894080996884735, "grad_norm": 1.031179721193816, "learning_rate": 9.998025002105441e-06, "loss": 0.5617, "step": 175 }, { "epoch": 0.039163328882955054, "grad_norm": 1.062649564589946, "learning_rate": 9.997922428780192e-06, "loss": 0.5891, "step": 176 }, { "epoch": 0.03938584779706275, "grad_norm": 1.1632264010471747, "learning_rate": 9.997817259390314e-06, "loss": 0.5899, "step": 177 }, { "epoch": 0.03960836671117045, "grad_norm": 1.1235270324657056, "learning_rate": 9.997709493990437e-06, "loss": 0.5698, "step": 178 }, { "epoch": 0.03983088562527815, "grad_norm": 1.1447592748836215, "learning_rate": 9.997599132636538e-06, "loss": 0.5897, "step": 179 }, { "epoch": 0.04005340453938585, "grad_norm": 1.0611876334157435, "learning_rate": 9.997486175385938e-06, "loss": 0.5648, "step": 180 }, { "epoch": 0.040275923453493546, "grad_norm": 1.046011108071441, "learning_rate": 9.997370622297313e-06, "loss": 0.5716, "step": 181 }, { "epoch": 0.040498442367601244, "grad_norm": 1.0484237269791705, "learning_rate": 9.997252473430686e-06, "loss": 0.5819, "step": 182 }, { "epoch": 0.04072096128170895, "grad_norm": 1.1207390465238902, "learning_rate": 9.997131728847422e-06, "loss": 0.5783, "step": 183 }, { "epoch": 0.040943480195816645, "grad_norm": 0.9461544590897419, "learning_rate": 9.997008388610244e-06, "loss": 0.5695, "step": 184 }, { "epoch": 0.04116599910992434, "grad_norm": 0.9811577366299109, "learning_rate": 9.996882452783217e-06, "loss": 0.5795, "step": 185 }, { "epoch": 0.04138851802403204, "grad_norm": 1.1415115647298963, "learning_rate": 9.996753921431754e-06, "loss": 0.5667, "step": 186 }, { "epoch": 0.041611036938139744, "grad_norm": 1.1465761871975184, "learning_rate": 9.996622794622621e-06, "loss": 0.5844, "step": 187 }, { "epoch": 0.04183355585224744, "grad_norm": 1.0066251395282093, "learning_rate": 9.996489072423927e-06, "loss": 0.5825, "step": 188 }, { "epoch": 0.04205607476635514, "grad_norm": 1.0054497478579414, "learning_rate": 9.996352754905133e-06, "loss": 0.5596, "step": 189 }, { "epoch": 0.04227859368046284, "grad_norm": 0.957850811324562, "learning_rate": 9.99621384213704e-06, "loss": 0.5672, "step": 190 }, { "epoch": 0.04250111259457054, "grad_norm": 1.039249397842595, "learning_rate": 9.996072334191814e-06, "loss": 0.5793, "step": 191 }, { "epoch": 0.042723631508678236, "grad_norm": 1.0734415027432567, "learning_rate": 9.995928231142949e-06, "loss": 0.5659, "step": 192 }, { "epoch": 0.042946150422785934, "grad_norm": 1.1049385759107817, "learning_rate": 9.9957815330653e-06, "loss": 0.5682, "step": 193 }, { "epoch": 0.04316866933689364, "grad_norm": 1.0540371923261882, "learning_rate": 9.995632240035065e-06, "loss": 0.572, "step": 194 }, { "epoch": 0.043391188251001335, "grad_norm": 1.02074240352455, "learning_rate": 9.995480352129794e-06, "loss": 0.5581, "step": 195 }, { "epoch": 0.04361370716510903, "grad_norm": 1.0190202783119844, "learning_rate": 9.995325869428379e-06, "loss": 0.551, "step": 196 }, { "epoch": 0.043836226079216736, "grad_norm": 0.9936015112441368, "learning_rate": 9.995168792011062e-06, "loss": 0.5822, "step": 197 }, { "epoch": 0.044058744993324434, "grad_norm": 1.1931134482766559, "learning_rate": 9.995009119959438e-06, "loss": 0.5688, "step": 198 }, { "epoch": 0.04428126390743213, "grad_norm": 1.1189118786080814, "learning_rate": 9.994846853356442e-06, "loss": 0.5873, "step": 199 }, { "epoch": 0.04450378282153983, "grad_norm": 1.0670883881185285, "learning_rate": 9.994681992286359e-06, "loss": 0.581, "step": 200 }, { "epoch": 0.04472630173564753, "grad_norm": 1.2243896097393445, "learning_rate": 9.994514536834824e-06, "loss": 0.5873, "step": 201 }, { "epoch": 0.04494882064975523, "grad_norm": 0.9915102648283382, "learning_rate": 9.994344487088818e-06, "loss": 0.554, "step": 202 }, { "epoch": 0.045171339563862926, "grad_norm": 1.0270181583291154, "learning_rate": 9.994171843136671e-06, "loss": 0.5709, "step": 203 }, { "epoch": 0.04539385847797063, "grad_norm": 0.9855175494900786, "learning_rate": 9.993996605068057e-06, "loss": 0.5818, "step": 204 }, { "epoch": 0.04561637739207833, "grad_norm": 1.0112286931210832, "learning_rate": 9.993818772974002e-06, "loss": 0.5893, "step": 205 }, { "epoch": 0.045838896306186025, "grad_norm": 1.0118243084148393, "learning_rate": 9.993638346946875e-06, "loss": 0.5752, "step": 206 }, { "epoch": 0.04606141522029372, "grad_norm": 1.0466272090069078, "learning_rate": 9.993455327080394e-06, "loss": 0.579, "step": 207 }, { "epoch": 0.046283934134401426, "grad_norm": 1.0497843861913054, "learning_rate": 9.99326971346963e-06, "loss": 0.5677, "step": 208 }, { "epoch": 0.046506453048509123, "grad_norm": 1.1048092946552692, "learning_rate": 9.993081506210988e-06, "loss": 0.572, "step": 209 }, { "epoch": 0.04672897196261682, "grad_norm": 1.223264982813725, "learning_rate": 9.992890705402233e-06, "loss": 0.5752, "step": 210 }, { "epoch": 0.046951490876724525, "grad_norm": 1.0613649914131351, "learning_rate": 9.992697311142474e-06, "loss": 0.5622, "step": 211 }, { "epoch": 0.04717400979083222, "grad_norm": 1.0475577804553795, "learning_rate": 9.992501323532161e-06, "loss": 0.5704, "step": 212 }, { "epoch": 0.04739652870493992, "grad_norm": 1.019712308785494, "learning_rate": 9.9923027426731e-06, "loss": 0.5743, "step": 213 }, { "epoch": 0.047619047619047616, "grad_norm": 1.0342875781158654, "learning_rate": 9.992101568668437e-06, "loss": 0.5685, "step": 214 }, { "epoch": 0.04784156653315532, "grad_norm": 1.0017959734778918, "learning_rate": 9.991897801622669e-06, "loss": 0.5867, "step": 215 }, { "epoch": 0.04806408544726302, "grad_norm": 1.0879631622878374, "learning_rate": 9.991691441641637e-06, "loss": 0.5473, "step": 216 }, { "epoch": 0.048286604361370715, "grad_norm": 1.0340132095661072, "learning_rate": 9.991482488832531e-06, "loss": 0.5859, "step": 217 }, { "epoch": 0.04850912327547842, "grad_norm": 1.0098762873333231, "learning_rate": 9.991270943303886e-06, "loss": 0.5745, "step": 218 }, { "epoch": 0.048731642189586116, "grad_norm": 1.091471694845796, "learning_rate": 9.991056805165587e-06, "loss": 0.5649, "step": 219 }, { "epoch": 0.04895416110369381, "grad_norm": 0.9476341261006782, "learning_rate": 9.99084007452886e-06, "loss": 0.5848, "step": 220 }, { "epoch": 0.04917668001780151, "grad_norm": 1.0071785496014138, "learning_rate": 9.990620751506286e-06, "loss": 0.5789, "step": 221 }, { "epoch": 0.049399198931909215, "grad_norm": 1.0138022364789991, "learning_rate": 9.990398836211786e-06, "loss": 0.5842, "step": 222 }, { "epoch": 0.04962171784601691, "grad_norm": 0.927070874470253, "learning_rate": 9.990174328760626e-06, "loss": 0.5819, "step": 223 }, { "epoch": 0.04984423676012461, "grad_norm": 0.9894266446349573, "learning_rate": 9.989947229269426e-06, "loss": 0.5634, "step": 224 }, { "epoch": 0.050066755674232306, "grad_norm": 0.9917627502826187, "learning_rate": 9.989717537856143e-06, "loss": 0.5567, "step": 225 }, { "epoch": 0.05028927458834001, "grad_norm": 0.9729367471907695, "learning_rate": 9.989485254640092e-06, "loss": 0.578, "step": 226 }, { "epoch": 0.05051179350244771, "grad_norm": 0.9753948115130938, "learning_rate": 9.989250379741922e-06, "loss": 0.5695, "step": 227 }, { "epoch": 0.050734312416555405, "grad_norm": 1.1062853793301464, "learning_rate": 9.989012913283636e-06, "loss": 0.575, "step": 228 }, { "epoch": 0.05095683133066311, "grad_norm": 0.975637770319214, "learning_rate": 9.98877285538858e-06, "loss": 0.5781, "step": 229 }, { "epoch": 0.051179350244770806, "grad_norm": 1.0670825608359962, "learning_rate": 9.988530206181448e-06, "loss": 0.5835, "step": 230 }, { "epoch": 0.0514018691588785, "grad_norm": 1.0189966800762689, "learning_rate": 9.988284965788278e-06, "loss": 0.5584, "step": 231 }, { "epoch": 0.0516243880729862, "grad_norm": 1.0362046179566455, "learning_rate": 9.988037134336457e-06, "loss": 0.5672, "step": 232 }, { "epoch": 0.051846906987093905, "grad_norm": 0.9694995261129242, "learning_rate": 9.987786711954712e-06, "loss": 0.578, "step": 233 }, { "epoch": 0.0520694259012016, "grad_norm": 0.9940835899754754, "learning_rate": 9.987533698773122e-06, "loss": 0.5814, "step": 234 }, { "epoch": 0.0522919448153093, "grad_norm": 1.035744548341212, "learning_rate": 9.987278094923111e-06, "loss": 0.5639, "step": 235 }, { "epoch": 0.052514463729417, "grad_norm": 0.9921374887936251, "learning_rate": 9.987019900537445e-06, "loss": 0.5774, "step": 236 }, { "epoch": 0.0527369826435247, "grad_norm": 0.9226922563876878, "learning_rate": 9.986759115750236e-06, "loss": 0.5651, "step": 237 }, { "epoch": 0.0529595015576324, "grad_norm": 1.028144736079073, "learning_rate": 9.986495740696946e-06, "loss": 0.5723, "step": 238 }, { "epoch": 0.053182020471740095, "grad_norm": 0.99688315239542, "learning_rate": 9.98622977551438e-06, "loss": 0.5742, "step": 239 }, { "epoch": 0.0534045393858478, "grad_norm": 1.0057497476494421, "learning_rate": 9.985961220340684e-06, "loss": 0.565, "step": 240 }, { "epoch": 0.053627058299955496, "grad_norm": 0.9949748409279362, "learning_rate": 9.985690075315355e-06, "loss": 0.5843, "step": 241 }, { "epoch": 0.05384957721406319, "grad_norm": 1.0083140364232714, "learning_rate": 9.985416340579236e-06, "loss": 0.5834, "step": 242 }, { "epoch": 0.0540720961281709, "grad_norm": 1.0457646607770845, "learning_rate": 9.98514001627451e-06, "loss": 0.5446, "step": 243 }, { "epoch": 0.054294615042278595, "grad_norm": 0.9650879513593769, "learning_rate": 9.984861102544709e-06, "loss": 0.5726, "step": 244 }, { "epoch": 0.05451713395638629, "grad_norm": 0.9231539170179037, "learning_rate": 9.98457959953471e-06, "loss": 0.5466, "step": 245 }, { "epoch": 0.05473965287049399, "grad_norm": 0.947087322751439, "learning_rate": 9.984295507390728e-06, "loss": 0.5456, "step": 246 }, { "epoch": 0.05496217178460169, "grad_norm": 1.0408121734738367, "learning_rate": 9.984008826260337e-06, "loss": 0.5444, "step": 247 }, { "epoch": 0.05518469069870939, "grad_norm": 1.044989076731347, "learning_rate": 9.983719556292442e-06, "loss": 0.5875, "step": 248 }, { "epoch": 0.05540720961281709, "grad_norm": 0.969479431985202, "learning_rate": 9.983427697637298e-06, "loss": 0.563, "step": 249 }, { "epoch": 0.05562972852692479, "grad_norm": 0.9831716300222378, "learning_rate": 9.983133250446509e-06, "loss": 0.5591, "step": 250 }, { "epoch": 0.05585224744103249, "grad_norm": 1.0490744956971927, "learning_rate": 9.982836214873015e-06, "loss": 0.5641, "step": 251 }, { "epoch": 0.056074766355140186, "grad_norm": 0.9637253868321198, "learning_rate": 9.982536591071105e-06, "loss": 0.576, "step": 252 }, { "epoch": 0.05629728526924788, "grad_norm": 1.067357004717406, "learning_rate": 9.982234379196415e-06, "loss": 0.5759, "step": 253 }, { "epoch": 0.05651980418335559, "grad_norm": 1.1242866441811812, "learning_rate": 9.981929579405921e-06, "loss": 0.5489, "step": 254 }, { "epoch": 0.056742323097463285, "grad_norm": 0.9613953504913342, "learning_rate": 9.981622191857944e-06, "loss": 0.5703, "step": 255 }, { "epoch": 0.05696484201157098, "grad_norm": 0.957198422520105, "learning_rate": 9.981312216712153e-06, "loss": 0.5578, "step": 256 }, { "epoch": 0.057187360925678686, "grad_norm": 0.9972769351948032, "learning_rate": 9.980999654129556e-06, "loss": 0.5669, "step": 257 }, { "epoch": 0.05740987983978638, "grad_norm": 1.0274678721828265, "learning_rate": 9.980684504272504e-06, "loss": 0.5733, "step": 258 }, { "epoch": 0.05763239875389408, "grad_norm": 1.065693891877648, "learning_rate": 9.9803667673047e-06, "loss": 0.5678, "step": 259 }, { "epoch": 0.05785491766800178, "grad_norm": 0.9786656501467885, "learning_rate": 9.980046443391182e-06, "loss": 0.5622, "step": 260 }, { "epoch": 0.05807743658210948, "grad_norm": 1.0474371793203054, "learning_rate": 9.979723532698338e-06, "loss": 0.579, "step": 261 }, { "epoch": 0.05829995549621718, "grad_norm": 1.0780063838569058, "learning_rate": 9.979398035393894e-06, "loss": 0.5714, "step": 262 }, { "epoch": 0.058522474410324876, "grad_norm": 1.0675245572442278, "learning_rate": 9.979069951646926e-06, "loss": 0.5646, "step": 263 }, { "epoch": 0.05874499332443257, "grad_norm": 0.9551723662241509, "learning_rate": 9.97873928162785e-06, "loss": 0.5578, "step": 264 }, { "epoch": 0.05896751223854028, "grad_norm": 1.1666959765219784, "learning_rate": 9.978406025508423e-06, "loss": 0.5564, "step": 265 }, { "epoch": 0.059190031152647975, "grad_norm": 1.073002409875055, "learning_rate": 9.978070183461747e-06, "loss": 0.5735, "step": 266 }, { "epoch": 0.05941255006675567, "grad_norm": 1.0595786180163682, "learning_rate": 9.977731755662274e-06, "loss": 0.5654, "step": 267 }, { "epoch": 0.059635068980863376, "grad_norm": 1.0298281496151291, "learning_rate": 9.977390742285788e-06, "loss": 0.5685, "step": 268 }, { "epoch": 0.05985758789497107, "grad_norm": 0.9919044737810889, "learning_rate": 9.977047143509423e-06, "loss": 0.5695, "step": 269 }, { "epoch": 0.06008010680907877, "grad_norm": 1.155976233656301, "learning_rate": 9.97670095951165e-06, "loss": 0.5939, "step": 270 }, { "epoch": 0.06030262572318647, "grad_norm": 1.007804417591958, "learning_rate": 9.976352190472294e-06, "loss": 0.5784, "step": 271 }, { "epoch": 0.06052514463729417, "grad_norm": 0.9763395169442134, "learning_rate": 9.97600083657251e-06, "loss": 0.5597, "step": 272 }, { "epoch": 0.06074766355140187, "grad_norm": 1.0433785273758245, "learning_rate": 9.975646897994804e-06, "loss": 0.5703, "step": 273 }, { "epoch": 0.060970182465509566, "grad_norm": 0.9819001927923985, "learning_rate": 9.975290374923022e-06, "loss": 0.5583, "step": 274 }, { "epoch": 0.06119270137961727, "grad_norm": 1.016241584356013, "learning_rate": 9.974931267542351e-06, "loss": 0.5664, "step": 275 }, { "epoch": 0.06141522029372497, "grad_norm": 0.9578539335123424, "learning_rate": 9.974569576039324e-06, "loss": 0.5647, "step": 276 }, { "epoch": 0.061637739207832665, "grad_norm": 0.8806387148285257, "learning_rate": 9.974205300601809e-06, "loss": 0.5562, "step": 277 }, { "epoch": 0.06186025812194036, "grad_norm": 0.9660109918461719, "learning_rate": 9.973838441419026e-06, "loss": 0.5613, "step": 278 }, { "epoch": 0.062082777036048066, "grad_norm": 1.1060359481536157, "learning_rate": 9.973468998681533e-06, "loss": 0.5736, "step": 279 }, { "epoch": 0.06230529595015576, "grad_norm": 1.0124705385041959, "learning_rate": 9.973096972581225e-06, "loss": 0.5786, "step": 280 }, { "epoch": 0.06252781486426347, "grad_norm": 1.0025841740379475, "learning_rate": 9.972722363311342e-06, "loss": 0.557, "step": 281 }, { "epoch": 0.06275033377837116, "grad_norm": 0.9748592001331086, "learning_rate": 9.972345171066473e-06, "loss": 0.5603, "step": 282 }, { "epoch": 0.06297285269247886, "grad_norm": 1.0824610483644557, "learning_rate": 9.97196539604254e-06, "loss": 0.5739, "step": 283 }, { "epoch": 0.06319537160658656, "grad_norm": 1.0541395151473159, "learning_rate": 9.971583038436805e-06, "loss": 0.5806, "step": 284 }, { "epoch": 0.06341789052069426, "grad_norm": 0.9677234527614827, "learning_rate": 9.971198098447881e-06, "loss": 0.5779, "step": 285 }, { "epoch": 0.06364040943480195, "grad_norm": 1.0679293922180253, "learning_rate": 9.970810576275713e-06, "loss": 0.5701, "step": 286 }, { "epoch": 0.06386292834890965, "grad_norm": 1.056667175155306, "learning_rate": 9.970420472121594e-06, "loss": 0.5798, "step": 287 }, { "epoch": 0.06408544726301736, "grad_norm": 0.9277101089219153, "learning_rate": 9.970027786188155e-06, "loss": 0.5575, "step": 288 }, { "epoch": 0.06430796617712506, "grad_norm": 1.0084909561007644, "learning_rate": 9.969632518679366e-06, "loss": 0.5692, "step": 289 }, { "epoch": 0.06453048509123276, "grad_norm": 1.0237993503791896, "learning_rate": 9.969234669800543e-06, "loss": 0.5783, "step": 290 }, { "epoch": 0.06475300400534045, "grad_norm": 0.9465613204602056, "learning_rate": 9.968834239758339e-06, "loss": 0.559, "step": 291 }, { "epoch": 0.06497552291944815, "grad_norm": 1.0054003720263138, "learning_rate": 9.968431228760749e-06, "loss": 0.573, "step": 292 }, { "epoch": 0.06519804183355585, "grad_norm": 1.0615455015041426, "learning_rate": 9.968025637017107e-06, "loss": 0.5753, "step": 293 }, { "epoch": 0.06542056074766354, "grad_norm": 1.0212493424346951, "learning_rate": 9.96761746473809e-06, "loss": 0.5414, "step": 294 }, { "epoch": 0.06564307966177126, "grad_norm": 1.13427514103369, "learning_rate": 9.967206712135718e-06, "loss": 0.5552, "step": 295 }, { "epoch": 0.06586559857587895, "grad_norm": 0.9466423984855028, "learning_rate": 9.96679337942334e-06, "loss": 0.5625, "step": 296 }, { "epoch": 0.06608811748998665, "grad_norm": 1.0327276755186332, "learning_rate": 9.966377466815662e-06, "loss": 0.5784, "step": 297 }, { "epoch": 0.06631063640409435, "grad_norm": 0.9702971679118286, "learning_rate": 9.965958974528713e-06, "loss": 0.5741, "step": 298 }, { "epoch": 0.06653315531820204, "grad_norm": 1.0204319546101834, "learning_rate": 9.965537902779874e-06, "loss": 0.5682, "step": 299 }, { "epoch": 0.06675567423230974, "grad_norm": 0.9336883675979517, "learning_rate": 9.965114251787862e-06, "loss": 0.5607, "step": 300 }, { "epoch": 0.06697819314641744, "grad_norm": 0.9738834487351696, "learning_rate": 9.964688021772733e-06, "loss": 0.5549, "step": 301 }, { "epoch": 0.06720071206052515, "grad_norm": 0.9506691153767353, "learning_rate": 9.964259212955882e-06, "loss": 0.5747, "step": 302 }, { "epoch": 0.06742323097463285, "grad_norm": 1.0192935960127762, "learning_rate": 9.963827825560044e-06, "loss": 0.5495, "step": 303 }, { "epoch": 0.06764574988874054, "grad_norm": 1.0754358757562597, "learning_rate": 9.963393859809297e-06, "loss": 0.58, "step": 304 }, { "epoch": 0.06786826880284824, "grad_norm": 1.001718822748997, "learning_rate": 9.962957315929054e-06, "loss": 0.5479, "step": 305 }, { "epoch": 0.06809078771695594, "grad_norm": 0.9750314637153149, "learning_rate": 9.962518194146066e-06, "loss": 0.5624, "step": 306 }, { "epoch": 0.06831330663106364, "grad_norm": 1.031071892212203, "learning_rate": 9.962076494688429e-06, "loss": 0.5692, "step": 307 }, { "epoch": 0.06853582554517133, "grad_norm": 1.0376152555888773, "learning_rate": 9.961632217785573e-06, "loss": 0.5629, "step": 308 }, { "epoch": 0.06875834445927904, "grad_norm": 0.9509449879733417, "learning_rate": 9.96118536366827e-06, "loss": 0.5636, "step": 309 }, { "epoch": 0.06898086337338674, "grad_norm": 1.0376326712544763, "learning_rate": 9.960735932568623e-06, "loss": 0.5615, "step": 310 }, { "epoch": 0.06920338228749444, "grad_norm": 0.9484248802797699, "learning_rate": 9.960283924720087e-06, "loss": 0.5681, "step": 311 }, { "epoch": 0.06942590120160214, "grad_norm": 0.9565463377072952, "learning_rate": 9.959829340357444e-06, "loss": 0.5664, "step": 312 }, { "epoch": 0.06964842011570983, "grad_norm": 0.9588387096786557, "learning_rate": 9.959372179716815e-06, "loss": 0.5686, "step": 313 }, { "epoch": 0.06987093902981753, "grad_norm": 0.9544515253250965, "learning_rate": 9.958912443035669e-06, "loss": 0.5759, "step": 314 }, { "epoch": 0.07009345794392523, "grad_norm": 0.959252833424589, "learning_rate": 9.958450130552803e-06, "loss": 0.5598, "step": 315 }, { "epoch": 0.07031597685803294, "grad_norm": 0.9270529719130135, "learning_rate": 9.957985242508356e-06, "loss": 0.5496, "step": 316 }, { "epoch": 0.07053849577214064, "grad_norm": 0.9926191088843231, "learning_rate": 9.957517779143804e-06, "loss": 0.5517, "step": 317 }, { "epoch": 0.07076101468624833, "grad_norm": 1.0841531934147004, "learning_rate": 9.957047740701959e-06, "loss": 0.5514, "step": 318 }, { "epoch": 0.07098353360035603, "grad_norm": 0.9463266388746395, "learning_rate": 9.956575127426978e-06, "loss": 0.575, "step": 319 }, { "epoch": 0.07120605251446373, "grad_norm": 1.0698370209649437, "learning_rate": 9.956099939564343e-06, "loss": 0.5589, "step": 320 }, { "epoch": 0.07142857142857142, "grad_norm": 0.99794993923018, "learning_rate": 9.955622177360885e-06, "loss": 0.565, "step": 321 }, { "epoch": 0.07165109034267912, "grad_norm": 1.1641501495612034, "learning_rate": 9.955141841064766e-06, "loss": 0.567, "step": 322 }, { "epoch": 0.07187360925678683, "grad_norm": 0.9570950765928982, "learning_rate": 9.954658930925487e-06, "loss": 0.5636, "step": 323 }, { "epoch": 0.07209612817089453, "grad_norm": 1.045188806710817, "learning_rate": 9.954173447193885e-06, "loss": 0.5742, "step": 324 }, { "epoch": 0.07231864708500223, "grad_norm": 1.1681912129571577, "learning_rate": 9.953685390122132e-06, "loss": 0.5822, "step": 325 }, { "epoch": 0.07254116599910992, "grad_norm": 1.0528878978328806, "learning_rate": 9.953194759963742e-06, "loss": 0.5681, "step": 326 }, { "epoch": 0.07276368491321762, "grad_norm": 1.0489193281254243, "learning_rate": 9.952701556973563e-06, "loss": 0.5937, "step": 327 }, { "epoch": 0.07298620382732532, "grad_norm": 0.9927496222623773, "learning_rate": 9.952205781407775e-06, "loss": 0.5676, "step": 328 }, { "epoch": 0.07320872274143302, "grad_norm": 1.0583045107789053, "learning_rate": 9.951707433523901e-06, "loss": 0.5745, "step": 329 }, { "epoch": 0.07343124165554073, "grad_norm": 0.9222951770708856, "learning_rate": 9.951206513580798e-06, "loss": 0.5517, "step": 330 }, { "epoch": 0.07365376056964842, "grad_norm": 1.004511135616196, "learning_rate": 9.950703021838655e-06, "loss": 0.5707, "step": 331 }, { "epoch": 0.07387627948375612, "grad_norm": 0.965909535742246, "learning_rate": 9.950196958559001e-06, "loss": 0.5441, "step": 332 }, { "epoch": 0.07409879839786382, "grad_norm": 0.9255993124544086, "learning_rate": 9.949688324004703e-06, "loss": 0.5557, "step": 333 }, { "epoch": 0.07432131731197152, "grad_norm": 1.0143117127567192, "learning_rate": 9.949177118439956e-06, "loss": 0.5465, "step": 334 }, { "epoch": 0.07454383622607921, "grad_norm": 1.0302178165951477, "learning_rate": 9.9486633421303e-06, "loss": 0.5701, "step": 335 }, { "epoch": 0.07476635514018691, "grad_norm": 0.9909647583802595, "learning_rate": 9.948146995342599e-06, "loss": 0.5607, "step": 336 }, { "epoch": 0.07498887405429462, "grad_norm": 1.0724751322403994, "learning_rate": 9.947628078345063e-06, "loss": 0.5592, "step": 337 }, { "epoch": 0.07521139296840232, "grad_norm": 0.9680819744607418, "learning_rate": 9.94710659140723e-06, "loss": 0.5527, "step": 338 }, { "epoch": 0.07543391188251002, "grad_norm": 0.9544062893683789, "learning_rate": 9.946582534799977e-06, "loss": 0.5668, "step": 339 }, { "epoch": 0.07565643079661771, "grad_norm": 0.93027793913841, "learning_rate": 9.946055908795513e-06, "loss": 0.5508, "step": 340 }, { "epoch": 0.07587894971072541, "grad_norm": 1.0456354069102258, "learning_rate": 9.945526713667382e-06, "loss": 0.5679, "step": 341 }, { "epoch": 0.07610146862483311, "grad_norm": 0.9519354525539087, "learning_rate": 9.944994949690466e-06, "loss": 0.5662, "step": 342 }, { "epoch": 0.0763239875389408, "grad_norm": 1.0640266086172148, "learning_rate": 9.944460617140977e-06, "loss": 0.5618, "step": 343 }, { "epoch": 0.07654650645304852, "grad_norm": 0.9761879692464339, "learning_rate": 9.94392371629646e-06, "loss": 0.5673, "step": 344 }, { "epoch": 0.07676902536715621, "grad_norm": 1.005282406533552, "learning_rate": 9.9433842474358e-06, "loss": 0.5622, "step": 345 }, { "epoch": 0.07699154428126391, "grad_norm": 1.0468505849393321, "learning_rate": 9.942842210839212e-06, "loss": 0.5613, "step": 346 }, { "epoch": 0.07721406319537161, "grad_norm": 0.9384798477754936, "learning_rate": 9.942297606788245e-06, "loss": 0.559, "step": 347 }, { "epoch": 0.0774365821094793, "grad_norm": 1.143923237078022, "learning_rate": 9.941750435565782e-06, "loss": 0.5545, "step": 348 }, { "epoch": 0.077659101023587, "grad_norm": 1.0279038312684041, "learning_rate": 9.94120069745604e-06, "loss": 0.5545, "step": 349 }, { "epoch": 0.0778816199376947, "grad_norm": 1.0058111892169155, "learning_rate": 9.940648392744567e-06, "loss": 0.5595, "step": 350 }, { "epoch": 0.07810413885180241, "grad_norm": 1.0341286105985479, "learning_rate": 9.940093521718249e-06, "loss": 0.5632, "step": 351 }, { "epoch": 0.07832665776591011, "grad_norm": 0.9512487511196719, "learning_rate": 9.9395360846653e-06, "loss": 0.5635, "step": 352 }, { "epoch": 0.0785491766800178, "grad_norm": 1.0191085476906696, "learning_rate": 9.938976081875267e-06, "loss": 0.5629, "step": 353 }, { "epoch": 0.0787716955941255, "grad_norm": 1.015282666706043, "learning_rate": 9.938413513639036e-06, "loss": 0.555, "step": 354 }, { "epoch": 0.0789942145082332, "grad_norm": 0.941748024660952, "learning_rate": 9.937848380248817e-06, "loss": 0.5708, "step": 355 }, { "epoch": 0.0792167334223409, "grad_norm": 1.140776729837035, "learning_rate": 9.93728068199816e-06, "loss": 0.554, "step": 356 }, { "epoch": 0.0794392523364486, "grad_norm": 1.0309857055420921, "learning_rate": 9.936710419181943e-06, "loss": 0.5559, "step": 357 }, { "epoch": 0.0796617712505563, "grad_norm": 0.8818432647937654, "learning_rate": 9.936137592096373e-06, "loss": 0.5743, "step": 358 }, { "epoch": 0.079884290164664, "grad_norm": 1.0462309973183146, "learning_rate": 9.935562201038999e-06, "loss": 0.556, "step": 359 }, { "epoch": 0.0801068090787717, "grad_norm": 1.090112034635913, "learning_rate": 9.93498424630869e-06, "loss": 0.5717, "step": 360 }, { "epoch": 0.0803293279928794, "grad_norm": 1.0140852550017772, "learning_rate": 9.934403728205655e-06, "loss": 0.5592, "step": 361 }, { "epoch": 0.08055184690698709, "grad_norm": 1.2122851626556945, "learning_rate": 9.933820647031434e-06, "loss": 0.5606, "step": 362 }, { "epoch": 0.08077436582109479, "grad_norm": 1.0277383039337198, "learning_rate": 9.933235003088893e-06, "loss": 0.5528, "step": 363 }, { "epoch": 0.08099688473520249, "grad_norm": 0.9976753703565092, "learning_rate": 9.93264679668223e-06, "loss": 0.558, "step": 364 }, { "epoch": 0.08121940364931018, "grad_norm": 1.2408382077460034, "learning_rate": 9.932056028116983e-06, "loss": 0.5739, "step": 365 }, { "epoch": 0.0814419225634179, "grad_norm": 1.0901506565071637, "learning_rate": 9.93146269770001e-06, "loss": 0.5669, "step": 366 }, { "epoch": 0.08166444147752559, "grad_norm": 1.062464429216383, "learning_rate": 9.930866805739504e-06, "loss": 0.5714, "step": 367 }, { "epoch": 0.08188696039163329, "grad_norm": 1.1996813468829148, "learning_rate": 9.930268352544987e-06, "loss": 0.568, "step": 368 }, { "epoch": 0.08210947930574099, "grad_norm": 1.1425744342857398, "learning_rate": 9.929667338427315e-06, "loss": 0.5797, "step": 369 }, { "epoch": 0.08233199821984868, "grad_norm": 1.052461383659255, "learning_rate": 9.929063763698675e-06, "loss": 0.5758, "step": 370 }, { "epoch": 0.08255451713395638, "grad_norm": 0.9896933743452804, "learning_rate": 9.928457628672574e-06, "loss": 0.5597, "step": 371 }, { "epoch": 0.08277703604806408, "grad_norm": 1.0612171287149679, "learning_rate": 9.927848933663862e-06, "loss": 0.5565, "step": 372 }, { "epoch": 0.08299955496217179, "grad_norm": 1.031128600617174, "learning_rate": 9.92723767898871e-06, "loss": 0.5844, "step": 373 }, { "epoch": 0.08322207387627949, "grad_norm": 1.0169818059762281, "learning_rate": 9.926623864964622e-06, "loss": 0.5617, "step": 374 }, { "epoch": 0.08344459279038718, "grad_norm": 1.1179232511673025, "learning_rate": 9.92600749191043e-06, "loss": 0.5649, "step": 375 }, { "epoch": 0.08366711170449488, "grad_norm": 0.9605945314368275, "learning_rate": 9.925388560146295e-06, "loss": 0.5628, "step": 376 }, { "epoch": 0.08388963061860258, "grad_norm": 1.102224573772835, "learning_rate": 9.92476706999371e-06, "loss": 0.5542, "step": 377 }, { "epoch": 0.08411214953271028, "grad_norm": 1.0059055432616688, "learning_rate": 9.924143021775494e-06, "loss": 0.5577, "step": 378 }, { "epoch": 0.08433466844681797, "grad_norm": 1.022582008144535, "learning_rate": 9.923516415815797e-06, "loss": 0.566, "step": 379 }, { "epoch": 0.08455718736092568, "grad_norm": 1.010216728928507, "learning_rate": 9.922887252440093e-06, "loss": 0.5485, "step": 380 }, { "epoch": 0.08477970627503338, "grad_norm": 1.086282253168849, "learning_rate": 9.92225553197519e-06, "loss": 0.5522, "step": 381 }, { "epoch": 0.08500222518914108, "grad_norm": 1.0100827972685205, "learning_rate": 9.92162125474922e-06, "loss": 0.5585, "step": 382 }, { "epoch": 0.08522474410324878, "grad_norm": 1.0224009167816395, "learning_rate": 9.920984421091649e-06, "loss": 0.5621, "step": 383 }, { "epoch": 0.08544726301735647, "grad_norm": 1.026081066733623, "learning_rate": 9.92034503133326e-06, "loss": 0.5781, "step": 384 }, { "epoch": 0.08566978193146417, "grad_norm": 0.9775579254096854, "learning_rate": 9.919703085806176e-06, "loss": 0.5765, "step": 385 }, { "epoch": 0.08589230084557187, "grad_norm": 1.09093019325935, "learning_rate": 9.919058584843839e-06, "loss": 0.5646, "step": 386 }, { "epoch": 0.08611481975967958, "grad_norm": 0.9360989642082141, "learning_rate": 9.918411528781024e-06, "loss": 0.5428, "step": 387 }, { "epoch": 0.08633733867378728, "grad_norm": 0.854576064336328, "learning_rate": 9.917761917953827e-06, "loss": 0.5558, "step": 388 }, { "epoch": 0.08655985758789497, "grad_norm": 0.9742501750678989, "learning_rate": 9.917109752699677e-06, "loss": 0.546, "step": 389 }, { "epoch": 0.08678237650200267, "grad_norm": 0.978240374551664, "learning_rate": 9.916455033357327e-06, "loss": 0.5779, "step": 390 }, { "epoch": 0.08700489541611037, "grad_norm": 1.028068207539001, "learning_rate": 9.915797760266857e-06, "loss": 0.5688, "step": 391 }, { "epoch": 0.08722741433021806, "grad_norm": 1.0138464094734325, "learning_rate": 9.915137933769674e-06, "loss": 0.5667, "step": 392 }, { "epoch": 0.08744993324432576, "grad_norm": 0.9681516025276924, "learning_rate": 9.914475554208509e-06, "loss": 0.561, "step": 393 }, { "epoch": 0.08767245215843347, "grad_norm": 0.9653995718101175, "learning_rate": 9.913810621927423e-06, "loss": 0.5579, "step": 394 }, { "epoch": 0.08789497107254117, "grad_norm": 1.120324986363825, "learning_rate": 9.9131431372718e-06, "loss": 0.575, "step": 395 }, { "epoch": 0.08811748998664887, "grad_norm": 0.9297441203732284, "learning_rate": 9.91247310058835e-06, "loss": 0.5558, "step": 396 }, { "epoch": 0.08834000890075656, "grad_norm": 0.960935373993475, "learning_rate": 9.911800512225113e-06, "loss": 0.5585, "step": 397 }, { "epoch": 0.08856252781486426, "grad_norm": 0.9693859437301314, "learning_rate": 9.911125372531445e-06, "loss": 0.5671, "step": 398 }, { "epoch": 0.08878504672897196, "grad_norm": 0.9387424054383281, "learning_rate": 9.910447681858037e-06, "loss": 0.5461, "step": 399 }, { "epoch": 0.08900756564307966, "grad_norm": 0.9375762564244922, "learning_rate": 9.9097674405569e-06, "loss": 0.574, "step": 400 }, { "epoch": 0.08923008455718737, "grad_norm": 0.9571990522524214, "learning_rate": 9.909084648981373e-06, "loss": 0.5645, "step": 401 }, { "epoch": 0.08945260347129506, "grad_norm": 0.8953305342965837, "learning_rate": 9.908399307486113e-06, "loss": 0.5819, "step": 402 }, { "epoch": 0.08967512238540276, "grad_norm": 0.9833468232183115, "learning_rate": 9.907711416427108e-06, "loss": 0.5485, "step": 403 }, { "epoch": 0.08989764129951046, "grad_norm": 0.9514521505064014, "learning_rate": 9.907020976161672e-06, "loss": 0.5614, "step": 404 }, { "epoch": 0.09012016021361816, "grad_norm": 0.9908651665266024, "learning_rate": 9.906327987048435e-06, "loss": 0.5436, "step": 405 }, { "epoch": 0.09034267912772585, "grad_norm": 0.9603938685002543, "learning_rate": 9.905632449447355e-06, "loss": 0.5759, "step": 406 }, { "epoch": 0.09056519804183355, "grad_norm": 1.0430443623652197, "learning_rate": 9.904934363719719e-06, "loss": 0.574, "step": 407 }, { "epoch": 0.09078771695594126, "grad_norm": 1.0251156055778972, "learning_rate": 9.904233730228126e-06, "loss": 0.5601, "step": 408 }, { "epoch": 0.09101023587004896, "grad_norm": 0.8700414140486015, "learning_rate": 9.903530549336513e-06, "loss": 0.5429, "step": 409 }, { "epoch": 0.09123275478415666, "grad_norm": 0.9993811470751671, "learning_rate": 9.902824821410126e-06, "loss": 0.568, "step": 410 }, { "epoch": 0.09145527369826435, "grad_norm": 1.0115568005213509, "learning_rate": 9.902116546815545e-06, "loss": 0.573, "step": 411 }, { "epoch": 0.09167779261237205, "grad_norm": 0.9635909017150915, "learning_rate": 9.901405725920665e-06, "loss": 0.5515, "step": 412 }, { "epoch": 0.09190031152647975, "grad_norm": 1.0707694073963376, "learning_rate": 9.900692359094708e-06, "loss": 0.559, "step": 413 }, { "epoch": 0.09212283044058744, "grad_norm": 0.9550842628460477, "learning_rate": 9.899976446708217e-06, "loss": 0.5391, "step": 414 }, { "epoch": 0.09234534935469516, "grad_norm": 0.966927980212262, "learning_rate": 9.899257989133057e-06, "loss": 0.5486, "step": 415 }, { "epoch": 0.09256786826880285, "grad_norm": 1.1301963218209936, "learning_rate": 9.898536986742418e-06, "loss": 0.5651, "step": 416 }, { "epoch": 0.09279038718291055, "grad_norm": 0.986720351417856, "learning_rate": 9.897813439910806e-06, "loss": 0.5618, "step": 417 }, { "epoch": 0.09301290609701825, "grad_norm": 1.0463281971066116, "learning_rate": 9.897087349014054e-06, "loss": 0.5479, "step": 418 }, { "epoch": 0.09323542501112594, "grad_norm": 2.1425078405029856, "learning_rate": 9.896358714429315e-06, "loss": 0.5715, "step": 419 }, { "epoch": 0.09345794392523364, "grad_norm": 0.9596315173884928, "learning_rate": 9.89562753653506e-06, "loss": 0.5538, "step": 420 }, { "epoch": 0.09368046283934134, "grad_norm": 1.1712758677300557, "learning_rate": 9.894893815711087e-06, "loss": 0.5673, "step": 421 }, { "epoch": 0.09390298175344905, "grad_norm": 1.0026010854160448, "learning_rate": 9.894157552338511e-06, "loss": 0.5391, "step": 422 }, { "epoch": 0.09412550066755675, "grad_norm": 1.043859336120038, "learning_rate": 9.893418746799766e-06, "loss": 0.5513, "step": 423 }, { "epoch": 0.09434801958166444, "grad_norm": 1.0635603138511274, "learning_rate": 9.89267739947861e-06, "loss": 0.5737, "step": 424 }, { "epoch": 0.09457053849577214, "grad_norm": 0.9643563238106039, "learning_rate": 9.891933510760123e-06, "loss": 0.5615, "step": 425 }, { "epoch": 0.09479305740987984, "grad_norm": 1.0294754339916878, "learning_rate": 9.891187081030698e-06, "loss": 0.5715, "step": 426 }, { "epoch": 0.09501557632398754, "grad_norm": 0.9700121065582694, "learning_rate": 9.890438110678053e-06, "loss": 0.5365, "step": 427 }, { "epoch": 0.09523809523809523, "grad_norm": 0.9509530544653623, "learning_rate": 9.889686600091228e-06, "loss": 0.5609, "step": 428 }, { "epoch": 0.09546061415220294, "grad_norm": 1.0436462349398006, "learning_rate": 9.888932549660576e-06, "loss": 0.5755, "step": 429 }, { "epoch": 0.09568313306631064, "grad_norm": 0.9737621769015284, "learning_rate": 9.888175959777772e-06, "loss": 0.5618, "step": 430 }, { "epoch": 0.09590565198041834, "grad_norm": 0.9373349191030348, "learning_rate": 9.887416830835814e-06, "loss": 0.5437, "step": 431 }, { "epoch": 0.09612817089452604, "grad_norm": 1.013198022642664, "learning_rate": 9.886655163229014e-06, "loss": 0.5558, "step": 432 }, { "epoch": 0.09635068980863373, "grad_norm": 1.1363465404332296, "learning_rate": 9.885890957353e-06, "loss": 0.5578, "step": 433 }, { "epoch": 0.09657320872274143, "grad_norm": 0.9796965122902668, "learning_rate": 9.885124213604728e-06, "loss": 0.5584, "step": 434 }, { "epoch": 0.09679572763684913, "grad_norm": 1.0850816262642824, "learning_rate": 9.884354932382464e-06, "loss": 0.5621, "step": 435 }, { "epoch": 0.09701824655095684, "grad_norm": 1.0054452989333162, "learning_rate": 9.883583114085795e-06, "loss": 0.5543, "step": 436 }, { "epoch": 0.09724076546506454, "grad_norm": 0.9873069142221519, "learning_rate": 9.882808759115628e-06, "loss": 0.5475, "step": 437 }, { "epoch": 0.09746328437917223, "grad_norm": 1.104222092410499, "learning_rate": 9.88203186787418e-06, "loss": 0.5545, "step": 438 }, { "epoch": 0.09768580329327993, "grad_norm": 1.0176242432692066, "learning_rate": 9.881252440764997e-06, "loss": 0.5642, "step": 439 }, { "epoch": 0.09790832220738763, "grad_norm": 1.0297716720771724, "learning_rate": 9.880470478192932e-06, "loss": 0.5674, "step": 440 }, { "epoch": 0.09813084112149532, "grad_norm": 1.0230481451000555, "learning_rate": 9.879685980564158e-06, "loss": 0.563, "step": 441 }, { "epoch": 0.09835336003560302, "grad_norm": 0.8904709622700603, "learning_rate": 9.878898948286169e-06, "loss": 0.5489, "step": 442 }, { "epoch": 0.09857587894971072, "grad_norm": 1.0210323017118244, "learning_rate": 9.878109381767769e-06, "loss": 0.5483, "step": 443 }, { "epoch": 0.09879839786381843, "grad_norm": 0.9584467852614555, "learning_rate": 9.877317281419083e-06, "loss": 0.5782, "step": 444 }, { "epoch": 0.09902091677792613, "grad_norm": 0.9350818590366743, "learning_rate": 9.876522647651552e-06, "loss": 0.5559, "step": 445 }, { "epoch": 0.09924343569203382, "grad_norm": 0.9557365408456828, "learning_rate": 9.875725480877929e-06, "loss": 0.5604, "step": 446 }, { "epoch": 0.09946595460614152, "grad_norm": 0.9722843710684302, "learning_rate": 9.874925781512287e-06, "loss": 0.5667, "step": 447 }, { "epoch": 0.09968847352024922, "grad_norm": 1.038920623286856, "learning_rate": 9.874123549970011e-06, "loss": 0.5494, "step": 448 }, { "epoch": 0.09991099243435692, "grad_norm": 0.9525877349453481, "learning_rate": 9.873318786667808e-06, "loss": 0.5421, "step": 449 }, { "epoch": 0.10013351134846461, "grad_norm": 1.0346284358358517, "learning_rate": 9.872511492023688e-06, "loss": 0.5658, "step": 450 }, { "epoch": 0.10035603026257232, "grad_norm": 0.9990495313281434, "learning_rate": 9.87170166645699e-06, "loss": 0.5619, "step": 451 }, { "epoch": 0.10057854917668002, "grad_norm": 1.0951135229495494, "learning_rate": 9.870889310388356e-06, "loss": 0.5751, "step": 452 }, { "epoch": 0.10080106809078772, "grad_norm": 1.0137646290558977, "learning_rate": 9.87007442423975e-06, "loss": 0.5531, "step": 453 }, { "epoch": 0.10102358700489542, "grad_norm": 0.9437262215831708, "learning_rate": 9.869257008434445e-06, "loss": 0.5504, "step": 454 }, { "epoch": 0.10124610591900311, "grad_norm": 1.1696825622044038, "learning_rate": 9.868437063397031e-06, "loss": 0.5453, "step": 455 }, { "epoch": 0.10146862483311081, "grad_norm": 0.9305752463277259, "learning_rate": 9.867614589553412e-06, "loss": 0.5534, "step": 456 }, { "epoch": 0.10169114374721851, "grad_norm": 1.032381634937158, "learning_rate": 9.866789587330803e-06, "loss": 0.5799, "step": 457 }, { "epoch": 0.10191366266132622, "grad_norm": 1.0477299913747595, "learning_rate": 9.865962057157734e-06, "loss": 0.552, "step": 458 }, { "epoch": 0.10213618157543392, "grad_norm": 0.9903818179405977, "learning_rate": 9.86513199946405e-06, "loss": 0.5471, "step": 459 }, { "epoch": 0.10235870048954161, "grad_norm": 0.9857740155063831, "learning_rate": 9.864299414680904e-06, "loss": 0.5633, "step": 460 }, { "epoch": 0.10258121940364931, "grad_norm": 1.015597159570563, "learning_rate": 9.863464303240768e-06, "loss": 0.5635, "step": 461 }, { "epoch": 0.102803738317757, "grad_norm": 0.9365030488735377, "learning_rate": 9.86262666557742e-06, "loss": 0.5643, "step": 462 }, { "epoch": 0.1030262572318647, "grad_norm": 0.9751176523427865, "learning_rate": 9.861786502125954e-06, "loss": 0.5809, "step": 463 }, { "epoch": 0.1032487761459724, "grad_norm": 0.8707179683810261, "learning_rate": 9.860943813322776e-06, "loss": 0.5558, "step": 464 }, { "epoch": 0.10347129506008011, "grad_norm": 0.9090064039353634, "learning_rate": 9.8600985996056e-06, "loss": 0.5569, "step": 465 }, { "epoch": 0.10369381397418781, "grad_norm": 0.979993415425757, "learning_rate": 9.859250861413456e-06, "loss": 0.5545, "step": 466 }, { "epoch": 0.1039163328882955, "grad_norm": 0.9729939652653935, "learning_rate": 9.858400599186686e-06, "loss": 0.5693, "step": 467 }, { "epoch": 0.1041388518024032, "grad_norm": 0.9910720495534504, "learning_rate": 9.857547813366937e-06, "loss": 0.5509, "step": 468 }, { "epoch": 0.1043613707165109, "grad_norm": 0.9837508383221555, "learning_rate": 9.856692504397171e-06, "loss": 0.5492, "step": 469 }, { "epoch": 0.1045838896306186, "grad_norm": 0.9554201508125815, "learning_rate": 9.855834672721662e-06, "loss": 0.5594, "step": 470 }, { "epoch": 0.1048064085447263, "grad_norm": 1.0023638174728076, "learning_rate": 9.85497431878599e-06, "loss": 0.5577, "step": 471 }, { "epoch": 0.105028927458834, "grad_norm": 0.953732673386536, "learning_rate": 9.85411144303705e-06, "loss": 0.5331, "step": 472 }, { "epoch": 0.1052514463729417, "grad_norm": 1.0075631305488437, "learning_rate": 9.853246045923043e-06, "loss": 0.5542, "step": 473 }, { "epoch": 0.1054739652870494, "grad_norm": 0.9625594482976764, "learning_rate": 9.852378127893483e-06, "loss": 0.5453, "step": 474 }, { "epoch": 0.1056964842011571, "grad_norm": 0.9438541485817031, "learning_rate": 9.851507689399189e-06, "loss": 0.5498, "step": 475 }, { "epoch": 0.1059190031152648, "grad_norm": 0.9675123072989792, "learning_rate": 9.850634730892294e-06, "loss": 0.5661, "step": 476 }, { "epoch": 0.10614152202937249, "grad_norm": 0.9877657377413239, "learning_rate": 9.849759252826236e-06, "loss": 0.5488, "step": 477 }, { "epoch": 0.10636404094348019, "grad_norm": 0.9820229893852253, "learning_rate": 9.848881255655763e-06, "loss": 0.5421, "step": 478 }, { "epoch": 0.1065865598575879, "grad_norm": 0.9989391374386685, "learning_rate": 9.848000739836934e-06, "loss": 0.5426, "step": 479 }, { "epoch": 0.1068090787716956, "grad_norm": 0.8994590537049945, "learning_rate": 9.847117705827114e-06, "loss": 0.5743, "step": 480 }, { "epoch": 0.1070315976858033, "grad_norm": 0.9748495850026638, "learning_rate": 9.846232154084973e-06, "loss": 0.5531, "step": 481 }, { "epoch": 0.10725411659991099, "grad_norm": 1.018106957210122, "learning_rate": 9.845344085070498e-06, "loss": 0.5695, "step": 482 }, { "epoch": 0.10747663551401869, "grad_norm": 0.9163792048758723, "learning_rate": 9.844453499244973e-06, "loss": 0.5565, "step": 483 }, { "epoch": 0.10769915442812639, "grad_norm": 1.1149713337895897, "learning_rate": 9.843560397070994e-06, "loss": 0.5667, "step": 484 }, { "epoch": 0.10792167334223408, "grad_norm": 0.9853368869711769, "learning_rate": 9.842664779012468e-06, "loss": 0.5323, "step": 485 }, { "epoch": 0.1081441922563418, "grad_norm": 1.0148754451710347, "learning_rate": 9.8417666455346e-06, "loss": 0.546, "step": 486 }, { "epoch": 0.10836671117044949, "grad_norm": 0.9190783913010255, "learning_rate": 9.840865997103908e-06, "loss": 0.5515, "step": 487 }, { "epoch": 0.10858923008455719, "grad_norm": 0.9537724551857988, "learning_rate": 9.839962834188214e-06, "loss": 0.5487, "step": 488 }, { "epoch": 0.10881174899866489, "grad_norm": 0.9428269840850623, "learning_rate": 9.83905715725665e-06, "loss": 0.552, "step": 489 }, { "epoch": 0.10903426791277258, "grad_norm": 0.9935769872484808, "learning_rate": 9.838148966779646e-06, "loss": 0.5495, "step": 490 }, { "epoch": 0.10925678682688028, "grad_norm": 1.025871020466048, "learning_rate": 9.837238263228946e-06, "loss": 0.5479, "step": 491 }, { "epoch": 0.10947930574098798, "grad_norm": 0.926691761718658, "learning_rate": 9.836325047077594e-06, "loss": 0.5545, "step": 492 }, { "epoch": 0.10970182465509569, "grad_norm": 1.0583856967928522, "learning_rate": 9.83540931879994e-06, "loss": 0.5621, "step": 493 }, { "epoch": 0.10992434356920339, "grad_norm": 1.1497941915506693, "learning_rate": 9.834491078871641e-06, "loss": 0.5605, "step": 494 }, { "epoch": 0.11014686248331108, "grad_norm": 1.0721726385515984, "learning_rate": 9.83357032776966e-06, "loss": 0.5414, "step": 495 }, { "epoch": 0.11036938139741878, "grad_norm": 0.9263031706590646, "learning_rate": 9.832647065972254e-06, "loss": 0.551, "step": 496 }, { "epoch": 0.11059190031152648, "grad_norm": 0.8722815772236665, "learning_rate": 9.831721293958998e-06, "loss": 0.5389, "step": 497 }, { "epoch": 0.11081441922563418, "grad_norm": 1.0137049956160964, "learning_rate": 9.830793012210763e-06, "loss": 0.5535, "step": 498 }, { "epoch": 0.11103693813974187, "grad_norm": 1.0266461425400135, "learning_rate": 9.829862221209723e-06, "loss": 0.5535, "step": 499 }, { "epoch": 0.11125945705384958, "grad_norm": 0.8897471079519594, "learning_rate": 9.82892892143936e-06, "loss": 0.5615, "step": 500 }, { "epoch": 0.11148197596795728, "grad_norm": 0.8907394146607435, "learning_rate": 9.827993113384458e-06, "loss": 0.5545, "step": 501 }, { "epoch": 0.11170449488206498, "grad_norm": 1.0415467252488535, "learning_rate": 9.827054797531099e-06, "loss": 0.5626, "step": 502 }, { "epoch": 0.11192701379617268, "grad_norm": 0.9459652674492752, "learning_rate": 9.826113974366676e-06, "loss": 0.5649, "step": 503 }, { "epoch": 0.11214953271028037, "grad_norm": 1.0125555367741406, "learning_rate": 9.825170644379874e-06, "loss": 0.5507, "step": 504 }, { "epoch": 0.11237205162438807, "grad_norm": 0.9822377432456207, "learning_rate": 9.82422480806069e-06, "loss": 0.5564, "step": 505 }, { "epoch": 0.11259457053849577, "grad_norm": 0.8531026172776778, "learning_rate": 9.823276465900416e-06, "loss": 0.5531, "step": 506 }, { "epoch": 0.11281708945260348, "grad_norm": 0.8646454189067118, "learning_rate": 9.822325618391649e-06, "loss": 0.5586, "step": 507 }, { "epoch": 0.11303960836671118, "grad_norm": 1.133022173291706, "learning_rate": 9.821372266028285e-06, "loss": 0.5518, "step": 508 }, { "epoch": 0.11326212728081887, "grad_norm": 1.2517970176132143, "learning_rate": 9.82041640930553e-06, "loss": 0.5591, "step": 509 }, { "epoch": 0.11348464619492657, "grad_norm": 0.9062553983290439, "learning_rate": 9.819458048719871e-06, "loss": 0.5281, "step": 510 }, { "epoch": 0.11370716510903427, "grad_norm": 1.0052361432055317, "learning_rate": 9.81849718476912e-06, "loss": 0.5659, "step": 511 }, { "epoch": 0.11392968402314196, "grad_norm": 1.049428343878516, "learning_rate": 9.81753381795237e-06, "loss": 0.5681, "step": 512 }, { "epoch": 0.11415220293724966, "grad_norm": 0.9436781099956104, "learning_rate": 9.816567948770024e-06, "loss": 0.5519, "step": 513 }, { "epoch": 0.11437472185135737, "grad_norm": 0.9274572097164743, "learning_rate": 9.815599577723782e-06, "loss": 0.5662, "step": 514 }, { "epoch": 0.11459724076546507, "grad_norm": 1.0727106418631354, "learning_rate": 9.814628705316645e-06, "loss": 0.5666, "step": 515 }, { "epoch": 0.11481975967957277, "grad_norm": 1.0557635636547953, "learning_rate": 9.81365533205291e-06, "loss": 0.5395, "step": 516 }, { "epoch": 0.11504227859368046, "grad_norm": 0.970381692083186, "learning_rate": 9.812679458438174e-06, "loss": 0.5294, "step": 517 }, { "epoch": 0.11526479750778816, "grad_norm": 1.1303205602773803, "learning_rate": 9.811701084979337e-06, "loss": 0.5406, "step": 518 }, { "epoch": 0.11548731642189586, "grad_norm": 1.1236560862686409, "learning_rate": 9.810720212184593e-06, "loss": 0.5345, "step": 519 }, { "epoch": 0.11570983533600356, "grad_norm": 1.0659925392826386, "learning_rate": 9.809736840563435e-06, "loss": 0.5642, "step": 520 }, { "epoch": 0.11593235425011127, "grad_norm": 1.0027130910189863, "learning_rate": 9.808750970626655e-06, "loss": 0.5583, "step": 521 }, { "epoch": 0.11615487316421896, "grad_norm": 0.939577079576889, "learning_rate": 9.807762602886343e-06, "loss": 0.5499, "step": 522 }, { "epoch": 0.11637739207832666, "grad_norm": 1.0584873266450106, "learning_rate": 9.806771737855885e-06, "loss": 0.5684, "step": 523 }, { "epoch": 0.11659991099243436, "grad_norm": 1.1346321786096891, "learning_rate": 9.805778376049964e-06, "loss": 0.5621, "step": 524 }, { "epoch": 0.11682242990654206, "grad_norm": 1.0463613165959311, "learning_rate": 9.804782517984561e-06, "loss": 0.5459, "step": 525 }, { "epoch": 0.11704494882064975, "grad_norm": 1.0049567093115999, "learning_rate": 9.803784164176953e-06, "loss": 0.5498, "step": 526 }, { "epoch": 0.11726746773475745, "grad_norm": 1.016442048046811, "learning_rate": 9.802783315145715e-06, "loss": 0.5756, "step": 527 }, { "epoch": 0.11748998664886515, "grad_norm": 0.9171076405006585, "learning_rate": 9.801779971410717e-06, "loss": 0.556, "step": 528 }, { "epoch": 0.11771250556297286, "grad_norm": 0.9836734065654165, "learning_rate": 9.800774133493121e-06, "loss": 0.5503, "step": 529 }, { "epoch": 0.11793502447708056, "grad_norm": 0.9538876791732545, "learning_rate": 9.799765801915393e-06, "loss": 0.5575, "step": 530 }, { "epoch": 0.11815754339118825, "grad_norm": 0.9630076289285693, "learning_rate": 9.798754977201285e-06, "loss": 0.5467, "step": 531 }, { "epoch": 0.11838006230529595, "grad_norm": 0.903708812600004, "learning_rate": 9.797741659875852e-06, "loss": 0.5438, "step": 532 }, { "epoch": 0.11860258121940365, "grad_norm": 0.9445322061370665, "learning_rate": 9.79672585046544e-06, "loss": 0.5667, "step": 533 }, { "epoch": 0.11882510013351134, "grad_norm": 0.97949859732014, "learning_rate": 9.795707549497685e-06, "loss": 0.5751, "step": 534 }, { "epoch": 0.11904761904761904, "grad_norm": 0.909750638818079, "learning_rate": 9.794686757501529e-06, "loss": 0.5522, "step": 535 }, { "epoch": 0.11927013796172675, "grad_norm": 0.947968745003621, "learning_rate": 9.793663475007196e-06, "loss": 0.5536, "step": 536 }, { "epoch": 0.11949265687583445, "grad_norm": 0.9475541108782485, "learning_rate": 9.792637702546207e-06, "loss": 0.5549, "step": 537 }, { "epoch": 0.11971517578994215, "grad_norm": 1.1700833514747555, "learning_rate": 9.791609440651382e-06, "loss": 0.5368, "step": 538 }, { "epoch": 0.11993769470404984, "grad_norm": 0.9608074999756434, "learning_rate": 9.790578689856826e-06, "loss": 0.5614, "step": 539 }, { "epoch": 0.12016021361815754, "grad_norm": 0.9102641524012285, "learning_rate": 9.789545450697944e-06, "loss": 0.5476, "step": 540 }, { "epoch": 0.12038273253226524, "grad_norm": 0.9808693181547891, "learning_rate": 9.788509723711427e-06, "loss": 0.5445, "step": 541 }, { "epoch": 0.12060525144637294, "grad_norm": 0.9482103903245619, "learning_rate": 9.787471509435264e-06, "loss": 0.5527, "step": 542 }, { "epoch": 0.12082777036048065, "grad_norm": 0.900937610605747, "learning_rate": 9.786430808408731e-06, "loss": 0.5413, "step": 543 }, { "epoch": 0.12105028927458834, "grad_norm": 0.9675005275518441, "learning_rate": 9.7853876211724e-06, "loss": 0.5491, "step": 544 }, { "epoch": 0.12127280818869604, "grad_norm": 0.9781608321116622, "learning_rate": 9.784341948268132e-06, "loss": 0.5725, "step": 545 }, { "epoch": 0.12149532710280374, "grad_norm": 1.0453824906277878, "learning_rate": 9.783293790239079e-06, "loss": 0.5489, "step": 546 }, { "epoch": 0.12171784601691144, "grad_norm": 0.9391659760108167, "learning_rate": 9.782243147629686e-06, "loss": 0.5532, "step": 547 }, { "epoch": 0.12194036493101913, "grad_norm": 0.9478607624476669, "learning_rate": 9.781190020985683e-06, "loss": 0.5385, "step": 548 }, { "epoch": 0.12216288384512683, "grad_norm": 0.9592952233368316, "learning_rate": 9.780134410854098e-06, "loss": 0.5423, "step": 549 }, { "epoch": 0.12238540275923454, "grad_norm": 0.9005640840106784, "learning_rate": 9.779076317783245e-06, "loss": 0.5358, "step": 550 }, { "epoch": 0.12260792167334224, "grad_norm": 0.9048708150403064, "learning_rate": 9.778015742322725e-06, "loss": 0.5523, "step": 551 }, { "epoch": 0.12283044058744993, "grad_norm": 0.9687030781512244, "learning_rate": 9.776952685023437e-06, "loss": 0.5645, "step": 552 }, { "epoch": 0.12305295950155763, "grad_norm": 0.8600284634566698, "learning_rate": 9.775887146437558e-06, "loss": 0.547, "step": 553 }, { "epoch": 0.12327547841566533, "grad_norm": 0.9342279451466626, "learning_rate": 9.774819127118561e-06, "loss": 0.5478, "step": 554 }, { "epoch": 0.12349799732977303, "grad_norm": 0.9328386108063909, "learning_rate": 9.773748627621208e-06, "loss": 0.554, "step": 555 }, { "epoch": 0.12372051624388072, "grad_norm": 0.8943719231431844, "learning_rate": 9.772675648501544e-06, "loss": 0.5338, "step": 556 }, { "epoch": 0.12394303515798843, "grad_norm": 0.8926005057781674, "learning_rate": 9.771600190316907e-06, "loss": 0.5397, "step": 557 }, { "epoch": 0.12416555407209613, "grad_norm": 0.8739989077560226, "learning_rate": 9.770522253625923e-06, "loss": 0.5448, "step": 558 }, { "epoch": 0.12438807298620383, "grad_norm": 0.9606025349384173, "learning_rate": 9.769441838988498e-06, "loss": 0.5469, "step": 559 }, { "epoch": 0.12461059190031153, "grad_norm": 0.9449476205829165, "learning_rate": 9.768358946965835e-06, "loss": 0.5534, "step": 560 }, { "epoch": 0.12483311081441922, "grad_norm": 0.9324357478117812, "learning_rate": 9.767273578120417e-06, "loss": 0.5469, "step": 561 }, { "epoch": 0.12505562972852693, "grad_norm": 0.9811536096591655, "learning_rate": 9.766185733016018e-06, "loss": 0.5346, "step": 562 }, { "epoch": 0.12527814864263462, "grad_norm": 0.962163416824528, "learning_rate": 9.765095412217693e-06, "loss": 0.5635, "step": 563 }, { "epoch": 0.12550066755674233, "grad_norm": 0.9202073621086627, "learning_rate": 9.764002616291788e-06, "loss": 0.5523, "step": 564 }, { "epoch": 0.12572318647085, "grad_norm": 0.8748541671676778, "learning_rate": 9.762907345805932e-06, "loss": 0.5549, "step": 565 }, { "epoch": 0.12594570538495772, "grad_norm": 0.9128863666260144, "learning_rate": 9.761809601329042e-06, "loss": 0.5523, "step": 566 }, { "epoch": 0.1261682242990654, "grad_norm": 0.9577366748307036, "learning_rate": 9.760709383431315e-06, "loss": 0.5439, "step": 567 }, { "epoch": 0.12639074321317312, "grad_norm": 0.9588155182760291, "learning_rate": 9.759606692684236e-06, "loss": 0.5537, "step": 568 }, { "epoch": 0.12661326212728083, "grad_norm": 0.8867704240827029, "learning_rate": 9.758501529660575e-06, "loss": 0.5363, "step": 569 }, { "epoch": 0.1268357810413885, "grad_norm": 0.8549714114331205, "learning_rate": 9.757393894934387e-06, "loss": 0.5588, "step": 570 }, { "epoch": 0.12705829995549622, "grad_norm": 0.9756822117695142, "learning_rate": 9.756283789081006e-06, "loss": 0.5705, "step": 571 }, { "epoch": 0.1272808188696039, "grad_norm": 0.967951376053242, "learning_rate": 9.755171212677058e-06, "loss": 0.5453, "step": 572 }, { "epoch": 0.12750333778371162, "grad_norm": 0.8975388003682269, "learning_rate": 9.754056166300443e-06, "loss": 0.5379, "step": 573 }, { "epoch": 0.1277258566978193, "grad_norm": 0.9822351666045825, "learning_rate": 9.75293865053035e-06, "loss": 0.5417, "step": 574 }, { "epoch": 0.127948375611927, "grad_norm": 0.9100504647117867, "learning_rate": 9.751818665947245e-06, "loss": 0.5291, "step": 575 }, { "epoch": 0.12817089452603472, "grad_norm": 0.9090789997977669, "learning_rate": 9.750696213132887e-06, "loss": 0.5368, "step": 576 }, { "epoch": 0.1283934134401424, "grad_norm": 0.9701021367015998, "learning_rate": 9.749571292670305e-06, "loss": 0.5437, "step": 577 }, { "epoch": 0.12861593235425012, "grad_norm": 0.8772690916668321, "learning_rate": 9.748443905143816e-06, "loss": 0.5321, "step": 578 }, { "epoch": 0.1288384512683578, "grad_norm": 0.861953323474335, "learning_rate": 9.74731405113902e-06, "loss": 0.5304, "step": 579 }, { "epoch": 0.1290609701824655, "grad_norm": 0.959547090013277, "learning_rate": 9.746181731242793e-06, "loss": 0.5311, "step": 580 }, { "epoch": 0.1292834890965732, "grad_norm": 0.9374678486939397, "learning_rate": 9.745046946043296e-06, "loss": 0.5436, "step": 581 }, { "epoch": 0.1295060080106809, "grad_norm": 0.9225670913155629, "learning_rate": 9.743909696129967e-06, "loss": 0.5447, "step": 582 }, { "epoch": 0.12972852692478862, "grad_norm": 0.8924833873396287, "learning_rate": 9.742769982093528e-06, "loss": 0.5414, "step": 583 }, { "epoch": 0.1299510458388963, "grad_norm": 0.9299781480835446, "learning_rate": 9.741627804525978e-06, "loss": 0.5611, "step": 584 }, { "epoch": 0.130173564753004, "grad_norm": 1.0583736397247987, "learning_rate": 9.7404831640206e-06, "loss": 0.5547, "step": 585 }, { "epoch": 0.1303960836671117, "grad_norm": 0.9042011079902754, "learning_rate": 9.739336061171949e-06, "loss": 0.5532, "step": 586 }, { "epoch": 0.1306186025812194, "grad_norm": 0.8556844896511209, "learning_rate": 9.738186496575865e-06, "loss": 0.5412, "step": 587 }, { "epoch": 0.1308411214953271, "grad_norm": 0.9430691489934626, "learning_rate": 9.737034470829467e-06, "loss": 0.5366, "step": 588 }, { "epoch": 0.1310636404094348, "grad_norm": 0.8959981238919855, "learning_rate": 9.735879984531147e-06, "loss": 0.5496, "step": 589 }, { "epoch": 0.1312861593235425, "grad_norm": 0.9835083926952387, "learning_rate": 9.73472303828058e-06, "loss": 0.5585, "step": 590 }, { "epoch": 0.1315086782376502, "grad_norm": 0.8868632053239338, "learning_rate": 9.733563632678717e-06, "loss": 0.5438, "step": 591 }, { "epoch": 0.1317311971517579, "grad_norm": 0.8721758180305875, "learning_rate": 9.732401768327787e-06, "loss": 0.546, "step": 592 }, { "epoch": 0.1319537160658656, "grad_norm": 2.5233165491776934, "learning_rate": 9.731237445831295e-06, "loss": 0.5505, "step": 593 }, { "epoch": 0.1321762349799733, "grad_norm": 1.02006797723225, "learning_rate": 9.730070665794024e-06, "loss": 0.544, "step": 594 }, { "epoch": 0.13239875389408098, "grad_norm": 0.9575246422173022, "learning_rate": 9.728901428822033e-06, "loss": 0.5605, "step": 595 }, { "epoch": 0.1326212728081887, "grad_norm": 1.0418261441873309, "learning_rate": 9.727729735522657e-06, "loss": 0.546, "step": 596 }, { "epoch": 0.1328437917222964, "grad_norm": 0.8712242154160539, "learning_rate": 9.726555586504506e-06, "loss": 0.5432, "step": 597 }, { "epoch": 0.1330663106364041, "grad_norm": 0.8758125439358164, "learning_rate": 9.725378982377472e-06, "loss": 0.5381, "step": 598 }, { "epoch": 0.1332888295505118, "grad_norm": 0.9504378903381056, "learning_rate": 9.72419992375271e-06, "loss": 0.5534, "step": 599 }, { "epoch": 0.13351134846461948, "grad_norm": 0.9257007583111715, "learning_rate": 9.723018411242662e-06, "loss": 0.5459, "step": 600 }, { "epoch": 0.1337338673787272, "grad_norm": 0.9347436620607706, "learning_rate": 9.721834445461038e-06, "loss": 0.5497, "step": 601 }, { "epoch": 0.13395638629283488, "grad_norm": 0.9837473080606224, "learning_rate": 9.720648027022822e-06, "loss": 0.5487, "step": 602 }, { "epoch": 0.1341789052069426, "grad_norm": 0.8454633567000346, "learning_rate": 9.719459156544276e-06, "loss": 0.5431, "step": 603 }, { "epoch": 0.1344014241210503, "grad_norm": 1.0397467328974217, "learning_rate": 9.718267834642933e-06, "loss": 0.5458, "step": 604 }, { "epoch": 0.13462394303515798, "grad_norm": 0.9284787885322171, "learning_rate": 9.717074061937601e-06, "loss": 0.5463, "step": 605 }, { "epoch": 0.1348464619492657, "grad_norm": 1.0961189221956908, "learning_rate": 9.715877839048357e-06, "loss": 0.5481, "step": 606 }, { "epoch": 0.13506898086337338, "grad_norm": 0.8812260813593955, "learning_rate": 9.714679166596557e-06, "loss": 0.5437, "step": 607 }, { "epoch": 0.1352914997774811, "grad_norm": 0.9088552354022158, "learning_rate": 9.713478045204823e-06, "loss": 0.5523, "step": 608 }, { "epoch": 0.13551401869158877, "grad_norm": 0.8887500281820954, "learning_rate": 9.712274475497055e-06, "loss": 0.5357, "step": 609 }, { "epoch": 0.13573653760569648, "grad_norm": 0.9051410874001146, "learning_rate": 9.711068458098418e-06, "loss": 0.5431, "step": 610 }, { "epoch": 0.1359590565198042, "grad_norm": 0.9028504312517174, "learning_rate": 9.709859993635356e-06, "loss": 0.5543, "step": 611 }, { "epoch": 0.13618157543391188, "grad_norm": 0.9275211951420725, "learning_rate": 9.708649082735576e-06, "loss": 0.539, "step": 612 }, { "epoch": 0.1364040943480196, "grad_norm": 0.9545487843627571, "learning_rate": 9.707435726028062e-06, "loss": 0.5504, "step": 613 }, { "epoch": 0.13662661326212727, "grad_norm": 0.893082517079525, "learning_rate": 9.706219924143068e-06, "loss": 0.548, "step": 614 }, { "epoch": 0.13684913217623498, "grad_norm": 0.9776725022937977, "learning_rate": 9.705001677712115e-06, "loss": 0.5502, "step": 615 }, { "epoch": 0.13707165109034267, "grad_norm": 1.1404641354331617, "learning_rate": 9.703780987367996e-06, "loss": 0.5724, "step": 616 }, { "epoch": 0.13729417000445038, "grad_norm": 0.9900614903851451, "learning_rate": 9.702557853744772e-06, "loss": 0.5328, "step": 617 }, { "epoch": 0.1375166889185581, "grad_norm": 0.9086570861603722, "learning_rate": 9.701332277477772e-06, "loss": 0.5748, "step": 618 }, { "epoch": 0.13773920783266577, "grad_norm": 0.892512646649302, "learning_rate": 9.700104259203598e-06, "loss": 0.5306, "step": 619 }, { "epoch": 0.13796172674677348, "grad_norm": 0.9309534546396312, "learning_rate": 9.698873799560117e-06, "loss": 0.5563, "step": 620 }, { "epoch": 0.13818424566088117, "grad_norm": 0.9750581318241945, "learning_rate": 9.697640899186466e-06, "loss": 0.5445, "step": 621 }, { "epoch": 0.13840676457498888, "grad_norm": 0.969745206814096, "learning_rate": 9.696405558723047e-06, "loss": 0.5543, "step": 622 }, { "epoch": 0.13862928348909656, "grad_norm": 0.8455731973969268, "learning_rate": 9.695167778811534e-06, "loss": 0.5463, "step": 623 }, { "epoch": 0.13885180240320427, "grad_norm": 0.9373897233282399, "learning_rate": 9.693927560094864e-06, "loss": 0.5595, "step": 624 }, { "epoch": 0.13907432131731198, "grad_norm": 0.8392712002600807, "learning_rate": 9.692684903217243e-06, "loss": 0.5264, "step": 625 }, { "epoch": 0.13929684023141967, "grad_norm": 0.9331954371930703, "learning_rate": 9.691439808824142e-06, "loss": 0.5674, "step": 626 }, { "epoch": 0.13951935914552738, "grad_norm": 0.9434844381040777, "learning_rate": 9.690192277562298e-06, "loss": 0.5517, "step": 627 }, { "epoch": 0.13974187805963506, "grad_norm": 0.9377314561302348, "learning_rate": 9.688942310079715e-06, "loss": 0.5536, "step": 628 }, { "epoch": 0.13996439697374277, "grad_norm": 0.9544734230544327, "learning_rate": 9.687689907025664e-06, "loss": 0.5541, "step": 629 }, { "epoch": 0.14018691588785046, "grad_norm": 0.8962637959944557, "learning_rate": 9.686435069050676e-06, "loss": 0.5461, "step": 630 }, { "epoch": 0.14040943480195817, "grad_norm": 1.0178961255936685, "learning_rate": 9.685177796806554e-06, "loss": 0.5745, "step": 631 }, { "epoch": 0.14063195371606588, "grad_norm": 0.9545846710634235, "learning_rate": 9.683918090946358e-06, "loss": 0.5566, "step": 632 }, { "epoch": 0.14085447263017356, "grad_norm": 0.949837727645926, "learning_rate": 9.682655952124416e-06, "loss": 0.5519, "step": 633 }, { "epoch": 0.14107699154428127, "grad_norm": 1.0309406769957994, "learning_rate": 9.681391380996321e-06, "loss": 0.5564, "step": 634 }, { "epoch": 0.14129951045838895, "grad_norm": 0.9227981630115369, "learning_rate": 9.680124378218925e-06, "loss": 0.5563, "step": 635 }, { "epoch": 0.14152202937249667, "grad_norm": 0.8853214885031337, "learning_rate": 9.678854944450348e-06, "loss": 0.5763, "step": 636 }, { "epoch": 0.14174454828660435, "grad_norm": 0.9031784799969276, "learning_rate": 9.677583080349968e-06, "loss": 0.5279, "step": 637 }, { "epoch": 0.14196706720071206, "grad_norm": 0.8980959779418652, "learning_rate": 9.67630878657843e-06, "loss": 0.5433, "step": 638 }, { "epoch": 0.14218958611481977, "grad_norm": 0.9346482672804807, "learning_rate": 9.675032063797638e-06, "loss": 0.5473, "step": 639 }, { "epoch": 0.14241210502892745, "grad_norm": 0.9509051440000089, "learning_rate": 9.67375291267076e-06, "loss": 0.5441, "step": 640 }, { "epoch": 0.14263462394303517, "grad_norm": 0.9125015512261022, "learning_rate": 9.67247133386222e-06, "loss": 0.5357, "step": 641 }, { "epoch": 0.14285714285714285, "grad_norm": 0.9314185116749141, "learning_rate": 9.67118732803771e-06, "loss": 0.5448, "step": 642 }, { "epoch": 0.14307966177125056, "grad_norm": 0.9168385735613417, "learning_rate": 9.66990089586418e-06, "loss": 0.5417, "step": 643 }, { "epoch": 0.14330218068535824, "grad_norm": 0.9707200831539041, "learning_rate": 9.668612038009836e-06, "loss": 0.5378, "step": 644 }, { "epoch": 0.14352469959946595, "grad_norm": 0.9027867687140426, "learning_rate": 9.667320755144155e-06, "loss": 0.5666, "step": 645 }, { "epoch": 0.14374721851357367, "grad_norm": 1.0235850173954075, "learning_rate": 9.666027047937858e-06, "loss": 0.5441, "step": 646 }, { "epoch": 0.14396973742768135, "grad_norm": 0.9253296886579686, "learning_rate": 9.664730917062939e-06, "loss": 0.555, "step": 647 }, { "epoch": 0.14419225634178906, "grad_norm": 0.9219557921742162, "learning_rate": 9.663432363192644e-06, "loss": 0.5251, "step": 648 }, { "epoch": 0.14441477525589674, "grad_norm": 0.9558255442360417, "learning_rate": 9.662131387001481e-06, "loss": 0.5575, "step": 649 }, { "epoch": 0.14463729417000445, "grad_norm": 0.8876622085603351, "learning_rate": 9.660827989165211e-06, "loss": 0.5267, "step": 650 }, { "epoch": 0.14485981308411214, "grad_norm": 0.9768592879437584, "learning_rate": 9.65952217036086e-06, "loss": 0.5675, "step": 651 }, { "epoch": 0.14508233199821985, "grad_norm": 0.9228132715800103, "learning_rate": 9.658213931266705e-06, "loss": 0.5382, "step": 652 }, { "epoch": 0.14530485091232756, "grad_norm": 0.9060144696273864, "learning_rate": 9.656903272562286e-06, "loss": 0.5389, "step": 653 }, { "epoch": 0.14552736982643524, "grad_norm": 0.9667310977040396, "learning_rate": 9.655590194928392e-06, "loss": 0.5422, "step": 654 }, { "epoch": 0.14574988874054295, "grad_norm": 0.9139487265137315, "learning_rate": 9.65427469904708e-06, "loss": 0.534, "step": 655 }, { "epoch": 0.14597240765465064, "grad_norm": 0.9418825719116372, "learning_rate": 9.652956785601651e-06, "loss": 0.5548, "step": 656 }, { "epoch": 0.14619492656875835, "grad_norm": 0.9858310399464186, "learning_rate": 9.651636455276668e-06, "loss": 0.5643, "step": 657 }, { "epoch": 0.14641744548286603, "grad_norm": 1.0369261800355283, "learning_rate": 9.65031370875795e-06, "loss": 0.5439, "step": 658 }, { "epoch": 0.14663996439697374, "grad_norm": 0.921095995581829, "learning_rate": 9.648988546732567e-06, "loss": 0.5268, "step": 659 }, { "epoch": 0.14686248331108145, "grad_norm": 0.99372716054472, "learning_rate": 9.647660969888852e-06, "loss": 0.5683, "step": 660 }, { "epoch": 0.14708500222518914, "grad_norm": 0.8958072950398511, "learning_rate": 9.64633097891638e-06, "loss": 0.5509, "step": 661 }, { "epoch": 0.14730752113929685, "grad_norm": 0.9473968267130983, "learning_rate": 9.64499857450599e-06, "loss": 0.5463, "step": 662 }, { "epoch": 0.14753004005340453, "grad_norm": 1.0083514901345518, "learning_rate": 9.64366375734977e-06, "loss": 0.5379, "step": 663 }, { "epoch": 0.14775255896751224, "grad_norm": 0.9130065730577026, "learning_rate": 9.642326528141064e-06, "loss": 0.532, "step": 664 }, { "epoch": 0.14797507788161993, "grad_norm": 0.9888383019819288, "learning_rate": 9.640986887574466e-06, "loss": 0.5618, "step": 665 }, { "epoch": 0.14819759679572764, "grad_norm": 0.8948421539900111, "learning_rate": 9.639644836345823e-06, "loss": 0.5395, "step": 666 }, { "epoch": 0.14842011570983535, "grad_norm": 0.9843252550804905, "learning_rate": 9.638300375152236e-06, "loss": 0.5358, "step": 667 }, { "epoch": 0.14864263462394303, "grad_norm": 0.9553898756797502, "learning_rate": 9.63695350469206e-06, "loss": 0.5538, "step": 668 }, { "epoch": 0.14886515353805074, "grad_norm": 0.93362308642703, "learning_rate": 9.635604225664892e-06, "loss": 0.5392, "step": 669 }, { "epoch": 0.14908767245215843, "grad_norm": 0.9432084919655315, "learning_rate": 9.634252538771588e-06, "loss": 0.5347, "step": 670 }, { "epoch": 0.14931019136626614, "grad_norm": 1.0359089629107352, "learning_rate": 9.632898444714258e-06, "loss": 0.5411, "step": 671 }, { "epoch": 0.14953271028037382, "grad_norm": 0.9881078169923376, "learning_rate": 9.631541944196254e-06, "loss": 0.5483, "step": 672 }, { "epoch": 0.14975522919448153, "grad_norm": 1.0233450678821627, "learning_rate": 9.630183037922178e-06, "loss": 0.5625, "step": 673 }, { "epoch": 0.14997774810858924, "grad_norm": 1.0435507790313767, "learning_rate": 9.62882172659789e-06, "loss": 0.5422, "step": 674 }, { "epoch": 0.15020026702269693, "grad_norm": 1.0126570114981068, "learning_rate": 9.627458010930493e-06, "loss": 0.5525, "step": 675 }, { "epoch": 0.15042278593680464, "grad_norm": 0.8945968860472373, "learning_rate": 9.62609189162834e-06, "loss": 0.5454, "step": 676 }, { "epoch": 0.15064530485091232, "grad_norm": 1.1104756475270483, "learning_rate": 9.62472336940103e-06, "loss": 0.5672, "step": 677 }, { "epoch": 0.15086782376502003, "grad_norm": 0.9884637972390335, "learning_rate": 9.623352444959418e-06, "loss": 0.5403, "step": 678 }, { "epoch": 0.15109034267912771, "grad_norm": 0.872063063760088, "learning_rate": 9.621979119015596e-06, "loss": 0.5548, "step": 679 }, { "epoch": 0.15131286159323543, "grad_norm": 1.075392623698589, "learning_rate": 9.620603392282912e-06, "loss": 0.5478, "step": 680 }, { "epoch": 0.15153538050734314, "grad_norm": 1.050670886162213, "learning_rate": 9.61922526547596e-06, "loss": 0.5482, "step": 681 }, { "epoch": 0.15175789942145082, "grad_norm": 1.0245171787761609, "learning_rate": 9.617844739310573e-06, "loss": 0.5604, "step": 682 }, { "epoch": 0.15198041833555853, "grad_norm": 0.9914923616047586, "learning_rate": 9.616461814503841e-06, "loss": 0.5513, "step": 683 }, { "epoch": 0.15220293724966621, "grad_norm": 0.944338263112598, "learning_rate": 9.615076491774093e-06, "loss": 0.5363, "step": 684 }, { "epoch": 0.15242545616377393, "grad_norm": 0.9438780795315463, "learning_rate": 9.613688771840907e-06, "loss": 0.5482, "step": 685 }, { "epoch": 0.1526479750778816, "grad_norm": 0.9667076978368354, "learning_rate": 9.612298655425101e-06, "loss": 0.5443, "step": 686 }, { "epoch": 0.15287049399198932, "grad_norm": 0.9707346506700251, "learning_rate": 9.610906143248746e-06, "loss": 0.535, "step": 687 }, { "epoch": 0.15309301290609703, "grad_norm": 0.9817026934418788, "learning_rate": 9.60951123603515e-06, "loss": 0.5697, "step": 688 }, { "epoch": 0.15331553182020471, "grad_norm": 0.9602274167554373, "learning_rate": 9.608113934508866e-06, "loss": 0.5443, "step": 689 }, { "epoch": 0.15353805073431243, "grad_norm": 0.8449183991600171, "learning_rate": 9.6067142393957e-06, "loss": 0.5408, "step": 690 }, { "epoch": 0.1537605696484201, "grad_norm": 0.8651282713036196, "learning_rate": 9.605312151422686e-06, "loss": 0.5507, "step": 691 }, { "epoch": 0.15398308856252782, "grad_norm": 1.0388289410305183, "learning_rate": 9.603907671318111e-06, "loss": 0.5579, "step": 692 }, { "epoch": 0.1542056074766355, "grad_norm": 0.9183275094014359, "learning_rate": 9.602500799811504e-06, "loss": 0.5328, "step": 693 }, { "epoch": 0.15442812639074321, "grad_norm": 0.9401365544644374, "learning_rate": 9.601091537633635e-06, "loss": 0.5507, "step": 694 }, { "epoch": 0.15465064530485093, "grad_norm": 0.9558152819653187, "learning_rate": 9.599679885516513e-06, "loss": 0.54, "step": 695 }, { "epoch": 0.1548731642189586, "grad_norm": 0.970440412636145, "learning_rate": 9.598265844193393e-06, "loss": 0.5409, "step": 696 }, { "epoch": 0.15509568313306632, "grad_norm": 0.904912306548271, "learning_rate": 9.596849414398765e-06, "loss": 0.5335, "step": 697 }, { "epoch": 0.155318202047174, "grad_norm": 1.0456961786317749, "learning_rate": 9.595430596868368e-06, "loss": 0.5433, "step": 698 }, { "epoch": 0.15554072096128171, "grad_norm": 1.0568676672875061, "learning_rate": 9.594009392339174e-06, "loss": 0.5705, "step": 699 }, { "epoch": 0.1557632398753894, "grad_norm": 0.8796475214280566, "learning_rate": 9.592585801549396e-06, "loss": 0.5565, "step": 700 }, { "epoch": 0.1559857587894971, "grad_norm": 0.9093421223413819, "learning_rate": 9.591159825238493e-06, "loss": 0.5504, "step": 701 }, { "epoch": 0.15620827770360482, "grad_norm": 0.856989854667778, "learning_rate": 9.589731464147154e-06, "loss": 0.5254, "step": 702 }, { "epoch": 0.1564307966177125, "grad_norm": 0.8715411067292648, "learning_rate": 9.588300719017312e-06, "loss": 0.5266, "step": 703 }, { "epoch": 0.15665331553182021, "grad_norm": 0.8625790556795288, "learning_rate": 9.586867590592134e-06, "loss": 0.5498, "step": 704 }, { "epoch": 0.1568758344459279, "grad_norm": 0.9326633053459101, "learning_rate": 9.585432079616034e-06, "loss": 0.5478, "step": 705 }, { "epoch": 0.1570983533600356, "grad_norm": 0.8807413458832232, "learning_rate": 9.583994186834655e-06, "loss": 0.5462, "step": 706 }, { "epoch": 0.1573208722741433, "grad_norm": 0.8953948850077482, "learning_rate": 9.58255391299488e-06, "loss": 0.5547, "step": 707 }, { "epoch": 0.157543391188251, "grad_norm": 0.9722474145683878, "learning_rate": 9.581111258844826e-06, "loss": 0.5348, "step": 708 }, { "epoch": 0.15776591010235871, "grad_norm": 0.9313121761583617, "learning_rate": 9.579666225133854e-06, "loss": 0.5446, "step": 709 }, { "epoch": 0.1579884290164664, "grad_norm": 0.9586701763009389, "learning_rate": 9.578218812612552e-06, "loss": 0.5604, "step": 710 }, { "epoch": 0.1582109479305741, "grad_norm": 0.965867423472214, "learning_rate": 9.57676902203275e-06, "loss": 0.5286, "step": 711 }, { "epoch": 0.1584334668446818, "grad_norm": 1.0679802580196849, "learning_rate": 9.575316854147509e-06, "loss": 0.5741, "step": 712 }, { "epoch": 0.1586559857587895, "grad_norm": 0.965400981084224, "learning_rate": 9.573862309711129e-06, "loss": 0.5402, "step": 713 }, { "epoch": 0.1588785046728972, "grad_norm": 0.9059139779144411, "learning_rate": 9.57240538947914e-06, "loss": 0.5356, "step": 714 }, { "epoch": 0.1591010235870049, "grad_norm": 0.8760685713495217, "learning_rate": 9.570946094208308e-06, "loss": 0.5287, "step": 715 }, { "epoch": 0.1593235425011126, "grad_norm": 1.0536597805169536, "learning_rate": 9.569484424656636e-06, "loss": 0.5469, "step": 716 }, { "epoch": 0.1595460614152203, "grad_norm": 1.0392897037289208, "learning_rate": 9.568020381583356e-06, "loss": 0.5446, "step": 717 }, { "epoch": 0.159768580329328, "grad_norm": 0.953218239702701, "learning_rate": 9.566553965748932e-06, "loss": 0.547, "step": 718 }, { "epoch": 0.15999109924343569, "grad_norm": 0.9873930547162564, "learning_rate": 9.565085177915064e-06, "loss": 0.5489, "step": 719 }, { "epoch": 0.1602136181575434, "grad_norm": 0.9507242707151682, "learning_rate": 9.563614018844683e-06, "loss": 0.5356, "step": 720 }, { "epoch": 0.16043613707165108, "grad_norm": 0.9996612167920728, "learning_rate": 9.562140489301952e-06, "loss": 0.5406, "step": 721 }, { "epoch": 0.1606586559857588, "grad_norm": 1.0114528496690418, "learning_rate": 9.560664590052261e-06, "loss": 0.5657, "step": 722 }, { "epoch": 0.16088117489986647, "grad_norm": 0.8815328476400062, "learning_rate": 9.559186321862239e-06, "loss": 0.5439, "step": 723 }, { "epoch": 0.16110369381397419, "grad_norm": 0.850153684425134, "learning_rate": 9.557705685499741e-06, "loss": 0.5493, "step": 724 }, { "epoch": 0.1613262127280819, "grad_norm": 0.865906235237232, "learning_rate": 9.556222681733846e-06, "loss": 0.5534, "step": 725 }, { "epoch": 0.16154873164218958, "grad_norm": 0.916349681648505, "learning_rate": 9.554737311334876e-06, "loss": 0.5463, "step": 726 }, { "epoch": 0.1617712505562973, "grad_norm": 0.847634528221112, "learning_rate": 9.553249575074372e-06, "loss": 0.5451, "step": 727 }, { "epoch": 0.16199376947040497, "grad_norm": 0.959410751989895, "learning_rate": 9.551759473725106e-06, "loss": 0.5269, "step": 728 }, { "epoch": 0.16221628838451269, "grad_norm": 0.8590049453112965, "learning_rate": 9.550267008061081e-06, "loss": 0.5331, "step": 729 }, { "epoch": 0.16243880729862037, "grad_norm": 0.8805032607112856, "learning_rate": 9.548772178857526e-06, "loss": 0.5142, "step": 730 }, { "epoch": 0.16266132621272808, "grad_norm": 0.8558752415388174, "learning_rate": 9.547274986890899e-06, "loss": 0.5451, "step": 731 }, { "epoch": 0.1628838451268358, "grad_norm": 0.981011634673489, "learning_rate": 9.545775432938883e-06, "loss": 0.5363, "step": 732 }, { "epoch": 0.16310636404094347, "grad_norm": 0.9387078154089442, "learning_rate": 9.54427351778039e-06, "loss": 0.5391, "step": 733 }, { "epoch": 0.16332888295505119, "grad_norm": 0.9484859831471745, "learning_rate": 9.542769242195559e-06, "loss": 0.5466, "step": 734 }, { "epoch": 0.16355140186915887, "grad_norm": 0.9044628289588839, "learning_rate": 9.541262606965755e-06, "loss": 0.5485, "step": 735 }, { "epoch": 0.16377392078326658, "grad_norm": 0.8837308025858697, "learning_rate": 9.539753612873565e-06, "loss": 0.5353, "step": 736 }, { "epoch": 0.16399643969737426, "grad_norm": 0.9037757364106773, "learning_rate": 9.538242260702805e-06, "loss": 0.5384, "step": 737 }, { "epoch": 0.16421895861148197, "grad_norm": 0.9335812130362199, "learning_rate": 9.536728551238515e-06, "loss": 0.5381, "step": 738 }, { "epoch": 0.16444147752558969, "grad_norm": 0.882077847439675, "learning_rate": 9.535212485266959e-06, "loss": 0.5548, "step": 739 }, { "epoch": 0.16466399643969737, "grad_norm": 0.9385475922551582, "learning_rate": 9.533694063575623e-06, "loss": 0.5436, "step": 740 }, { "epoch": 0.16488651535380508, "grad_norm": 1.0943717858693518, "learning_rate": 9.532173286953224e-06, "loss": 0.5392, "step": 741 }, { "epoch": 0.16510903426791276, "grad_norm": 0.9396454006367052, "learning_rate": 9.530650156189692e-06, "loss": 0.5438, "step": 742 }, { "epoch": 0.16533155318202047, "grad_norm": 0.9352021019304755, "learning_rate": 9.529124672076189e-06, "loss": 0.5513, "step": 743 }, { "epoch": 0.16555407209612816, "grad_norm": 0.9228680816577084, "learning_rate": 9.527596835405093e-06, "loss": 0.54, "step": 744 }, { "epoch": 0.16577659101023587, "grad_norm": 0.922219551682817, "learning_rate": 9.526066646970007e-06, "loss": 0.5297, "step": 745 }, { "epoch": 0.16599910992434358, "grad_norm": 0.9931566200582717, "learning_rate": 9.524534107565752e-06, "loss": 0.5407, "step": 746 }, { "epoch": 0.16622162883845126, "grad_norm": 0.918504960229862, "learning_rate": 9.522999217988378e-06, "loss": 0.5297, "step": 747 }, { "epoch": 0.16644414775255897, "grad_norm": 0.873074718210212, "learning_rate": 9.52146197903515e-06, "loss": 0.5515, "step": 748 }, { "epoch": 0.16666666666666666, "grad_norm": 0.8750364373614109, "learning_rate": 9.51992239150455e-06, "loss": 0.5365, "step": 749 }, { "epoch": 0.16688918558077437, "grad_norm": 0.9506017076118498, "learning_rate": 9.518380456196286e-06, "loss": 0.5401, "step": 750 }, { "epoch": 0.16711170449488205, "grad_norm": 0.9328062119902848, "learning_rate": 9.516836173911285e-06, "loss": 0.5495, "step": 751 }, { "epoch": 0.16733422340898976, "grad_norm": 0.9014721660265961, "learning_rate": 9.515289545451691e-06, "loss": 0.5531, "step": 752 }, { "epoch": 0.16755674232309747, "grad_norm": 0.9504908688379576, "learning_rate": 9.513740571620868e-06, "loss": 0.5418, "step": 753 }, { "epoch": 0.16777926123720516, "grad_norm": 0.8789448787963793, "learning_rate": 9.512189253223397e-06, "loss": 0.5469, "step": 754 }, { "epoch": 0.16800178015131287, "grad_norm": 0.8971478397708276, "learning_rate": 9.510635591065073e-06, "loss": 0.5517, "step": 755 }, { "epoch": 0.16822429906542055, "grad_norm": 0.8707593727118694, "learning_rate": 9.50907958595292e-06, "loss": 0.544, "step": 756 }, { "epoch": 0.16844681797952826, "grad_norm": 0.8877645285897113, "learning_rate": 9.50752123869517e-06, "loss": 0.5558, "step": 757 }, { "epoch": 0.16866933689363595, "grad_norm": 0.9672003030872276, "learning_rate": 9.505960550101269e-06, "loss": 0.5665, "step": 758 }, { "epoch": 0.16889185580774366, "grad_norm": 0.902827975596195, "learning_rate": 9.504397520981889e-06, "loss": 0.562, "step": 759 }, { "epoch": 0.16911437472185137, "grad_norm": 0.8411085467616431, "learning_rate": 9.502832152148907e-06, "loss": 0.5484, "step": 760 }, { "epoch": 0.16933689363595905, "grad_norm": 0.8659899821324305, "learning_rate": 9.501264444415426e-06, "loss": 0.5348, "step": 761 }, { "epoch": 0.16955941255006676, "grad_norm": 1.0520980231144084, "learning_rate": 9.499694398595753e-06, "loss": 0.5416, "step": 762 }, { "epoch": 0.16978193146417445, "grad_norm": 0.9536783913902249, "learning_rate": 9.498122015505419e-06, "loss": 0.5598, "step": 763 }, { "epoch": 0.17000445037828216, "grad_norm": 1.2432991107051734, "learning_rate": 9.496547295961165e-06, "loss": 0.5426, "step": 764 }, { "epoch": 0.17022696929238984, "grad_norm": 0.9289081123625283, "learning_rate": 9.494970240780944e-06, "loss": 0.548, "step": 765 }, { "epoch": 0.17044948820649755, "grad_norm": 1.086866620222976, "learning_rate": 9.493390850783923e-06, "loss": 0.5526, "step": 766 }, { "epoch": 0.17067200712060526, "grad_norm": 0.9224747744240727, "learning_rate": 9.491809126790486e-06, "loss": 0.5536, "step": 767 }, { "epoch": 0.17089452603471295, "grad_norm": 1.0456856046541303, "learning_rate": 9.490225069622221e-06, "loss": 0.5274, "step": 768 }, { "epoch": 0.17111704494882066, "grad_norm": 0.9753285150455994, "learning_rate": 9.488638680101939e-06, "loss": 0.528, "step": 769 }, { "epoch": 0.17133956386292834, "grad_norm": 0.882882239729254, "learning_rate": 9.487049959053649e-06, "loss": 0.5637, "step": 770 }, { "epoch": 0.17156208277703605, "grad_norm": 0.931981701260997, "learning_rate": 9.485458907302585e-06, "loss": 0.5479, "step": 771 }, { "epoch": 0.17178460169114373, "grad_norm": 0.943835402119584, "learning_rate": 9.48386552567518e-06, "loss": 0.5529, "step": 772 }, { "epoch": 0.17200712060525145, "grad_norm": 0.9931892512529527, "learning_rate": 9.482269814999085e-06, "loss": 0.5573, "step": 773 }, { "epoch": 0.17222963951935916, "grad_norm": 0.9173773757661029, "learning_rate": 9.480671776103158e-06, "loss": 0.5332, "step": 774 }, { "epoch": 0.17245215843346684, "grad_norm": 0.9408429920835885, "learning_rate": 9.479071409817467e-06, "loss": 0.5534, "step": 775 }, { "epoch": 0.17267467734757455, "grad_norm": 0.9235888845805659, "learning_rate": 9.477468716973287e-06, "loss": 0.5464, "step": 776 }, { "epoch": 0.17289719626168223, "grad_norm": 0.9138121142178038, "learning_rate": 9.475863698403103e-06, "loss": 0.5542, "step": 777 }, { "epoch": 0.17311971517578995, "grad_norm": 0.8614722842335822, "learning_rate": 9.474256354940606e-06, "loss": 0.5408, "step": 778 }, { "epoch": 0.17334223408989763, "grad_norm": 0.9647456855235993, "learning_rate": 9.4726466874207e-06, "loss": 0.5529, "step": 779 }, { "epoch": 0.17356475300400534, "grad_norm": 1.086201362110904, "learning_rate": 9.471034696679489e-06, "loss": 0.5412, "step": 780 }, { "epoch": 0.17378727191811305, "grad_norm": 1.0346613570950574, "learning_rate": 9.46942038355429e-06, "loss": 0.5512, "step": 781 }, { "epoch": 0.17400979083222073, "grad_norm": 0.9276822159934464, "learning_rate": 9.467803748883624e-06, "loss": 0.5427, "step": 782 }, { "epoch": 0.17423230974632845, "grad_norm": 0.9230547560503501, "learning_rate": 9.466184793507215e-06, "loss": 0.553, "step": 783 }, { "epoch": 0.17445482866043613, "grad_norm": 0.9199021779624202, "learning_rate": 9.464563518265997e-06, "loss": 0.5434, "step": 784 }, { "epoch": 0.17467734757454384, "grad_norm": 0.999641061558486, "learning_rate": 9.462939924002105e-06, "loss": 0.535, "step": 785 }, { "epoch": 0.17489986648865152, "grad_norm": 0.928456679195382, "learning_rate": 9.461314011558881e-06, "loss": 0.5391, "step": 786 }, { "epoch": 0.17512238540275923, "grad_norm": 1.1392068059944542, "learning_rate": 9.459685781780874e-06, "loss": 0.5605, "step": 787 }, { "epoch": 0.17534490431686695, "grad_norm": 0.9896194037811833, "learning_rate": 9.45805523551383e-06, "loss": 0.5465, "step": 788 }, { "epoch": 0.17556742323097463, "grad_norm": 0.9933773954545825, "learning_rate": 9.456422373604701e-06, "loss": 0.5564, "step": 789 }, { "epoch": 0.17578994214508234, "grad_norm": 1.0400304417257071, "learning_rate": 9.454787196901646e-06, "loss": 0.548, "step": 790 }, { "epoch": 0.17601246105919002, "grad_norm": 1.014546034544681, "learning_rate": 9.453149706254018e-06, "loss": 0.535, "step": 791 }, { "epoch": 0.17623497997329773, "grad_norm": 1.0584499470860904, "learning_rate": 9.451509902512383e-06, "loss": 0.5463, "step": 792 }, { "epoch": 0.17645749888740542, "grad_norm": 1.0087658600979452, "learning_rate": 9.449867786528497e-06, "loss": 0.5394, "step": 793 }, { "epoch": 0.17668001780151313, "grad_norm": 0.9050016641520692, "learning_rate": 9.448223359155322e-06, "loss": 0.5327, "step": 794 }, { "epoch": 0.17690253671562084, "grad_norm": 1.0905673572861017, "learning_rate": 9.446576621247025e-06, "loss": 0.5543, "step": 795 }, { "epoch": 0.17712505562972852, "grad_norm": 1.1581252934704651, "learning_rate": 9.444927573658966e-06, "loss": 0.5412, "step": 796 }, { "epoch": 0.17734757454383623, "grad_norm": 0.9472034095398575, "learning_rate": 9.443276217247707e-06, "loss": 0.5595, "step": 797 }, { "epoch": 0.17757009345794392, "grad_norm": 1.050914038366135, "learning_rate": 9.441622552871015e-06, "loss": 0.5408, "step": 798 }, { "epoch": 0.17779261237205163, "grad_norm": 0.953730601302053, "learning_rate": 9.439966581387845e-06, "loss": 0.5656, "step": 799 }, { "epoch": 0.1780151312861593, "grad_norm": 0.9974602554945037, "learning_rate": 9.438308303658358e-06, "loss": 0.5541, "step": 800 }, { "epoch": 0.17823765020026702, "grad_norm": 1.0897822367525756, "learning_rate": 9.436647720543914e-06, "loss": 0.557, "step": 801 }, { "epoch": 0.17846016911437473, "grad_norm": 0.9155844444621162, "learning_rate": 9.434984832907063e-06, "loss": 0.5481, "step": 802 }, { "epoch": 0.17868268802848242, "grad_norm": 0.8932549883441642, "learning_rate": 9.43331964161156e-06, "loss": 0.5528, "step": 803 }, { "epoch": 0.17890520694259013, "grad_norm": 0.9130674334856507, "learning_rate": 9.431652147522352e-06, "loss": 0.5291, "step": 804 }, { "epoch": 0.1791277258566978, "grad_norm": 0.9178591999038902, "learning_rate": 9.429982351505585e-06, "loss": 0.5311, "step": 805 }, { "epoch": 0.17935024477080552, "grad_norm": 0.9777721530267246, "learning_rate": 9.428310254428597e-06, "loss": 0.5294, "step": 806 }, { "epoch": 0.1795727636849132, "grad_norm": 0.9967538087010706, "learning_rate": 9.426635857159922e-06, "loss": 0.546, "step": 807 }, { "epoch": 0.17979528259902092, "grad_norm": 0.9737772106073257, "learning_rate": 9.424959160569293e-06, "loss": 0.5343, "step": 808 }, { "epoch": 0.18001780151312863, "grad_norm": 0.9202440275365953, "learning_rate": 9.423280165527635e-06, "loss": 0.5493, "step": 809 }, { "epoch": 0.1802403204272363, "grad_norm": 0.9661897097801475, "learning_rate": 9.421598872907062e-06, "loss": 0.5399, "step": 810 }, { "epoch": 0.18046283934134402, "grad_norm": 0.8973898975312892, "learning_rate": 9.419915283580892e-06, "loss": 0.5381, "step": 811 }, { "epoch": 0.1806853582554517, "grad_norm": 0.8992706349663198, "learning_rate": 9.418229398423624e-06, "loss": 0.5491, "step": 812 }, { "epoch": 0.18090787716955942, "grad_norm": 1.0194413850043984, "learning_rate": 9.416541218310957e-06, "loss": 0.5639, "step": 813 }, { "epoch": 0.1811303960836671, "grad_norm": 0.9784830026484056, "learning_rate": 9.414850744119783e-06, "loss": 0.525, "step": 814 }, { "epoch": 0.1813529149977748, "grad_norm": 1.0179626976345708, "learning_rate": 9.413157976728178e-06, "loss": 0.5562, "step": 815 }, { "epoch": 0.18157543391188252, "grad_norm": 0.9205598450957042, "learning_rate": 9.41146291701542e-06, "loss": 0.5389, "step": 816 }, { "epoch": 0.1817979528259902, "grad_norm": 1.0767215173704776, "learning_rate": 9.409765565861965e-06, "loss": 0.5565, "step": 817 }, { "epoch": 0.18202047174009792, "grad_norm": 0.9314750830077064, "learning_rate": 9.408065924149471e-06, "loss": 0.5444, "step": 818 }, { "epoch": 0.1822429906542056, "grad_norm": 0.9787577748663422, "learning_rate": 9.406363992760779e-06, "loss": 0.5459, "step": 819 }, { "epoch": 0.1824655095683133, "grad_norm": 0.9392630505816886, "learning_rate": 9.404659772579921e-06, "loss": 0.5349, "step": 820 }, { "epoch": 0.182688028482421, "grad_norm": 0.9527245291739882, "learning_rate": 9.402953264492119e-06, "loss": 0.55, "step": 821 }, { "epoch": 0.1829105473965287, "grad_norm": 1.0542619119250731, "learning_rate": 9.401244469383781e-06, "loss": 0.5465, "step": 822 }, { "epoch": 0.18313306631063642, "grad_norm": 0.8944196193379976, "learning_rate": 9.399533388142505e-06, "loss": 0.5487, "step": 823 }, { "epoch": 0.1833555852247441, "grad_norm": 0.905196940795938, "learning_rate": 9.397820021657079e-06, "loss": 0.5337, "step": 824 }, { "epoch": 0.1835781041388518, "grad_norm": 0.9552811544307601, "learning_rate": 9.396104370817467e-06, "loss": 0.5502, "step": 825 }, { "epoch": 0.1838006230529595, "grad_norm": 0.9432644661767207, "learning_rate": 9.394386436514834e-06, "loss": 0.5314, "step": 826 }, { "epoch": 0.1840231419670672, "grad_norm": 0.8720932712891631, "learning_rate": 9.392666219641523e-06, "loss": 0.5515, "step": 827 }, { "epoch": 0.1842456608811749, "grad_norm": 1.0107360735005766, "learning_rate": 9.390943721091062e-06, "loss": 0.5622, "step": 828 }, { "epoch": 0.1844681797952826, "grad_norm": 0.9297683111977927, "learning_rate": 9.389218941758169e-06, "loss": 0.5362, "step": 829 }, { "epoch": 0.1846906987093903, "grad_norm": 0.9299742551896263, "learning_rate": 9.387491882538744e-06, "loss": 0.5516, "step": 830 }, { "epoch": 0.184913217623498, "grad_norm": 1.0018391919738079, "learning_rate": 9.385762544329869e-06, "loss": 0.5436, "step": 831 }, { "epoch": 0.1851357365376057, "grad_norm": 0.8793938454975055, "learning_rate": 9.384030928029813e-06, "loss": 0.5497, "step": 832 }, { "epoch": 0.1853582554517134, "grad_norm": 0.9000589520083841, "learning_rate": 9.382297034538026e-06, "loss": 0.5381, "step": 833 }, { "epoch": 0.1855807743658211, "grad_norm": 1.0418461659703193, "learning_rate": 9.380560864755145e-06, "loss": 0.5505, "step": 834 }, { "epoch": 0.18580329327992878, "grad_norm": 0.8827380408458576, "learning_rate": 9.378822419582984e-06, "loss": 0.5372, "step": 835 }, { "epoch": 0.1860258121940365, "grad_norm": 0.9871333660668143, "learning_rate": 9.377081699924544e-06, "loss": 0.5704, "step": 836 }, { "epoch": 0.1862483311081442, "grad_norm": 0.9989491659763688, "learning_rate": 9.375338706684003e-06, "loss": 0.566, "step": 837 }, { "epoch": 0.1864708500222519, "grad_norm": 0.8770058769317493, "learning_rate": 9.37359344076672e-06, "loss": 0.5414, "step": 838 }, { "epoch": 0.1866933689363596, "grad_norm": 1.0012596005138992, "learning_rate": 9.37184590307924e-06, "loss": 0.5474, "step": 839 }, { "epoch": 0.18691588785046728, "grad_norm": 0.9317739834970995, "learning_rate": 9.370096094529285e-06, "loss": 0.5547, "step": 840 }, { "epoch": 0.187138406764575, "grad_norm": 0.8645538013740377, "learning_rate": 9.36834401602575e-06, "loss": 0.5218, "step": 841 }, { "epoch": 0.18736092567868268, "grad_norm": 0.9080928569169411, "learning_rate": 9.36658966847872e-06, "loss": 0.531, "step": 842 }, { "epoch": 0.1875834445927904, "grad_norm": 0.9101149021380885, "learning_rate": 9.36483305279945e-06, "loss": 0.5386, "step": 843 }, { "epoch": 0.1878059635068981, "grad_norm": 0.9167287341714593, "learning_rate": 9.363074169900382e-06, "loss": 0.5614, "step": 844 }, { "epoch": 0.18802848242100578, "grad_norm": 0.9920224636281338, "learning_rate": 9.361313020695126e-06, "loss": 0.5597, "step": 845 }, { "epoch": 0.1882510013351135, "grad_norm": 0.8792530396046818, "learning_rate": 9.359549606098474e-06, "loss": 0.524, "step": 846 }, { "epoch": 0.18847352024922118, "grad_norm": 0.8941785034843786, "learning_rate": 9.357783927026395e-06, "loss": 0.5481, "step": 847 }, { "epoch": 0.1886960391633289, "grad_norm": 0.9295612274595709, "learning_rate": 9.356015984396036e-06, "loss": 0.5371, "step": 848 }, { "epoch": 0.18891855807743657, "grad_norm": 1.0403399769185284, "learning_rate": 9.354245779125712e-06, "loss": 0.5422, "step": 849 }, { "epoch": 0.18914107699154428, "grad_norm": 0.928969791339643, "learning_rate": 9.352473312134923e-06, "loss": 0.5529, "step": 850 }, { "epoch": 0.189363595905652, "grad_norm": 0.9571160702938621, "learning_rate": 9.350698584344335e-06, "loss": 0.5325, "step": 851 }, { "epoch": 0.18958611481975968, "grad_norm": 0.935159385000662, "learning_rate": 9.348921596675797e-06, "loss": 0.538, "step": 852 }, { "epoch": 0.1898086337338674, "grad_norm": 0.9629477374760822, "learning_rate": 9.347142350052326e-06, "loss": 0.5456, "step": 853 }, { "epoch": 0.19003115264797507, "grad_norm": 0.8974036612715874, "learning_rate": 9.345360845398112e-06, "loss": 0.5559, "step": 854 }, { "epoch": 0.19025367156208278, "grad_norm": 1.0672132657243392, "learning_rate": 9.343577083638522e-06, "loss": 0.543, "step": 855 }, { "epoch": 0.19047619047619047, "grad_norm": 0.8929136755436676, "learning_rate": 9.341791065700092e-06, "loss": 0.5478, "step": 856 }, { "epoch": 0.19069870939029818, "grad_norm": 1.0165571084756708, "learning_rate": 9.340002792510532e-06, "loss": 0.5413, "step": 857 }, { "epoch": 0.1909212283044059, "grad_norm": 0.9779185246268655, "learning_rate": 9.338212264998722e-06, "loss": 0.5589, "step": 858 }, { "epoch": 0.19114374721851357, "grad_norm": 1.155609780810925, "learning_rate": 9.336419484094714e-06, "loss": 0.5497, "step": 859 }, { "epoch": 0.19136626613262128, "grad_norm": 0.8752570224489726, "learning_rate": 9.334624450729729e-06, "loss": 0.5285, "step": 860 }, { "epoch": 0.19158878504672897, "grad_norm": 0.9693199324122374, "learning_rate": 9.33282716583616e-06, "loss": 0.5418, "step": 861 }, { "epoch": 0.19181130396083668, "grad_norm": 1.001560670498493, "learning_rate": 9.331027630347567e-06, "loss": 0.5374, "step": 862 }, { "epoch": 0.19203382287494436, "grad_norm": 0.8605835379146219, "learning_rate": 9.329225845198681e-06, "loss": 0.5261, "step": 863 }, { "epoch": 0.19225634178905207, "grad_norm": 0.9798646087693252, "learning_rate": 9.327421811325402e-06, "loss": 0.5428, "step": 864 }, { "epoch": 0.19247886070315978, "grad_norm": 1.0303839969737063, "learning_rate": 9.325615529664795e-06, "loss": 0.5487, "step": 865 }, { "epoch": 0.19270137961726747, "grad_norm": 0.9118813482591421, "learning_rate": 9.323807001155098e-06, "loss": 0.5454, "step": 866 }, { "epoch": 0.19292389853137518, "grad_norm": 0.8081986981566145, "learning_rate": 9.32199622673571e-06, "loss": 0.5339, "step": 867 }, { "epoch": 0.19314641744548286, "grad_norm": 1.032383489470202, "learning_rate": 9.3201832073472e-06, "loss": 0.5327, "step": 868 }, { "epoch": 0.19336893635959057, "grad_norm": 0.7995655566262562, "learning_rate": 9.318367943931304e-06, "loss": 0.5399, "step": 869 }, { "epoch": 0.19359145527369825, "grad_norm": 0.9806196222200257, "learning_rate": 9.316550437430917e-06, "loss": 0.5721, "step": 870 }, { "epoch": 0.19381397418780597, "grad_norm": 0.9158435781407818, "learning_rate": 9.314730688790111e-06, "loss": 0.5344, "step": 871 }, { "epoch": 0.19403649310191368, "grad_norm": 0.9187348314648979, "learning_rate": 9.312908698954113e-06, "loss": 0.5481, "step": 872 }, { "epoch": 0.19425901201602136, "grad_norm": 1.0007477295531848, "learning_rate": 9.311084468869314e-06, "loss": 0.5541, "step": 873 }, { "epoch": 0.19448153093012907, "grad_norm": 0.8985385209465464, "learning_rate": 9.309257999483274e-06, "loss": 0.5369, "step": 874 }, { "epoch": 0.19470404984423675, "grad_norm": 0.8922048143552391, "learning_rate": 9.307429291744714e-06, "loss": 0.518, "step": 875 }, { "epoch": 0.19492656875834447, "grad_norm": 0.9269752440213612, "learning_rate": 9.305598346603518e-06, "loss": 0.5463, "step": 876 }, { "epoch": 0.19514908767245215, "grad_norm": 0.8610906610628554, "learning_rate": 9.303765165010727e-06, "loss": 0.534, "step": 877 }, { "epoch": 0.19537160658655986, "grad_norm": 0.962121197816863, "learning_rate": 9.301929747918555e-06, "loss": 0.5563, "step": 878 }, { "epoch": 0.19559412550066757, "grad_norm": 0.8599716436684444, "learning_rate": 9.300092096280367e-06, "loss": 0.5346, "step": 879 }, { "epoch": 0.19581664441477525, "grad_norm": 0.8906699472497915, "learning_rate": 9.29825221105069e-06, "loss": 0.5236, "step": 880 }, { "epoch": 0.19603916332888296, "grad_norm": 0.8642901867813236, "learning_rate": 9.296410093185219e-06, "loss": 0.538, "step": 881 }, { "epoch": 0.19626168224299065, "grad_norm": 0.89012224710647, "learning_rate": 9.294565743640797e-06, "loss": 0.5314, "step": 882 }, { "epoch": 0.19648420115709836, "grad_norm": 0.9483861040968167, "learning_rate": 9.292719163375437e-06, "loss": 0.548, "step": 883 }, { "epoch": 0.19670672007120604, "grad_norm": 0.9349064559759229, "learning_rate": 9.290870353348302e-06, "loss": 0.5482, "step": 884 }, { "epoch": 0.19692923898531375, "grad_norm": 0.9111666702354968, "learning_rate": 9.289019314519719e-06, "loss": 0.5324, "step": 885 }, { "epoch": 0.19715175789942144, "grad_norm": 0.9989972510939797, "learning_rate": 9.28716604785117e-06, "loss": 0.5476, "step": 886 }, { "epoch": 0.19737427681352915, "grad_norm": 0.9446008213922912, "learning_rate": 9.285310554305298e-06, "loss": 0.5402, "step": 887 }, { "epoch": 0.19759679572763686, "grad_norm": 0.873653371147649, "learning_rate": 9.283452834845894e-06, "loss": 0.5604, "step": 888 }, { "epoch": 0.19781931464174454, "grad_norm": 0.9243778353895749, "learning_rate": 9.281592890437916e-06, "loss": 0.5395, "step": 889 }, { "epoch": 0.19804183355585225, "grad_norm": 0.8546931707348868, "learning_rate": 9.279730722047472e-06, "loss": 0.5396, "step": 890 }, { "epoch": 0.19826435246995994, "grad_norm": 0.8598661695660772, "learning_rate": 9.27786633064182e-06, "loss": 0.5299, "step": 891 }, { "epoch": 0.19848687138406765, "grad_norm": 1.0226547605279122, "learning_rate": 9.275999717189388e-06, "loss": 0.5621, "step": 892 }, { "epoch": 0.19870939029817533, "grad_norm": 0.9536391520517309, "learning_rate": 9.274130882659741e-06, "loss": 0.5272, "step": 893 }, { "epoch": 0.19893190921228304, "grad_norm": 0.9396775646205558, "learning_rate": 9.272259828023609e-06, "loss": 0.5268, "step": 894 }, { "epoch": 0.19915442812639075, "grad_norm": 1.0497206539373414, "learning_rate": 9.27038655425287e-06, "loss": 0.5411, "step": 895 }, { "epoch": 0.19937694704049844, "grad_norm": 0.8165732572650105, "learning_rate": 9.268511062320559e-06, "loss": 0.5422, "step": 896 }, { "epoch": 0.19959946595460615, "grad_norm": 0.9147644752356069, "learning_rate": 9.266633353200857e-06, "loss": 0.5485, "step": 897 }, { "epoch": 0.19982198486871383, "grad_norm": 0.9252346261114947, "learning_rate": 9.264753427869103e-06, "loss": 0.5353, "step": 898 }, { "epoch": 0.20004450378282154, "grad_norm": 1.002924333507921, "learning_rate": 9.26287128730178e-06, "loss": 0.5479, "step": 899 }, { "epoch": 0.20026702269692923, "grad_norm": 0.9767721846750783, "learning_rate": 9.260986932476532e-06, "loss": 0.5261, "step": 900 }, { "epoch": 0.20048954161103694, "grad_norm": 0.9846071303720525, "learning_rate": 9.259100364372141e-06, "loss": 0.5348, "step": 901 }, { "epoch": 0.20071206052514465, "grad_norm": 0.8560296006258145, "learning_rate": 9.25721158396855e-06, "loss": 0.5216, "step": 902 }, { "epoch": 0.20093457943925233, "grad_norm": 0.9214351441595776, "learning_rate": 9.255320592246842e-06, "loss": 0.54, "step": 903 }, { "epoch": 0.20115709835336004, "grad_norm": 0.9859838957160811, "learning_rate": 9.253427390189253e-06, "loss": 0.5234, "step": 904 }, { "epoch": 0.20137961726746773, "grad_norm": 0.8517682205338586, "learning_rate": 9.25153197877917e-06, "loss": 0.5428, "step": 905 }, { "epoch": 0.20160213618157544, "grad_norm": 0.9040335316961158, "learning_rate": 9.24963435900112e-06, "loss": 0.5487, "step": 906 }, { "epoch": 0.20182465509568312, "grad_norm": 0.891190568752322, "learning_rate": 9.247734531840784e-06, "loss": 0.5536, "step": 907 }, { "epoch": 0.20204717400979083, "grad_norm": 0.924697706186279, "learning_rate": 9.245832498284986e-06, "loss": 0.5465, "step": 908 }, { "epoch": 0.20226969292389854, "grad_norm": 0.9788508905973016, "learning_rate": 9.243928259321694e-06, "loss": 0.5391, "step": 909 }, { "epoch": 0.20249221183800623, "grad_norm": 0.9132388830525291, "learning_rate": 9.242021815940031e-06, "loss": 0.5201, "step": 910 }, { "epoch": 0.20271473075211394, "grad_norm": 0.8965963634223432, "learning_rate": 9.240113169130252e-06, "loss": 0.5342, "step": 911 }, { "epoch": 0.20293724966622162, "grad_norm": 0.9450504073575638, "learning_rate": 9.238202319883767e-06, "loss": 0.5303, "step": 912 }, { "epoch": 0.20315976858032933, "grad_norm": 0.9670111758514962, "learning_rate": 9.236289269193127e-06, "loss": 0.5414, "step": 913 }, { "epoch": 0.20338228749443701, "grad_norm": 0.8951364744792476, "learning_rate": 9.234374018052018e-06, "loss": 0.5452, "step": 914 }, { "epoch": 0.20360480640854473, "grad_norm": 0.9783545151861016, "learning_rate": 9.232456567455288e-06, "loss": 0.5511, "step": 915 }, { "epoch": 0.20382732532265244, "grad_norm": 0.9111131765757047, "learning_rate": 9.230536918398906e-06, "loss": 0.5429, "step": 916 }, { "epoch": 0.20404984423676012, "grad_norm": 0.838995475286134, "learning_rate": 9.228615071879998e-06, "loss": 0.5332, "step": 917 }, { "epoch": 0.20427236315086783, "grad_norm": 0.8883475567995751, "learning_rate": 9.226691028896823e-06, "loss": 0.5552, "step": 918 }, { "epoch": 0.20449488206497551, "grad_norm": 0.8950653461937501, "learning_rate": 9.22476479044879e-06, "loss": 0.5482, "step": 919 }, { "epoch": 0.20471740097908322, "grad_norm": 0.9059014263733383, "learning_rate": 9.222836357536437e-06, "loss": 0.5293, "step": 920 }, { "epoch": 0.2049399198931909, "grad_norm": 0.868690955864767, "learning_rate": 9.22090573116145e-06, "loss": 0.5278, "step": 921 }, { "epoch": 0.20516243880729862, "grad_norm": 0.8976237909231038, "learning_rate": 9.21897291232665e-06, "loss": 0.536, "step": 922 }, { "epoch": 0.20538495772140633, "grad_norm": 0.9419384134839573, "learning_rate": 9.217037902036002e-06, "loss": 0.5356, "step": 923 }, { "epoch": 0.205607476635514, "grad_norm": 0.9245986355736284, "learning_rate": 9.215100701294604e-06, "loss": 0.5764, "step": 924 }, { "epoch": 0.20582999554962172, "grad_norm": 0.8585442743943653, "learning_rate": 9.213161311108691e-06, "loss": 0.5493, "step": 925 }, { "epoch": 0.2060525144637294, "grad_norm": 0.8577113843889077, "learning_rate": 9.211219732485644e-06, "loss": 0.5214, "step": 926 }, { "epoch": 0.20627503337783712, "grad_norm": 0.9555074858425446, "learning_rate": 9.209275966433971e-06, "loss": 0.5543, "step": 927 }, { "epoch": 0.2064975522919448, "grad_norm": 0.9124999209826103, "learning_rate": 9.20733001396332e-06, "loss": 0.5542, "step": 928 }, { "epoch": 0.2067200712060525, "grad_norm": 0.8875697090104107, "learning_rate": 9.205381876084476e-06, "loss": 0.5341, "step": 929 }, { "epoch": 0.20694259012016022, "grad_norm": 0.8620075392500092, "learning_rate": 9.203431553809357e-06, "loss": 0.5418, "step": 930 }, { "epoch": 0.2071651090342679, "grad_norm": 0.8949145153260035, "learning_rate": 9.201479048151015e-06, "loss": 0.5346, "step": 931 }, { "epoch": 0.20738762794837562, "grad_norm": 0.9018978065067553, "learning_rate": 9.199524360123641e-06, "loss": 0.5327, "step": 932 }, { "epoch": 0.2076101468624833, "grad_norm": 0.9060646235557744, "learning_rate": 9.197567490742554e-06, "loss": 0.5583, "step": 933 }, { "epoch": 0.207832665776591, "grad_norm": 0.8715400779634714, "learning_rate": 9.195608441024207e-06, "loss": 0.534, "step": 934 }, { "epoch": 0.2080551846906987, "grad_norm": 0.984502285106636, "learning_rate": 9.19364721198619e-06, "loss": 0.5343, "step": 935 }, { "epoch": 0.2082777036048064, "grad_norm": 0.9050075950266842, "learning_rate": 9.19168380464722e-06, "loss": 0.543, "step": 936 }, { "epoch": 0.20850022251891412, "grad_norm": 0.9462174603645688, "learning_rate": 9.189718220027147e-06, "loss": 0.5338, "step": 937 }, { "epoch": 0.2087227414330218, "grad_norm": 0.9414082916277274, "learning_rate": 9.187750459146954e-06, "loss": 0.539, "step": 938 }, { "epoch": 0.2089452603471295, "grad_norm": 1.451150538347572, "learning_rate": 9.185780523028748e-06, "loss": 0.5494, "step": 939 }, { "epoch": 0.2091677792612372, "grad_norm": 0.947348394125717, "learning_rate": 9.183808412695775e-06, "loss": 0.5535, "step": 940 }, { "epoch": 0.2093902981753449, "grad_norm": 0.9124114736129486, "learning_rate": 9.181834129172406e-06, "loss": 0.5144, "step": 941 }, { "epoch": 0.2096128170894526, "grad_norm": 0.9541110861694908, "learning_rate": 9.179857673484135e-06, "loss": 0.5349, "step": 942 }, { "epoch": 0.2098353360035603, "grad_norm": 0.9034612870763057, "learning_rate": 9.177879046657599e-06, "loss": 0.5467, "step": 943 }, { "epoch": 0.210057854917668, "grad_norm": 0.8939107411154043, "learning_rate": 9.175898249720545e-06, "loss": 0.5354, "step": 944 }, { "epoch": 0.2102803738317757, "grad_norm": 0.9017289770270746, "learning_rate": 9.17391528370186e-06, "loss": 0.5327, "step": 945 }, { "epoch": 0.2105028927458834, "grad_norm": 0.8899857816953683, "learning_rate": 9.171930149631553e-06, "loss": 0.5576, "step": 946 }, { "epoch": 0.2107254116599911, "grad_norm": 0.8954136451549607, "learning_rate": 9.16994284854076e-06, "loss": 0.5384, "step": 947 }, { "epoch": 0.2109479305740988, "grad_norm": 0.8298461447025162, "learning_rate": 9.167953381461744e-06, "loss": 0.5296, "step": 948 }, { "epoch": 0.21117044948820649, "grad_norm": 0.9101821353569323, "learning_rate": 9.165961749427887e-06, "loss": 0.5377, "step": 949 }, { "epoch": 0.2113929684023142, "grad_norm": 0.9307530703406378, "learning_rate": 9.163967953473705e-06, "loss": 0.5302, "step": 950 }, { "epoch": 0.2116154873164219, "grad_norm": 0.9142278064049204, "learning_rate": 9.161971994634829e-06, "loss": 0.5277, "step": 951 }, { "epoch": 0.2118380062305296, "grad_norm": 0.9261505899114204, "learning_rate": 9.159973873948019e-06, "loss": 0.532, "step": 952 }, { "epoch": 0.2120605251446373, "grad_norm": 1.5056732610082135, "learning_rate": 9.157973592451154e-06, "loss": 0.5478, "step": 953 }, { "epoch": 0.21228304405874499, "grad_norm": 1.088176794432446, "learning_rate": 9.155971151183242e-06, "loss": 0.5489, "step": 954 }, { "epoch": 0.2125055629728527, "grad_norm": 0.85149305344915, "learning_rate": 9.153966551184406e-06, "loss": 0.5327, "step": 955 }, { "epoch": 0.21272808188696038, "grad_norm": 0.9677267663462706, "learning_rate": 9.151959793495894e-06, "loss": 0.5549, "step": 956 }, { "epoch": 0.2129506008010681, "grad_norm": 0.8814019949873977, "learning_rate": 9.149950879160072e-06, "loss": 0.5593, "step": 957 }, { "epoch": 0.2131731197151758, "grad_norm": 0.858852358498263, "learning_rate": 9.14793980922043e-06, "loss": 0.5385, "step": 958 }, { "epoch": 0.21339563862928349, "grad_norm": 0.885317401855839, "learning_rate": 9.145926584721574e-06, "loss": 0.5443, "step": 959 }, { "epoch": 0.2136181575433912, "grad_norm": 1.03263857352908, "learning_rate": 9.14391120670923e-06, "loss": 0.5439, "step": 960 }, { "epoch": 0.21384067645749888, "grad_norm": 0.8140840068919853, "learning_rate": 9.141893676230246e-06, "loss": 0.5177, "step": 961 }, { "epoch": 0.2140631953716066, "grad_norm": 0.9541398439711661, "learning_rate": 9.139873994332583e-06, "loss": 0.5471, "step": 962 }, { "epoch": 0.21428571428571427, "grad_norm": 0.838079712490682, "learning_rate": 9.13785216206532e-06, "loss": 0.5235, "step": 963 }, { "epoch": 0.21450823319982198, "grad_norm": 0.9329998808938358, "learning_rate": 9.135828180478663e-06, "loss": 0.5299, "step": 964 }, { "epoch": 0.2147307521139297, "grad_norm": 0.813727847827005, "learning_rate": 9.133802050623916e-06, "loss": 0.5376, "step": 965 }, { "epoch": 0.21495327102803738, "grad_norm": 0.9411954359482051, "learning_rate": 9.131773773553517e-06, "loss": 0.5307, "step": 966 }, { "epoch": 0.2151757899421451, "grad_norm": 0.884231611289503, "learning_rate": 9.129743350321007e-06, "loss": 0.5436, "step": 967 }, { "epoch": 0.21539830885625277, "grad_norm": 0.9014520273786992, "learning_rate": 9.127710781981047e-06, "loss": 0.5585, "step": 968 }, { "epoch": 0.21562082777036048, "grad_norm": 0.8960860434758908, "learning_rate": 9.125676069589414e-06, "loss": 0.5327, "step": 969 }, { "epoch": 0.21584334668446817, "grad_norm": 0.9292283216676402, "learning_rate": 9.123639214202991e-06, "loss": 0.5442, "step": 970 }, { "epoch": 0.21606586559857588, "grad_norm": 0.9141429701300173, "learning_rate": 9.121600216879782e-06, "loss": 0.5453, "step": 971 }, { "epoch": 0.2162883845126836, "grad_norm": 0.9582587163597207, "learning_rate": 9.119559078678903e-06, "loss": 0.5375, "step": 972 }, { "epoch": 0.21651090342679127, "grad_norm": 0.8966420188653029, "learning_rate": 9.117515800660578e-06, "loss": 0.5449, "step": 973 }, { "epoch": 0.21673342234089898, "grad_norm": 0.8920621570950636, "learning_rate": 9.115470383886144e-06, "loss": 0.5353, "step": 974 }, { "epoch": 0.21695594125500667, "grad_norm": 0.8252207528128275, "learning_rate": 9.11342282941805e-06, "loss": 0.5441, "step": 975 }, { "epoch": 0.21717846016911438, "grad_norm": 0.8888179450484128, "learning_rate": 9.111373138319852e-06, "loss": 0.5347, "step": 976 }, { "epoch": 0.21740097908322206, "grad_norm": 0.8630701483023263, "learning_rate": 9.109321311656224e-06, "loss": 0.5393, "step": 977 }, { "epoch": 0.21762349799732977, "grad_norm": 0.9186549791170572, "learning_rate": 9.107267350492938e-06, "loss": 0.5495, "step": 978 }, { "epoch": 0.21784601691143748, "grad_norm": 0.8875143635169165, "learning_rate": 9.105211255896885e-06, "loss": 0.5299, "step": 979 }, { "epoch": 0.21806853582554517, "grad_norm": 1.0189551394892324, "learning_rate": 9.103153028936058e-06, "loss": 0.5346, "step": 980 }, { "epoch": 0.21829105473965288, "grad_norm": 0.9236387119330011, "learning_rate": 9.101092670679556e-06, "loss": 0.5368, "step": 981 }, { "epoch": 0.21851357365376056, "grad_norm": 1.040856565858946, "learning_rate": 9.099030182197594e-06, "loss": 0.5411, "step": 982 }, { "epoch": 0.21873609256786827, "grad_norm": 0.8464827670746965, "learning_rate": 9.096965564561483e-06, "loss": 0.5229, "step": 983 }, { "epoch": 0.21895861148197596, "grad_norm": 0.8590669601406761, "learning_rate": 9.09489881884365e-06, "loss": 0.5328, "step": 984 }, { "epoch": 0.21918113039608367, "grad_norm": 0.8476104102205835, "learning_rate": 9.092829946117616e-06, "loss": 0.5359, "step": 985 }, { "epoch": 0.21940364931019138, "grad_norm": 0.8393437659636098, "learning_rate": 9.090758947458018e-06, "loss": 0.5356, "step": 986 }, { "epoch": 0.21962616822429906, "grad_norm": 0.9617160151259291, "learning_rate": 9.08868582394059e-06, "loss": 0.5475, "step": 987 }, { "epoch": 0.21984868713840677, "grad_norm": 1.0718856028666652, "learning_rate": 9.086610576642173e-06, "loss": 0.5326, "step": 988 }, { "epoch": 0.22007120605251446, "grad_norm": 1.0242522363684117, "learning_rate": 9.084533206640707e-06, "loss": 0.5495, "step": 989 }, { "epoch": 0.22029372496662217, "grad_norm": 0.8652978209368248, "learning_rate": 9.082453715015242e-06, "loss": 0.5383, "step": 990 }, { "epoch": 0.22051624388072985, "grad_norm": 1.0423488637991856, "learning_rate": 9.080372102845923e-06, "loss": 0.5209, "step": 991 }, { "epoch": 0.22073876279483756, "grad_norm": 0.8663797724592172, "learning_rate": 9.078288371214e-06, "loss": 0.5352, "step": 992 }, { "epoch": 0.22096128170894527, "grad_norm": 2.24147000255606, "learning_rate": 9.076202521201824e-06, "loss": 0.5268, "step": 993 }, { "epoch": 0.22118380062305296, "grad_norm": 0.8979487246202016, "learning_rate": 9.074114553892844e-06, "loss": 0.5413, "step": 994 }, { "epoch": 0.22140631953716067, "grad_norm": 1.031124828592919, "learning_rate": 9.072024470371612e-06, "loss": 0.5532, "step": 995 }, { "epoch": 0.22162883845126835, "grad_norm": 0.8266335980656452, "learning_rate": 9.069932271723774e-06, "loss": 0.5493, "step": 996 }, { "epoch": 0.22185135736537606, "grad_norm": 0.90643818862225, "learning_rate": 9.067837959036083e-06, "loss": 0.543, "step": 997 }, { "epoch": 0.22207387627948375, "grad_norm": 0.8865908784236274, "learning_rate": 9.065741533396382e-06, "loss": 0.5564, "step": 998 }, { "epoch": 0.22229639519359146, "grad_norm": 0.8124323563920933, "learning_rate": 9.063642995893615e-06, "loss": 0.5308, "step": 999 }, { "epoch": 0.22251891410769917, "grad_norm": 1.0005138112366967, "learning_rate": 9.061542347617825e-06, "loss": 0.5479, "step": 1000 }, { "epoch": 0.22274143302180685, "grad_norm": 0.892494197502284, "learning_rate": 9.059439589660145e-06, "loss": 0.5512, "step": 1001 }, { "epoch": 0.22296395193591456, "grad_norm": 0.8928571753894089, "learning_rate": 9.057334723112812e-06, "loss": 0.5218, "step": 1002 }, { "epoch": 0.22318647085002224, "grad_norm": 0.9033185390564455, "learning_rate": 9.055227749069152e-06, "loss": 0.5405, "step": 1003 }, { "epoch": 0.22340898976412996, "grad_norm": 0.9857218482513225, "learning_rate": 9.05311866862359e-06, "loss": 0.5411, "step": 1004 }, { "epoch": 0.22363150867823764, "grad_norm": 0.9011546444049706, "learning_rate": 9.05100748287164e-06, "loss": 0.5308, "step": 1005 }, { "epoch": 0.22385402759234535, "grad_norm": 0.9830962317280695, "learning_rate": 9.048894192909913e-06, "loss": 0.5308, "step": 1006 }, { "epoch": 0.22407654650645306, "grad_norm": 0.9232087318087289, "learning_rate": 9.046778799836115e-06, "loss": 0.5242, "step": 1007 }, { "epoch": 0.22429906542056074, "grad_norm": 0.8889809303689161, "learning_rate": 9.04466130474904e-06, "loss": 0.5219, "step": 1008 }, { "epoch": 0.22452158433466846, "grad_norm": 0.9135736184948385, "learning_rate": 9.042541708748577e-06, "loss": 0.5514, "step": 1009 }, { "epoch": 0.22474410324877614, "grad_norm": 0.9174506359655632, "learning_rate": 9.040420012935705e-06, "loss": 0.5517, "step": 1010 }, { "epoch": 0.22496662216288385, "grad_norm": 0.8867529031951181, "learning_rate": 9.038296218412492e-06, "loss": 0.5197, "step": 1011 }, { "epoch": 0.22518914107699153, "grad_norm": 0.864640695583217, "learning_rate": 9.0361703262821e-06, "loss": 0.5327, "step": 1012 }, { "epoch": 0.22541165999109924, "grad_norm": 0.9450759512562322, "learning_rate": 9.034042337648778e-06, "loss": 0.5311, "step": 1013 }, { "epoch": 0.22563417890520696, "grad_norm": 0.8937780790129333, "learning_rate": 9.031912253617865e-06, "loss": 0.5452, "step": 1014 }, { "epoch": 0.22585669781931464, "grad_norm": 0.8833976763825536, "learning_rate": 9.029780075295787e-06, "loss": 0.5353, "step": 1015 }, { "epoch": 0.22607921673342235, "grad_norm": 0.9338976533686977, "learning_rate": 9.02764580379006e-06, "loss": 0.5313, "step": 1016 }, { "epoch": 0.22630173564753003, "grad_norm": 0.9362429301403736, "learning_rate": 9.025509440209284e-06, "loss": 0.5711, "step": 1017 }, { "epoch": 0.22652425456163774, "grad_norm": 0.9032067328778332, "learning_rate": 9.023370985663147e-06, "loss": 0.5645, "step": 1018 }, { "epoch": 0.22674677347574543, "grad_norm": 0.9005041243306395, "learning_rate": 9.021230441262427e-06, "loss": 0.5332, "step": 1019 }, { "epoch": 0.22696929238985314, "grad_norm": 0.879134099787427, "learning_rate": 9.019087808118982e-06, "loss": 0.5238, "step": 1020 }, { "epoch": 0.22719181130396085, "grad_norm": 0.892813611520128, "learning_rate": 9.016943087345759e-06, "loss": 0.5275, "step": 1021 }, { "epoch": 0.22741433021806853, "grad_norm": 1.03659304098728, "learning_rate": 9.014796280056786e-06, "loss": 0.5447, "step": 1022 }, { "epoch": 0.22763684913217624, "grad_norm": 0.923515247905146, "learning_rate": 9.012647387367179e-06, "loss": 0.5398, "step": 1023 }, { "epoch": 0.22785936804628393, "grad_norm": 0.8755937158704781, "learning_rate": 9.01049641039313e-06, "loss": 0.528, "step": 1024 }, { "epoch": 0.22808188696039164, "grad_norm": 0.878606550425456, "learning_rate": 9.008343350251923e-06, "loss": 0.5383, "step": 1025 }, { "epoch": 0.22830440587449932, "grad_norm": 0.8872306369206223, "learning_rate": 9.006188208061916e-06, "loss": 0.5334, "step": 1026 }, { "epoch": 0.22852692478860703, "grad_norm": 0.9867274340493323, "learning_rate": 9.004030984942555e-06, "loss": 0.5522, "step": 1027 }, { "epoch": 0.22874944370271474, "grad_norm": 0.9496409955643403, "learning_rate": 9.001871682014361e-06, "loss": 0.5405, "step": 1028 }, { "epoch": 0.22897196261682243, "grad_norm": 0.8697349712317393, "learning_rate": 8.999710300398939e-06, "loss": 0.5368, "step": 1029 }, { "epoch": 0.22919448153093014, "grad_norm": 0.9707446228544722, "learning_rate": 8.997546841218971e-06, "loss": 0.5478, "step": 1030 }, { "epoch": 0.22941700044503782, "grad_norm": 0.8888490550774382, "learning_rate": 8.995381305598224e-06, "loss": 0.5356, "step": 1031 }, { "epoch": 0.22963951935914553, "grad_norm": 0.8782862302541028, "learning_rate": 8.993213694661537e-06, "loss": 0.5318, "step": 1032 }, { "epoch": 0.22986203827325322, "grad_norm": 1.003846434651609, "learning_rate": 8.99104400953483e-06, "loss": 0.5291, "step": 1033 }, { "epoch": 0.23008455718736093, "grad_norm": 0.9112402842993238, "learning_rate": 8.988872251345097e-06, "loss": 0.5391, "step": 1034 }, { "epoch": 0.23030707610146864, "grad_norm": 0.9094698717652762, "learning_rate": 8.986698421220416e-06, "loss": 0.54, "step": 1035 }, { "epoch": 0.23052959501557632, "grad_norm": 0.8872142375745947, "learning_rate": 8.984522520289934e-06, "loss": 0.5519, "step": 1036 }, { "epoch": 0.23075211392968403, "grad_norm": 0.8951173909886807, "learning_rate": 8.982344549683878e-06, "loss": 0.5519, "step": 1037 }, { "epoch": 0.23097463284379172, "grad_norm": 0.861272911927614, "learning_rate": 8.980164510533548e-06, "loss": 0.5359, "step": 1038 }, { "epoch": 0.23119715175789943, "grad_norm": 0.8136244004823208, "learning_rate": 8.977982403971319e-06, "loss": 0.5247, "step": 1039 }, { "epoch": 0.2314196706720071, "grad_norm": 0.9269421374890472, "learning_rate": 8.97579823113064e-06, "loss": 0.5379, "step": 1040 }, { "epoch": 0.23164218958611482, "grad_norm": 0.8924404433813939, "learning_rate": 8.973611993146032e-06, "loss": 0.5287, "step": 1041 }, { "epoch": 0.23186470850022253, "grad_norm": 0.8670213453179688, "learning_rate": 8.971423691153094e-06, "loss": 0.532, "step": 1042 }, { "epoch": 0.23208722741433022, "grad_norm": 0.8613554633155818, "learning_rate": 8.969233326288486e-06, "loss": 0.5465, "step": 1043 }, { "epoch": 0.23230974632843793, "grad_norm": 1.0101756030469151, "learning_rate": 8.967040899689953e-06, "loss": 0.543, "step": 1044 }, { "epoch": 0.2325322652425456, "grad_norm": 0.9037589108816699, "learning_rate": 8.964846412496302e-06, "loss": 0.5235, "step": 1045 }, { "epoch": 0.23275478415665332, "grad_norm": 1.073825577091321, "learning_rate": 8.962649865847413e-06, "loss": 0.5273, "step": 1046 }, { "epoch": 0.232977303070761, "grad_norm": 1.0172832905076834, "learning_rate": 8.960451260884233e-06, "loss": 0.5492, "step": 1047 }, { "epoch": 0.23319982198486872, "grad_norm": 0.9799136576148283, "learning_rate": 8.958250598748785e-06, "loss": 0.5463, "step": 1048 }, { "epoch": 0.2334223408989764, "grad_norm": 0.8785258170575819, "learning_rate": 8.956047880584153e-06, "loss": 0.5312, "step": 1049 }, { "epoch": 0.2336448598130841, "grad_norm": 0.8781362624405948, "learning_rate": 8.953843107534492e-06, "loss": 0.5372, "step": 1050 }, { "epoch": 0.23386737872719182, "grad_norm": 1.0093441137758368, "learning_rate": 8.951636280745028e-06, "loss": 0.5362, "step": 1051 }, { "epoch": 0.2340898976412995, "grad_norm": 1.0108776088180895, "learning_rate": 8.949427401362047e-06, "loss": 0.5595, "step": 1052 }, { "epoch": 0.23431241655540722, "grad_norm": 0.948504370274971, "learning_rate": 8.947216470532904e-06, "loss": 0.5469, "step": 1053 }, { "epoch": 0.2345349354695149, "grad_norm": 0.9099087408932108, "learning_rate": 8.945003489406023e-06, "loss": 0.5534, "step": 1054 }, { "epoch": 0.2347574543836226, "grad_norm": 0.9588058735262316, "learning_rate": 8.94278845913089e-06, "loss": 0.5488, "step": 1055 }, { "epoch": 0.2349799732977303, "grad_norm": 0.9542150809423197, "learning_rate": 8.94057138085805e-06, "loss": 0.5218, "step": 1056 }, { "epoch": 0.235202492211838, "grad_norm": 0.9146485329070638, "learning_rate": 8.938352255739124e-06, "loss": 0.5171, "step": 1057 }, { "epoch": 0.23542501112594572, "grad_norm": 0.8910463991860347, "learning_rate": 8.936131084926785e-06, "loss": 0.5444, "step": 1058 }, { "epoch": 0.2356475300400534, "grad_norm": 0.8809312914759677, "learning_rate": 8.933907869574776e-06, "loss": 0.5361, "step": 1059 }, { "epoch": 0.2358700489541611, "grad_norm": 0.8854444735004111, "learning_rate": 8.931682610837897e-06, "loss": 0.5549, "step": 1060 }, { "epoch": 0.2360925678682688, "grad_norm": 1.0018933467170128, "learning_rate": 8.929455309872011e-06, "loss": 0.5462, "step": 1061 }, { "epoch": 0.2363150867823765, "grad_norm": 0.8979136669483564, "learning_rate": 8.927225967834045e-06, "loss": 0.5358, "step": 1062 }, { "epoch": 0.2365376056964842, "grad_norm": 0.8689640097619505, "learning_rate": 8.92499458588198e-06, "loss": 0.5385, "step": 1063 }, { "epoch": 0.2367601246105919, "grad_norm": 0.8456761396852952, "learning_rate": 8.92276116517486e-06, "loss": 0.5145, "step": 1064 }, { "epoch": 0.2369826435246996, "grad_norm": 0.9109807420222844, "learning_rate": 8.920525706872791e-06, "loss": 0.5296, "step": 1065 }, { "epoch": 0.2372051624388073, "grad_norm": 0.847526820231569, "learning_rate": 8.918288212136935e-06, "loss": 0.5356, "step": 1066 }, { "epoch": 0.237427681352915, "grad_norm": 0.8541795403377543, "learning_rate": 8.916048682129504e-06, "loss": 0.5397, "step": 1067 }, { "epoch": 0.2376502002670227, "grad_norm": 0.8898176515336585, "learning_rate": 8.913807118013782e-06, "loss": 0.53, "step": 1068 }, { "epoch": 0.2378727191811304, "grad_norm": 0.8702837876413198, "learning_rate": 8.911563520954099e-06, "loss": 0.5224, "step": 1069 }, { "epoch": 0.23809523809523808, "grad_norm": 0.8909766595393563, "learning_rate": 8.909317892115842e-06, "loss": 0.5383, "step": 1070 }, { "epoch": 0.2383177570093458, "grad_norm": 0.8618641517460405, "learning_rate": 8.907070232665457e-06, "loss": 0.5248, "step": 1071 }, { "epoch": 0.2385402759234535, "grad_norm": 0.8955089351964332, "learning_rate": 8.904820543770445e-06, "loss": 0.5227, "step": 1072 }, { "epoch": 0.2387627948375612, "grad_norm": 0.957260157532135, "learning_rate": 8.902568826599354e-06, "loss": 0.5296, "step": 1073 }, { "epoch": 0.2389853137516689, "grad_norm": 0.9292277717931159, "learning_rate": 8.900315082321795e-06, "loss": 0.5317, "step": 1074 }, { "epoch": 0.23920783266577658, "grad_norm": 0.9387449925564598, "learning_rate": 8.898059312108427e-06, "loss": 0.5191, "step": 1075 }, { "epoch": 0.2394303515798843, "grad_norm": 1.0150515844683345, "learning_rate": 8.89580151713096e-06, "loss": 0.5418, "step": 1076 }, { "epoch": 0.23965287049399198, "grad_norm": 1.0505925404518486, "learning_rate": 8.89354169856216e-06, "loss": 0.5295, "step": 1077 }, { "epoch": 0.2398753894080997, "grad_norm": 0.8734378965866477, "learning_rate": 8.89127985757584e-06, "loss": 0.5386, "step": 1078 }, { "epoch": 0.2400979083222074, "grad_norm": 0.9312779708298966, "learning_rate": 8.889015995346865e-06, "loss": 0.543, "step": 1079 }, { "epoch": 0.24032042723631508, "grad_norm": 0.9810275520432983, "learning_rate": 8.88675011305115e-06, "loss": 0.5507, "step": 1080 }, { "epoch": 0.2405429461504228, "grad_norm": 0.9147131460061039, "learning_rate": 8.884482211865663e-06, "loss": 0.5334, "step": 1081 }, { "epoch": 0.24076546506453048, "grad_norm": 0.9681374407488863, "learning_rate": 8.882212292968412e-06, "loss": 0.5348, "step": 1082 }, { "epoch": 0.2409879839786382, "grad_norm": 0.9114118749552996, "learning_rate": 8.879940357538462e-06, "loss": 0.5307, "step": 1083 }, { "epoch": 0.24121050289274587, "grad_norm": 0.9337044097900901, "learning_rate": 8.87766640675592e-06, "loss": 0.5405, "step": 1084 }, { "epoch": 0.24143302180685358, "grad_norm": 0.859026816609423, "learning_rate": 8.87539044180194e-06, "loss": 0.5168, "step": 1085 }, { "epoch": 0.2416555407209613, "grad_norm": 0.8713757770450685, "learning_rate": 8.873112463858726e-06, "loss": 0.5152, "step": 1086 }, { "epoch": 0.24187805963506898, "grad_norm": 0.9099899677914134, "learning_rate": 8.870832474109525e-06, "loss": 0.5173, "step": 1087 }, { "epoch": 0.2421005785491767, "grad_norm": 0.9533167840783866, "learning_rate": 8.868550473738629e-06, "loss": 0.5221, "step": 1088 }, { "epoch": 0.24232309746328437, "grad_norm": 0.9831605015026654, "learning_rate": 8.866266463931374e-06, "loss": 0.5435, "step": 1089 }, { "epoch": 0.24254561637739208, "grad_norm": 0.8929531234233674, "learning_rate": 8.86398044587414e-06, "loss": 0.5269, "step": 1090 }, { "epoch": 0.24276813529149976, "grad_norm": 0.8358445876788178, "learning_rate": 8.861692420754353e-06, "loss": 0.5274, "step": 1091 }, { "epoch": 0.24299065420560748, "grad_norm": 1.0303215172961373, "learning_rate": 8.859402389760475e-06, "loss": 0.5473, "step": 1092 }, { "epoch": 0.2432131731197152, "grad_norm": 0.9121934769103938, "learning_rate": 8.857110354082018e-06, "loss": 0.5426, "step": 1093 }, { "epoch": 0.24343569203382287, "grad_norm": 0.9171894480356578, "learning_rate": 8.854816314909527e-06, "loss": 0.5368, "step": 1094 }, { "epoch": 0.24365821094793058, "grad_norm": 0.9018615788129647, "learning_rate": 8.852520273434597e-06, "loss": 0.5438, "step": 1095 }, { "epoch": 0.24388072986203826, "grad_norm": 0.8573683253756494, "learning_rate": 8.850222230849854e-06, "loss": 0.5267, "step": 1096 }, { "epoch": 0.24410324877614598, "grad_norm": 0.8823811925486141, "learning_rate": 8.847922188348969e-06, "loss": 0.5241, "step": 1097 }, { "epoch": 0.24432576769025366, "grad_norm": 0.8372791489178069, "learning_rate": 8.84562014712665e-06, "loss": 0.5517, "step": 1098 }, { "epoch": 0.24454828660436137, "grad_norm": 0.8851818833711913, "learning_rate": 8.843316108378642e-06, "loss": 0.5215, "step": 1099 }, { "epoch": 0.24477080551846908, "grad_norm": 0.8425477870790743, "learning_rate": 8.841010073301733e-06, "loss": 0.541, "step": 1100 }, { "epoch": 0.24499332443257676, "grad_norm": 0.8350305021271855, "learning_rate": 8.838702043093739e-06, "loss": 0.5349, "step": 1101 }, { "epoch": 0.24521584334668448, "grad_norm": 0.830223223578072, "learning_rate": 8.83639201895352e-06, "loss": 0.5177, "step": 1102 }, { "epoch": 0.24543836226079216, "grad_norm": 0.8773427693140864, "learning_rate": 8.834080002080968e-06, "loss": 0.5422, "step": 1103 }, { "epoch": 0.24566088117489987, "grad_norm": 0.9243758521307627, "learning_rate": 8.831765993677012e-06, "loss": 0.5163, "step": 1104 }, { "epoch": 0.24588340008900755, "grad_norm": 0.9340782241548541, "learning_rate": 8.829449994943614e-06, "loss": 0.5322, "step": 1105 }, { "epoch": 0.24610591900311526, "grad_norm": 0.9604836549278019, "learning_rate": 8.82713200708377e-06, "loss": 0.5333, "step": 1106 }, { "epoch": 0.24632843791722298, "grad_norm": 0.9231549268460344, "learning_rate": 8.824812031301511e-06, "loss": 0.5323, "step": 1107 }, { "epoch": 0.24655095683133066, "grad_norm": 0.9580006326392261, "learning_rate": 8.822490068801896e-06, "loss": 0.5437, "step": 1108 }, { "epoch": 0.24677347574543837, "grad_norm": 0.8916569386918449, "learning_rate": 8.820166120791023e-06, "loss": 0.5292, "step": 1109 }, { "epoch": 0.24699599465954605, "grad_norm": 0.8936938436206254, "learning_rate": 8.817840188476015e-06, "loss": 0.543, "step": 1110 }, { "epoch": 0.24721851357365376, "grad_norm": 0.9073248387342889, "learning_rate": 8.815512273065028e-06, "loss": 0.537, "step": 1111 }, { "epoch": 0.24744103248776145, "grad_norm": 0.9828930113720948, "learning_rate": 8.813182375767249e-06, "loss": 0.4922, "step": 1112 }, { "epoch": 0.24766355140186916, "grad_norm": 0.9416000453862959, "learning_rate": 8.810850497792895e-06, "loss": 0.5413, "step": 1113 }, { "epoch": 0.24788607031597687, "grad_norm": 0.9241688529541101, "learning_rate": 8.80851664035321e-06, "loss": 0.5295, "step": 1114 }, { "epoch": 0.24810858923008455, "grad_norm": 0.9021269471762381, "learning_rate": 8.806180804660462e-06, "loss": 0.5325, "step": 1115 }, { "epoch": 0.24833110814419226, "grad_norm": 0.9542151631327481, "learning_rate": 8.803842991927955e-06, "loss": 0.5283, "step": 1116 }, { "epoch": 0.24855362705829995, "grad_norm": 0.929660645868257, "learning_rate": 8.801503203370019e-06, "loss": 0.5389, "step": 1117 }, { "epoch": 0.24877614597240766, "grad_norm": 0.9476370117445395, "learning_rate": 8.799161440202002e-06, "loss": 0.5287, "step": 1118 }, { "epoch": 0.24899866488651534, "grad_norm": 0.9669885109801643, "learning_rate": 8.796817703640288e-06, "loss": 0.5243, "step": 1119 }, { "epoch": 0.24922118380062305, "grad_norm": 0.8893081147839849, "learning_rate": 8.794471994902277e-06, "loss": 0.5262, "step": 1120 }, { "epoch": 0.24944370271473076, "grad_norm": 0.8873633922253086, "learning_rate": 8.7921243152064e-06, "loss": 0.5102, "step": 1121 }, { "epoch": 0.24966622162883845, "grad_norm": 0.9133449678396866, "learning_rate": 8.789774665772109e-06, "loss": 0.524, "step": 1122 }, { "epoch": 0.24988874054294616, "grad_norm": 0.845359629371758, "learning_rate": 8.787423047819878e-06, "loss": 0.5311, "step": 1123 }, { "epoch": 0.25011125945705387, "grad_norm": 1.0585732811946011, "learning_rate": 8.785069462571208e-06, "loss": 0.5369, "step": 1124 }, { "epoch": 0.25033377837116155, "grad_norm": 0.9034898518969056, "learning_rate": 8.782713911248616e-06, "loss": 0.5339, "step": 1125 }, { "epoch": 0.25055629728526924, "grad_norm": 0.8796213842395544, "learning_rate": 8.780356395075644e-06, "loss": 0.5364, "step": 1126 }, { "epoch": 0.2507788161993769, "grad_norm": 0.9346071801238947, "learning_rate": 8.777996915276854e-06, "loss": 0.5353, "step": 1127 }, { "epoch": 0.25100133511348466, "grad_norm": 0.901897695338746, "learning_rate": 8.775635473077828e-06, "loss": 0.5465, "step": 1128 }, { "epoch": 0.25122385402759234, "grad_norm": 0.9104828874768455, "learning_rate": 8.773272069705165e-06, "loss": 0.517, "step": 1129 }, { "epoch": 0.2514463729417, "grad_norm": 0.9569572807188802, "learning_rate": 8.770906706386488e-06, "loss": 0.5324, "step": 1130 }, { "epoch": 0.25166889185580776, "grad_norm": 0.8689603822694386, "learning_rate": 8.768539384350432e-06, "loss": 0.5348, "step": 1131 }, { "epoch": 0.25189141076991545, "grad_norm": 0.9036115825632126, "learning_rate": 8.766170104826655e-06, "loss": 0.5283, "step": 1132 }, { "epoch": 0.25211392968402313, "grad_norm": 0.8924414631766677, "learning_rate": 8.763798869045823e-06, "loss": 0.5313, "step": 1133 }, { "epoch": 0.2523364485981308, "grad_norm": 0.940815623859512, "learning_rate": 8.76142567823963e-06, "loss": 0.5246, "step": 1134 }, { "epoch": 0.25255896751223855, "grad_norm": 0.916993266686587, "learning_rate": 8.759050533640778e-06, "loss": 0.5207, "step": 1135 }, { "epoch": 0.25278148642634624, "grad_norm": 0.9459624978870493, "learning_rate": 8.756673436482984e-06, "loss": 0.5171, "step": 1136 }, { "epoch": 0.2530040053404539, "grad_norm": 0.926712751096307, "learning_rate": 8.754294388000984e-06, "loss": 0.549, "step": 1137 }, { "epoch": 0.25322652425456166, "grad_norm": 0.987917642455989, "learning_rate": 8.751913389430518e-06, "loss": 0.5514, "step": 1138 }, { "epoch": 0.25344904316866934, "grad_norm": 0.970335592645741, "learning_rate": 8.749530442008352e-06, "loss": 0.5275, "step": 1139 }, { "epoch": 0.253671562082777, "grad_norm": 0.8700489455402152, "learning_rate": 8.747145546972252e-06, "loss": 0.5285, "step": 1140 }, { "epoch": 0.2538940809968847, "grad_norm": 0.9471466985597442, "learning_rate": 8.744758705561004e-06, "loss": 0.5267, "step": 1141 }, { "epoch": 0.25411659991099245, "grad_norm": 0.8842635985740396, "learning_rate": 8.742369919014401e-06, "loss": 0.5215, "step": 1142 }, { "epoch": 0.25433911882510013, "grad_norm": 0.9135608253045406, "learning_rate": 8.73997918857325e-06, "loss": 0.5312, "step": 1143 }, { "epoch": 0.2545616377392078, "grad_norm": 0.9278471545039271, "learning_rate": 8.73758651547936e-06, "loss": 0.5417, "step": 1144 }, { "epoch": 0.25478415665331555, "grad_norm": 0.8311800996904016, "learning_rate": 8.735191900975559e-06, "loss": 0.5568, "step": 1145 }, { "epoch": 0.25500667556742324, "grad_norm": 0.9280584801498089, "learning_rate": 8.732795346305675e-06, "loss": 0.5281, "step": 1146 }, { "epoch": 0.2552291944815309, "grad_norm": 0.8458370375337874, "learning_rate": 8.730396852714552e-06, "loss": 0.5221, "step": 1147 }, { "epoch": 0.2554517133956386, "grad_norm": 0.9124147065507806, "learning_rate": 8.727996421448034e-06, "loss": 0.5241, "step": 1148 }, { "epoch": 0.25567423230974634, "grad_norm": 0.894145175209602, "learning_rate": 8.72559405375297e-06, "loss": 0.5339, "step": 1149 }, { "epoch": 0.255896751223854, "grad_norm": 0.940169826002557, "learning_rate": 8.723189750877226e-06, "loss": 0.5281, "step": 1150 }, { "epoch": 0.2561192701379617, "grad_norm": 0.8419197258291288, "learning_rate": 8.72078351406966e-06, "loss": 0.5319, "step": 1151 }, { "epoch": 0.25634178905206945, "grad_norm": 0.9284567812861396, "learning_rate": 8.718375344580146e-06, "loss": 0.5252, "step": 1152 }, { "epoch": 0.25656430796617713, "grad_norm": 0.8738638724069986, "learning_rate": 8.715965243659553e-06, "loss": 0.5193, "step": 1153 }, { "epoch": 0.2567868268802848, "grad_norm": 0.9278718996170292, "learning_rate": 8.713553212559756e-06, "loss": 0.5277, "step": 1154 }, { "epoch": 0.2570093457943925, "grad_norm": 0.9603731244390857, "learning_rate": 8.711139252533636e-06, "loss": 0.5322, "step": 1155 }, { "epoch": 0.25723186470850024, "grad_norm": 0.8865778173626309, "learning_rate": 8.708723364835073e-06, "loss": 0.5248, "step": 1156 }, { "epoch": 0.2574543836226079, "grad_norm": 0.8913089791474715, "learning_rate": 8.706305550718945e-06, "loss": 0.5346, "step": 1157 }, { "epoch": 0.2576769025367156, "grad_norm": 0.882019927447749, "learning_rate": 8.703885811441138e-06, "loss": 0.5281, "step": 1158 }, { "epoch": 0.25789942145082334, "grad_norm": 0.9148369727134269, "learning_rate": 8.701464148258534e-06, "loss": 0.5349, "step": 1159 }, { "epoch": 0.258121940364931, "grad_norm": 0.9113953665977538, "learning_rate": 8.699040562429013e-06, "loss": 0.5324, "step": 1160 }, { "epoch": 0.2583444592790387, "grad_norm": 0.9845663155713474, "learning_rate": 8.696615055211454e-06, "loss": 0.5458, "step": 1161 }, { "epoch": 0.2585669781931464, "grad_norm": 0.8533710689269953, "learning_rate": 8.694187627865737e-06, "loss": 0.5279, "step": 1162 }, { "epoch": 0.25878949710725413, "grad_norm": 0.9107415561756378, "learning_rate": 8.69175828165274e-06, "loss": 0.5185, "step": 1163 }, { "epoch": 0.2590120160213618, "grad_norm": 0.8876807310521592, "learning_rate": 8.68932701783433e-06, "loss": 0.5354, "step": 1164 }, { "epoch": 0.2592345349354695, "grad_norm": 0.9164642714523514, "learning_rate": 8.68689383767338e-06, "loss": 0.5376, "step": 1165 }, { "epoch": 0.25945705384957723, "grad_norm": 0.9227614692045213, "learning_rate": 8.68445874243375e-06, "loss": 0.5145, "step": 1166 }, { "epoch": 0.2596795727636849, "grad_norm": 0.8912346678878729, "learning_rate": 8.682021733380301e-06, "loss": 0.5035, "step": 1167 }, { "epoch": 0.2599020916777926, "grad_norm": 0.9064420469047891, "learning_rate": 8.679582811778885e-06, "loss": 0.5291, "step": 1168 }, { "epoch": 0.2601246105919003, "grad_norm": 0.8446620002024096, "learning_rate": 8.677141978896347e-06, "loss": 0.5335, "step": 1169 }, { "epoch": 0.260347129506008, "grad_norm": 0.8745373073407232, "learning_rate": 8.674699236000527e-06, "loss": 0.5258, "step": 1170 }, { "epoch": 0.2605696484201157, "grad_norm": 0.9406822628713444, "learning_rate": 8.672254584360255e-06, "loss": 0.5253, "step": 1171 }, { "epoch": 0.2607921673342234, "grad_norm": 0.9173412022047298, "learning_rate": 8.669808025245356e-06, "loss": 0.5262, "step": 1172 }, { "epoch": 0.26101468624833113, "grad_norm": 0.8978060140123652, "learning_rate": 8.66735955992664e-06, "loss": 0.5212, "step": 1173 }, { "epoch": 0.2612372051624388, "grad_norm": 0.9142692444134203, "learning_rate": 8.66490918967591e-06, "loss": 0.5309, "step": 1174 }, { "epoch": 0.2614597240765465, "grad_norm": 0.8481883835092235, "learning_rate": 8.66245691576596e-06, "loss": 0.5083, "step": 1175 }, { "epoch": 0.2616822429906542, "grad_norm": 0.9822423273208042, "learning_rate": 8.660002739470573e-06, "loss": 0.5223, "step": 1176 }, { "epoch": 0.2619047619047619, "grad_norm": 0.8918200864354108, "learning_rate": 8.657546662064518e-06, "loss": 0.528, "step": 1177 }, { "epoch": 0.2621272808188696, "grad_norm": 0.8995106379627358, "learning_rate": 8.65508868482355e-06, "loss": 0.5281, "step": 1178 }, { "epoch": 0.2623497997329773, "grad_norm": 0.9987842301922719, "learning_rate": 8.652628809024415e-06, "loss": 0.532, "step": 1179 }, { "epoch": 0.262572318647085, "grad_norm": 0.8601626919574321, "learning_rate": 8.650167035944843e-06, "loss": 0.5273, "step": 1180 }, { "epoch": 0.2627948375611927, "grad_norm": 0.8682972392781965, "learning_rate": 8.64770336686355e-06, "loss": 0.5142, "step": 1181 }, { "epoch": 0.2630173564753004, "grad_norm": 0.9110385959599303, "learning_rate": 8.645237803060236e-06, "loss": 0.5101, "step": 1182 }, { "epoch": 0.2632398753894081, "grad_norm": 0.9246289064372583, "learning_rate": 8.642770345815586e-06, "loss": 0.5281, "step": 1183 }, { "epoch": 0.2634623943035158, "grad_norm": 0.8615329773119964, "learning_rate": 8.640300996411269e-06, "loss": 0.532, "step": 1184 }, { "epoch": 0.2636849132176235, "grad_norm": 0.9710766844601505, "learning_rate": 8.637829756129934e-06, "loss": 0.5355, "step": 1185 }, { "epoch": 0.2639074321317312, "grad_norm": 0.9288687962363491, "learning_rate": 8.635356626255216e-06, "loss": 0.5385, "step": 1186 }, { "epoch": 0.2641299510458389, "grad_norm": 0.8628915292669179, "learning_rate": 8.632881608071729e-06, "loss": 0.5156, "step": 1187 }, { "epoch": 0.2643524699599466, "grad_norm": 0.8876148459306064, "learning_rate": 8.630404702865069e-06, "loss": 0.5353, "step": 1188 }, { "epoch": 0.2645749888740543, "grad_norm": 0.8736035060023369, "learning_rate": 8.627925911921811e-06, "loss": 0.5441, "step": 1189 }, { "epoch": 0.26479750778816197, "grad_norm": 1.0107502449185009, "learning_rate": 8.625445236529512e-06, "loss": 0.5228, "step": 1190 }, { "epoch": 0.2650200267022697, "grad_norm": 0.9197710714152465, "learning_rate": 8.622962677976706e-06, "loss": 0.5221, "step": 1191 }, { "epoch": 0.2652425456163774, "grad_norm": 0.8948646059164931, "learning_rate": 8.620478237552902e-06, "loss": 0.5247, "step": 1192 }, { "epoch": 0.2654650645304851, "grad_norm": 0.8344045261234598, "learning_rate": 8.617991916548596e-06, "loss": 0.5324, "step": 1193 }, { "epoch": 0.2656875834445928, "grad_norm": 0.9199879904125011, "learning_rate": 8.615503716255249e-06, "loss": 0.5296, "step": 1194 }, { "epoch": 0.2659101023587005, "grad_norm": 0.8272702598752846, "learning_rate": 8.613013637965305e-06, "loss": 0.508, "step": 1195 }, { "epoch": 0.2661326212728082, "grad_norm": 0.8689391050828352, "learning_rate": 8.610521682972182e-06, "loss": 0.5212, "step": 1196 }, { "epoch": 0.26635514018691586, "grad_norm": 0.9687708068566014, "learning_rate": 8.608027852570276e-06, "loss": 0.5347, "step": 1197 }, { "epoch": 0.2665776591010236, "grad_norm": 0.9005429531064092, "learning_rate": 8.60553214805495e-06, "loss": 0.5206, "step": 1198 }, { "epoch": 0.2668001780151313, "grad_norm": 0.9616912262903422, "learning_rate": 8.60303457072255e-06, "loss": 0.5326, "step": 1199 }, { "epoch": 0.26702269692923897, "grad_norm": 0.9840436627910503, "learning_rate": 8.600535121870385e-06, "loss": 0.5548, "step": 1200 }, { "epoch": 0.2672452158433467, "grad_norm": 0.9816180390287405, "learning_rate": 8.598033802796741e-06, "loss": 0.5412, "step": 1201 }, { "epoch": 0.2674677347574544, "grad_norm": 0.8793134986844712, "learning_rate": 8.595530614800877e-06, "loss": 0.5094, "step": 1202 }, { "epoch": 0.2676902536715621, "grad_norm": 0.8673922392428781, "learning_rate": 8.59302555918302e-06, "loss": 0.5207, "step": 1203 }, { "epoch": 0.26791277258566976, "grad_norm": 0.8640070822851904, "learning_rate": 8.590518637244366e-06, "loss": 0.5323, "step": 1204 }, { "epoch": 0.2681352914997775, "grad_norm": 0.9969502531386885, "learning_rate": 8.588009850287086e-06, "loss": 0.5602, "step": 1205 }, { "epoch": 0.2683578104138852, "grad_norm": 0.9544411902418191, "learning_rate": 8.585499199614315e-06, "loss": 0.5412, "step": 1206 }, { "epoch": 0.26858032932799286, "grad_norm": 0.9953632334890409, "learning_rate": 8.582986686530155e-06, "loss": 0.5204, "step": 1207 }, { "epoch": 0.2688028482421006, "grad_norm": 0.8995996451937165, "learning_rate": 8.580472312339681e-06, "loss": 0.5223, "step": 1208 }, { "epoch": 0.2690253671562083, "grad_norm": 0.9510571809389443, "learning_rate": 8.577956078348928e-06, "loss": 0.5284, "step": 1209 }, { "epoch": 0.26924788607031597, "grad_norm": 0.8704985565123124, "learning_rate": 8.5754379858649e-06, "loss": 0.5516, "step": 1210 }, { "epoch": 0.26947040498442365, "grad_norm": 0.8797277347158406, "learning_rate": 8.57291803619557e-06, "loss": 0.5221, "step": 1211 }, { "epoch": 0.2696929238985314, "grad_norm": 0.9092331633087658, "learning_rate": 8.57039623064987e-06, "loss": 0.5274, "step": 1212 }, { "epoch": 0.2699154428126391, "grad_norm": 0.9228732106201469, "learning_rate": 8.567872570537696e-06, "loss": 0.5198, "step": 1213 }, { "epoch": 0.27013796172674676, "grad_norm": 0.8539157246023429, "learning_rate": 8.565347057169917e-06, "loss": 0.5399, "step": 1214 }, { "epoch": 0.2703604806408545, "grad_norm": 0.9595539434728477, "learning_rate": 8.56281969185835e-06, "loss": 0.5466, "step": 1215 }, { "epoch": 0.2705829995549622, "grad_norm": 0.9054988392691784, "learning_rate": 8.560290475915784e-06, "loss": 0.5304, "step": 1216 }, { "epoch": 0.27080551846906986, "grad_norm": 0.9095606154630796, "learning_rate": 8.557759410655966e-06, "loss": 0.5159, "step": 1217 }, { "epoch": 0.27102803738317754, "grad_norm": 0.9525236072195425, "learning_rate": 8.555226497393607e-06, "loss": 0.5277, "step": 1218 }, { "epoch": 0.2712505562972853, "grad_norm": 0.9893070975640328, "learning_rate": 8.552691737444369e-06, "loss": 0.5242, "step": 1219 }, { "epoch": 0.27147307521139297, "grad_norm": 0.895436195120842, "learning_rate": 8.550155132124885e-06, "loss": 0.5309, "step": 1220 }, { "epoch": 0.27169559412550065, "grad_norm": 0.8773847156404947, "learning_rate": 8.547616682752738e-06, "loss": 0.5246, "step": 1221 }, { "epoch": 0.2719181130396084, "grad_norm": 0.9364028413929837, "learning_rate": 8.545076390646472e-06, "loss": 0.5576, "step": 1222 }, { "epoch": 0.2721406319537161, "grad_norm": 0.9111021772662753, "learning_rate": 8.542534257125587e-06, "loss": 0.5223, "step": 1223 }, { "epoch": 0.27236315086782376, "grad_norm": 0.9322500788346039, "learning_rate": 8.539990283510542e-06, "loss": 0.5346, "step": 1224 }, { "epoch": 0.27258566978193144, "grad_norm": 0.9521613222337499, "learning_rate": 8.537444471122748e-06, "loss": 0.5273, "step": 1225 }, { "epoch": 0.2728081886960392, "grad_norm": 0.9197051278892416, "learning_rate": 8.534896821284575e-06, "loss": 0.5224, "step": 1226 }, { "epoch": 0.27303070761014686, "grad_norm": 0.9315841657279399, "learning_rate": 8.532347335319344e-06, "loss": 0.5227, "step": 1227 }, { "epoch": 0.27325322652425454, "grad_norm": 0.8955163659533416, "learning_rate": 8.529796014551333e-06, "loss": 0.5477, "step": 1228 }, { "epoch": 0.2734757454383623, "grad_norm": 0.9394352720772584, "learning_rate": 8.52724286030577e-06, "loss": 0.525, "step": 1229 }, { "epoch": 0.27369826435246997, "grad_norm": 0.9163892321563274, "learning_rate": 8.524687873908838e-06, "loss": 0.5207, "step": 1230 }, { "epoch": 0.27392078326657765, "grad_norm": 0.8373421085648125, "learning_rate": 8.522131056687669e-06, "loss": 0.5257, "step": 1231 }, { "epoch": 0.27414330218068533, "grad_norm": 1.0182992873568182, "learning_rate": 8.519572409970347e-06, "loss": 0.5346, "step": 1232 }, { "epoch": 0.27436582109479307, "grad_norm": 0.8891737164922632, "learning_rate": 8.51701193508591e-06, "loss": 0.5415, "step": 1233 }, { "epoch": 0.27458834000890076, "grad_norm": 0.8627439487999251, "learning_rate": 8.51444963336434e-06, "loss": 0.5358, "step": 1234 }, { "epoch": 0.27481085892300844, "grad_norm": 1.2626941332849664, "learning_rate": 8.511885506136568e-06, "loss": 0.5433, "step": 1235 }, { "epoch": 0.2750333778371162, "grad_norm": 0.9629328094463587, "learning_rate": 8.509319554734478e-06, "loss": 0.5336, "step": 1236 }, { "epoch": 0.27525589675122386, "grad_norm": 0.8675953995934077, "learning_rate": 8.5067517804909e-06, "loss": 0.5362, "step": 1237 }, { "epoch": 0.27547841566533154, "grad_norm": 0.8569832634018162, "learning_rate": 8.504182184739608e-06, "loss": 0.5276, "step": 1238 }, { "epoch": 0.2757009345794392, "grad_norm": 0.8597309113057279, "learning_rate": 8.501610768815322e-06, "loss": 0.533, "step": 1239 }, { "epoch": 0.27592345349354697, "grad_norm": 0.9073984786168448, "learning_rate": 8.499037534053713e-06, "loss": 0.5371, "step": 1240 }, { "epoch": 0.27614597240765465, "grad_norm": 0.8469059073157662, "learning_rate": 8.496462481791394e-06, "loss": 0.5278, "step": 1241 }, { "epoch": 0.27636849132176233, "grad_norm": 0.8788860532229705, "learning_rate": 8.493885613365916e-06, "loss": 0.5403, "step": 1242 }, { "epoch": 0.27659101023587007, "grad_norm": 0.850968497537422, "learning_rate": 8.491306930115784e-06, "loss": 0.5362, "step": 1243 }, { "epoch": 0.27681352914997776, "grad_norm": 0.9054428875002736, "learning_rate": 8.488726433380435e-06, "loss": 0.5311, "step": 1244 }, { "epoch": 0.27703604806408544, "grad_norm": 0.8988010512041472, "learning_rate": 8.486144124500257e-06, "loss": 0.5123, "step": 1245 }, { "epoch": 0.2772585669781931, "grad_norm": 0.8830449223505065, "learning_rate": 8.483560004816575e-06, "loss": 0.5125, "step": 1246 }, { "epoch": 0.27748108589230086, "grad_norm": 0.9270494549306392, "learning_rate": 8.480974075671655e-06, "loss": 0.5319, "step": 1247 }, { "epoch": 0.27770360480640854, "grad_norm": 0.9552314041024665, "learning_rate": 8.4783863384087e-06, "loss": 0.5373, "step": 1248 }, { "epoch": 0.2779261237205162, "grad_norm": 0.8953472318388607, "learning_rate": 8.47579679437186e-06, "loss": 0.5259, "step": 1249 }, { "epoch": 0.27814864263462397, "grad_norm": 0.847900951481098, "learning_rate": 8.473205444906216e-06, "loss": 0.5313, "step": 1250 }, { "epoch": 0.27837116154873165, "grad_norm": 0.9564264979210843, "learning_rate": 8.47061229135779e-06, "loss": 0.5257, "step": 1251 }, { "epoch": 0.27859368046283933, "grad_norm": 0.875337613555111, "learning_rate": 8.468017335073538e-06, "loss": 0.5287, "step": 1252 }, { "epoch": 0.278816199376947, "grad_norm": 0.8687496495747389, "learning_rate": 8.465420577401359e-06, "loss": 0.5377, "step": 1253 }, { "epoch": 0.27903871829105475, "grad_norm": 0.9054936840921656, "learning_rate": 8.46282201969008e-06, "loss": 0.5241, "step": 1254 }, { "epoch": 0.27926123720516244, "grad_norm": 0.8997980554999582, "learning_rate": 8.46022166328947e-06, "loss": 0.5478, "step": 1255 }, { "epoch": 0.2794837561192701, "grad_norm": 0.9866710382893747, "learning_rate": 8.457619509550227e-06, "loss": 0.5387, "step": 1256 }, { "epoch": 0.27970627503337786, "grad_norm": 0.9068261564381727, "learning_rate": 8.455015559823984e-06, "loss": 0.5323, "step": 1257 }, { "epoch": 0.27992879394748554, "grad_norm": 1.088516428994483, "learning_rate": 8.452409815463308e-06, "loss": 0.5299, "step": 1258 }, { "epoch": 0.2801513128615932, "grad_norm": 0.8510184578231984, "learning_rate": 8.4498022778217e-06, "loss": 0.5136, "step": 1259 }, { "epoch": 0.2803738317757009, "grad_norm": 0.926822141652265, "learning_rate": 8.447192948253583e-06, "loss": 0.5363, "step": 1260 }, { "epoch": 0.28059635068980865, "grad_norm": 0.8763311116641287, "learning_rate": 8.444581828114326e-06, "loss": 0.5317, "step": 1261 }, { "epoch": 0.28081886960391633, "grad_norm": 0.961156836601599, "learning_rate": 8.441968918760215e-06, "loss": 0.5382, "step": 1262 }, { "epoch": 0.281041388518024, "grad_norm": 0.8526524291927818, "learning_rate": 8.439354221548472e-06, "loss": 0.5194, "step": 1263 }, { "epoch": 0.28126390743213175, "grad_norm": 1.0200664704545597, "learning_rate": 8.436737737837246e-06, "loss": 0.5417, "step": 1264 }, { "epoch": 0.28148642634623944, "grad_norm": 0.8906470071805719, "learning_rate": 8.434119468985614e-06, "loss": 0.527, "step": 1265 }, { "epoch": 0.2817089452603471, "grad_norm": 0.9801161864752433, "learning_rate": 8.431499416353576e-06, "loss": 0.5255, "step": 1266 }, { "epoch": 0.2819314641744548, "grad_norm": 0.9336610790741552, "learning_rate": 8.428877581302071e-06, "loss": 0.5194, "step": 1267 }, { "epoch": 0.28215398308856254, "grad_norm": 0.9986410742443192, "learning_rate": 8.426253965192948e-06, "loss": 0.5359, "step": 1268 }, { "epoch": 0.2823765020026702, "grad_norm": 0.8838293031619415, "learning_rate": 8.423628569388991e-06, "loss": 0.5292, "step": 1269 }, { "epoch": 0.2825990209167779, "grad_norm": 1.055800287893566, "learning_rate": 8.421001395253907e-06, "loss": 0.5488, "step": 1270 }, { "epoch": 0.28282153983088565, "grad_norm": 0.8853973703741123, "learning_rate": 8.418372444152325e-06, "loss": 0.506, "step": 1271 }, { "epoch": 0.28304405874499333, "grad_norm": 0.8968351351385746, "learning_rate": 8.415741717449798e-06, "loss": 0.5199, "step": 1272 }, { "epoch": 0.283266577659101, "grad_norm": 0.9190828196051659, "learning_rate": 8.413109216512801e-06, "loss": 0.5299, "step": 1273 }, { "epoch": 0.2834890965732087, "grad_norm": 0.905321449218482, "learning_rate": 8.410474942708733e-06, "loss": 0.5401, "step": 1274 }, { "epoch": 0.28371161548731644, "grad_norm": 0.9242124910489987, "learning_rate": 8.407838897405907e-06, "loss": 0.5188, "step": 1275 }, { "epoch": 0.2839341344014241, "grad_norm": 0.9811316877202471, "learning_rate": 8.405201081973563e-06, "loss": 0.5124, "step": 1276 }, { "epoch": 0.2841566533155318, "grad_norm": 0.9260260297151884, "learning_rate": 8.40256149778186e-06, "loss": 0.5284, "step": 1277 }, { "epoch": 0.28437917222963954, "grad_norm": 0.7942329073628175, "learning_rate": 8.399920146201872e-06, "loss": 0.512, "step": 1278 }, { "epoch": 0.2846016911437472, "grad_norm": 0.8560790627591095, "learning_rate": 8.39727702860559e-06, "loss": 0.5224, "step": 1279 }, { "epoch": 0.2848242100578549, "grad_norm": 0.8398954840516579, "learning_rate": 8.394632146365931e-06, "loss": 0.5109, "step": 1280 }, { "epoch": 0.2850467289719626, "grad_norm": 0.9337042013457291, "learning_rate": 8.39198550085672e-06, "loss": 0.5365, "step": 1281 }, { "epoch": 0.28526924788607033, "grad_norm": 0.893962902311381, "learning_rate": 8.389337093452704e-06, "loss": 0.5306, "step": 1282 }, { "epoch": 0.285491766800178, "grad_norm": 0.8733961490883108, "learning_rate": 8.386686925529534e-06, "loss": 0.5169, "step": 1283 }, { "epoch": 0.2857142857142857, "grad_norm": 0.884977936442665, "learning_rate": 8.384034998463793e-06, "loss": 0.5297, "step": 1284 }, { "epoch": 0.28593680462839344, "grad_norm": 0.9163798500357908, "learning_rate": 8.381381313632962e-06, "loss": 0.5191, "step": 1285 }, { "epoch": 0.2861593235425011, "grad_norm": 0.8860732846210642, "learning_rate": 8.378725872415441e-06, "loss": 0.5319, "step": 1286 }, { "epoch": 0.2863818424566088, "grad_norm": 0.9461225070329083, "learning_rate": 8.376068676190547e-06, "loss": 0.5168, "step": 1287 }, { "epoch": 0.2866043613707165, "grad_norm": 0.8486474571049173, "learning_rate": 8.373409726338499e-06, "loss": 0.5322, "step": 1288 }, { "epoch": 0.2868268802848242, "grad_norm": 0.9167600880576469, "learning_rate": 8.370749024240436e-06, "loss": 0.5358, "step": 1289 }, { "epoch": 0.2870493991989319, "grad_norm": 0.8766657079511562, "learning_rate": 8.368086571278404e-06, "loss": 0.5165, "step": 1290 }, { "epoch": 0.2872719181130396, "grad_norm": 0.8498840257361983, "learning_rate": 8.365422368835352e-06, "loss": 0.5211, "step": 1291 }, { "epoch": 0.28749443702714733, "grad_norm": 0.951713116590929, "learning_rate": 8.362756418295148e-06, "loss": 0.5235, "step": 1292 }, { "epoch": 0.287716955941255, "grad_norm": 0.9022409232415829, "learning_rate": 8.360088721042563e-06, "loss": 0.5272, "step": 1293 }, { "epoch": 0.2879394748553627, "grad_norm": 0.8926626914002854, "learning_rate": 8.357419278463275e-06, "loss": 0.5143, "step": 1294 }, { "epoch": 0.2881619937694704, "grad_norm": 0.936571263212133, "learning_rate": 8.354748091943867e-06, "loss": 0.523, "step": 1295 }, { "epoch": 0.2883845126835781, "grad_norm": 0.8893879888941879, "learning_rate": 8.352075162871833e-06, "loss": 0.5201, "step": 1296 }, { "epoch": 0.2886070315976858, "grad_norm": 0.8815573649946586, "learning_rate": 8.349400492635568e-06, "loss": 0.5205, "step": 1297 }, { "epoch": 0.2888295505117935, "grad_norm": 0.9471949794777292, "learning_rate": 8.346724082624374e-06, "loss": 0.5205, "step": 1298 }, { "epoch": 0.2890520694259012, "grad_norm": 0.9027704895632578, "learning_rate": 8.344045934228455e-06, "loss": 0.5114, "step": 1299 }, { "epoch": 0.2892745883400089, "grad_norm": 0.9930683795690172, "learning_rate": 8.341366048838917e-06, "loss": 0.5287, "step": 1300 }, { "epoch": 0.2894971072541166, "grad_norm": 0.9715936907507949, "learning_rate": 8.338684427847768e-06, "loss": 0.5305, "step": 1301 }, { "epoch": 0.2897196261682243, "grad_norm": 0.9693010238064467, "learning_rate": 8.336001072647924e-06, "loss": 0.5511, "step": 1302 }, { "epoch": 0.289942145082332, "grad_norm": 0.8861285150854094, "learning_rate": 8.333315984633192e-06, "loss": 0.5172, "step": 1303 }, { "epoch": 0.2901646639964397, "grad_norm": 0.9407604616282725, "learning_rate": 8.330629165198286e-06, "loss": 0.5265, "step": 1304 }, { "epoch": 0.2903871829105474, "grad_norm": 0.893980695641105, "learning_rate": 8.327940615738819e-06, "loss": 0.5311, "step": 1305 }, { "epoch": 0.2906097018246551, "grad_norm": 0.9964583168076775, "learning_rate": 8.325250337651297e-06, "loss": 0.5377, "step": 1306 }, { "epoch": 0.2908322207387628, "grad_norm": 0.8784546501426929, "learning_rate": 8.322558332333132e-06, "loss": 0.5339, "step": 1307 }, { "epoch": 0.2910547396528705, "grad_norm": 1.0118528489136636, "learning_rate": 8.319864601182625e-06, "loss": 0.5257, "step": 1308 }, { "epoch": 0.29127725856697817, "grad_norm": 0.9103896078088639, "learning_rate": 8.31716914559898e-06, "loss": 0.5346, "step": 1309 }, { "epoch": 0.2914997774810859, "grad_norm": 0.8403101440617023, "learning_rate": 8.314471966982293e-06, "loss": 0.5197, "step": 1310 }, { "epoch": 0.2917222963951936, "grad_norm": 0.9776782424758279, "learning_rate": 8.311773066733559e-06, "loss": 0.5365, "step": 1311 }, { "epoch": 0.2919448153093013, "grad_norm": 0.9306974454791415, "learning_rate": 8.30907244625466e-06, "loss": 0.5184, "step": 1312 }, { "epoch": 0.292167334223409, "grad_norm": 0.9005469834717204, "learning_rate": 8.306370106948377e-06, "loss": 0.5366, "step": 1313 }, { "epoch": 0.2923898531375167, "grad_norm": 0.9214671452680736, "learning_rate": 8.303666050218385e-06, "loss": 0.5393, "step": 1314 }, { "epoch": 0.2926123720516244, "grad_norm": 0.8916499200248336, "learning_rate": 8.300960277469248e-06, "loss": 0.531, "step": 1315 }, { "epoch": 0.29283489096573206, "grad_norm": 0.8672554070184376, "learning_rate": 8.298252790106421e-06, "loss": 0.5194, "step": 1316 }, { "epoch": 0.2930574098798398, "grad_norm": 0.8845575688308699, "learning_rate": 8.29554358953625e-06, "loss": 0.5242, "step": 1317 }, { "epoch": 0.2932799287939475, "grad_norm": 0.8436879525797187, "learning_rate": 8.292832677165976e-06, "loss": 0.5258, "step": 1318 }, { "epoch": 0.29350244770805517, "grad_norm": 1.0992241949668127, "learning_rate": 8.29012005440372e-06, "loss": 0.542, "step": 1319 }, { "epoch": 0.2937249666221629, "grad_norm": 0.8500747664917383, "learning_rate": 8.287405722658499e-06, "loss": 0.5391, "step": 1320 }, { "epoch": 0.2939474855362706, "grad_norm": 0.9259959522390736, "learning_rate": 8.284689683340215e-06, "loss": 0.523, "step": 1321 }, { "epoch": 0.2941700044503783, "grad_norm": 0.8977534724706752, "learning_rate": 8.281971937859654e-06, "loss": 0.5228, "step": 1322 }, { "epoch": 0.29439252336448596, "grad_norm": 0.8868997675069736, "learning_rate": 8.279252487628495e-06, "loss": 0.5322, "step": 1323 }, { "epoch": 0.2946150422785937, "grad_norm": 0.906307912480755, "learning_rate": 8.276531334059296e-06, "loss": 0.5179, "step": 1324 }, { "epoch": 0.2948375611927014, "grad_norm": 0.9191796443566328, "learning_rate": 8.273808478565503e-06, "loss": 0.5159, "step": 1325 }, { "epoch": 0.29506008010680906, "grad_norm": 0.964443767891535, "learning_rate": 8.271083922561447e-06, "loss": 0.5181, "step": 1326 }, { "epoch": 0.2952825990209168, "grad_norm": 0.9743103802233849, "learning_rate": 8.26835766746234e-06, "loss": 0.5117, "step": 1327 }, { "epoch": 0.2955051179350245, "grad_norm": 0.8859950341579987, "learning_rate": 8.265629714684273e-06, "loss": 0.5389, "step": 1328 }, { "epoch": 0.29572763684913217, "grad_norm": 0.8703984587479551, "learning_rate": 8.26290006564423e-06, "loss": 0.5211, "step": 1329 }, { "epoch": 0.29595015576323985, "grad_norm": 0.8729503712174862, "learning_rate": 8.260168721760066e-06, "loss": 0.5185, "step": 1330 }, { "epoch": 0.2961726746773476, "grad_norm": 0.8815296980273809, "learning_rate": 8.25743568445052e-06, "loss": 0.5212, "step": 1331 }, { "epoch": 0.2963951935914553, "grad_norm": 0.8861055223322177, "learning_rate": 8.25470095513521e-06, "loss": 0.5121, "step": 1332 }, { "epoch": 0.29661771250556296, "grad_norm": 0.9281162595817742, "learning_rate": 8.25196453523463e-06, "loss": 0.5192, "step": 1333 }, { "epoch": 0.2968402314196707, "grad_norm": 0.8925580909022802, "learning_rate": 8.249226426170162e-06, "loss": 0.5242, "step": 1334 }, { "epoch": 0.2970627503337784, "grad_norm": 0.9408083737452646, "learning_rate": 8.246486629364052e-06, "loss": 0.5206, "step": 1335 }, { "epoch": 0.29728526924788606, "grad_norm": 0.9423930353708976, "learning_rate": 8.243745146239434e-06, "loss": 0.5215, "step": 1336 }, { "epoch": 0.29750778816199375, "grad_norm": 0.9929504877789058, "learning_rate": 8.24100197822031e-06, "loss": 0.5416, "step": 1337 }, { "epoch": 0.2977303070761015, "grad_norm": 0.9097782969312816, "learning_rate": 8.238257126731561e-06, "loss": 0.5181, "step": 1338 }, { "epoch": 0.29795282599020917, "grad_norm": 0.8647523176055336, "learning_rate": 8.235510593198943e-06, "loss": 0.5239, "step": 1339 }, { "epoch": 0.29817534490431685, "grad_norm": 0.9200777537406342, "learning_rate": 8.232762379049082e-06, "loss": 0.5322, "step": 1340 }, { "epoch": 0.2983978638184246, "grad_norm": 0.9111522368967903, "learning_rate": 8.230012485709484e-06, "loss": 0.506, "step": 1341 }, { "epoch": 0.2986203827325323, "grad_norm": 0.8728528536835171, "learning_rate": 8.227260914608516e-06, "loss": 0.5271, "step": 1342 }, { "epoch": 0.29884290164663996, "grad_norm": 0.9216250345276107, "learning_rate": 8.22450766717543e-06, "loss": 0.5214, "step": 1343 }, { "epoch": 0.29906542056074764, "grad_norm": 0.9650910066926489, "learning_rate": 8.221752744840338e-06, "loss": 0.5348, "step": 1344 }, { "epoch": 0.2992879394748554, "grad_norm": 0.9332861243630077, "learning_rate": 8.218996149034228e-06, "loss": 0.5408, "step": 1345 }, { "epoch": 0.29951045838896306, "grad_norm": 0.9291623864539831, "learning_rate": 8.216237881188952e-06, "loss": 0.536, "step": 1346 }, { "epoch": 0.29973297730307075, "grad_norm": 1.0081408607045732, "learning_rate": 8.213477942737237e-06, "loss": 0.526, "step": 1347 }, { "epoch": 0.2999554962171785, "grad_norm": 0.8772898733834777, "learning_rate": 8.210716335112671e-06, "loss": 0.5202, "step": 1348 }, { "epoch": 0.30017801513128617, "grad_norm": 0.9751743737545128, "learning_rate": 8.207953059749717e-06, "loss": 0.5301, "step": 1349 }, { "epoch": 0.30040053404539385, "grad_norm": 1.1200969858900487, "learning_rate": 8.205188118083695e-06, "loss": 0.5174, "step": 1350 }, { "epoch": 0.30062305295950154, "grad_norm": 0.8703076576000002, "learning_rate": 8.202421511550799e-06, "loss": 0.513, "step": 1351 }, { "epoch": 0.3008455718736093, "grad_norm": 0.9202226670388272, "learning_rate": 8.199653241588081e-06, "loss": 0.5119, "step": 1352 }, { "epoch": 0.30106809078771696, "grad_norm": 0.9690009202254072, "learning_rate": 8.196883309633461e-06, "loss": 0.5229, "step": 1353 }, { "epoch": 0.30129060970182464, "grad_norm": 0.8874516414332113, "learning_rate": 8.194111717125722e-06, "loss": 0.5366, "step": 1354 }, { "epoch": 0.3015131286159324, "grad_norm": 1.0059088938478211, "learning_rate": 8.191338465504508e-06, "loss": 0.5379, "step": 1355 }, { "epoch": 0.30173564753004006, "grad_norm": 1.0702035572357027, "learning_rate": 8.188563556210328e-06, "loss": 0.5272, "step": 1356 }, { "epoch": 0.30195816644414775, "grad_norm": 0.8773232041866774, "learning_rate": 8.185786990684545e-06, "loss": 0.5344, "step": 1357 }, { "epoch": 0.30218068535825543, "grad_norm": 0.8740983884854258, "learning_rate": 8.183008770369392e-06, "loss": 0.5232, "step": 1358 }, { "epoch": 0.30240320427236317, "grad_norm": 0.9514445896573991, "learning_rate": 8.180228896707952e-06, "loss": 0.5103, "step": 1359 }, { "epoch": 0.30262572318647085, "grad_norm": 0.9113293830769787, "learning_rate": 8.177447371144175e-06, "loss": 0.5277, "step": 1360 }, { "epoch": 0.30284824210057854, "grad_norm": 0.9407710223336099, "learning_rate": 8.174664195122863e-06, "loss": 0.5207, "step": 1361 }, { "epoch": 0.3030707610146863, "grad_norm": 0.8962497142723718, "learning_rate": 8.171879370089679e-06, "loss": 0.5278, "step": 1362 }, { "epoch": 0.30329327992879396, "grad_norm": 0.9211345355656212, "learning_rate": 8.169092897491141e-06, "loss": 0.5293, "step": 1363 }, { "epoch": 0.30351579884290164, "grad_norm": 0.8540136642586145, "learning_rate": 8.166304778774624e-06, "loss": 0.5231, "step": 1364 }, { "epoch": 0.3037383177570093, "grad_norm": 0.9821007083606118, "learning_rate": 8.163515015388353e-06, "loss": 0.5388, "step": 1365 }, { "epoch": 0.30396083667111706, "grad_norm": 0.9804091853752109, "learning_rate": 8.160723608781416e-06, "loss": 0.5178, "step": 1366 }, { "epoch": 0.30418335558522475, "grad_norm": 0.8554265014712279, "learning_rate": 8.157930560403746e-06, "loss": 0.5176, "step": 1367 }, { "epoch": 0.30440587449933243, "grad_norm": 0.965929391427452, "learning_rate": 8.155135871706136e-06, "loss": 0.5172, "step": 1368 }, { "epoch": 0.30462839341344017, "grad_norm": 0.9747685808959208, "learning_rate": 8.152339544140226e-06, "loss": 0.5139, "step": 1369 }, { "epoch": 0.30485091232754785, "grad_norm": 0.8847664638560099, "learning_rate": 8.149541579158511e-06, "loss": 0.5297, "step": 1370 }, { "epoch": 0.30507343124165553, "grad_norm": 0.9235658274683224, "learning_rate": 8.14674197821433e-06, "loss": 0.5169, "step": 1371 }, { "epoch": 0.3052959501557632, "grad_norm": 0.975828786710518, "learning_rate": 8.143940742761881e-06, "loss": 0.5263, "step": 1372 }, { "epoch": 0.30551846906987096, "grad_norm": 0.8879296110159627, "learning_rate": 8.141137874256204e-06, "loss": 0.5174, "step": 1373 }, { "epoch": 0.30574098798397864, "grad_norm": 0.930068500440486, "learning_rate": 8.138333374153187e-06, "loss": 0.5147, "step": 1374 }, { "epoch": 0.3059635068980863, "grad_norm": 0.9141861253894993, "learning_rate": 8.135527243909574e-06, "loss": 0.5354, "step": 1375 }, { "epoch": 0.30618602581219406, "grad_norm": 0.8856261081604482, "learning_rate": 8.132719484982945e-06, "loss": 0.5368, "step": 1376 }, { "epoch": 0.30640854472630175, "grad_norm": 0.9658976226765815, "learning_rate": 8.129910098831732e-06, "loss": 0.5177, "step": 1377 }, { "epoch": 0.30663106364040943, "grad_norm": 0.8747231546971982, "learning_rate": 8.127099086915212e-06, "loss": 0.5189, "step": 1378 }, { "epoch": 0.3068535825545171, "grad_norm": 0.8663133042452534, "learning_rate": 8.124286450693503e-06, "loss": 0.5223, "step": 1379 }, { "epoch": 0.30707610146862485, "grad_norm": 0.846159610048521, "learning_rate": 8.121472191627572e-06, "loss": 0.5268, "step": 1380 }, { "epoch": 0.30729862038273253, "grad_norm": 0.8802995435759506, "learning_rate": 8.118656311179226e-06, "loss": 0.5304, "step": 1381 }, { "epoch": 0.3075211392968402, "grad_norm": 0.8525299190187032, "learning_rate": 8.11583881081111e-06, "loss": 0.5181, "step": 1382 }, { "epoch": 0.30774365821094796, "grad_norm": 0.8304303722786894, "learning_rate": 8.11301969198672e-06, "loss": 0.5197, "step": 1383 }, { "epoch": 0.30796617712505564, "grad_norm": 0.848586431213214, "learning_rate": 8.110198956170383e-06, "loss": 0.5053, "step": 1384 }, { "epoch": 0.3081886960391633, "grad_norm": 0.8341356752298017, "learning_rate": 8.107376604827275e-06, "loss": 0.5221, "step": 1385 }, { "epoch": 0.308411214953271, "grad_norm": 0.860098285994161, "learning_rate": 8.104552639423402e-06, "loss": 0.513, "step": 1386 }, { "epoch": 0.30863373386737875, "grad_norm": 0.8525975299462005, "learning_rate": 8.101727061425614e-06, "loss": 0.5391, "step": 1387 }, { "epoch": 0.30885625278148643, "grad_norm": 0.8626069423782267, "learning_rate": 8.0988998723016e-06, "loss": 0.5087, "step": 1388 }, { "epoch": 0.3090787716955941, "grad_norm": 0.8628686293272436, "learning_rate": 8.09607107351988e-06, "loss": 0.5061, "step": 1389 }, { "epoch": 0.30930129060970185, "grad_norm": 0.9208576100885597, "learning_rate": 8.093240666549816e-06, "loss": 0.5304, "step": 1390 }, { "epoch": 0.30952380952380953, "grad_norm": 0.8321877610540442, "learning_rate": 8.090408652861603e-06, "loss": 0.522, "step": 1391 }, { "epoch": 0.3097463284379172, "grad_norm": 0.9089034845542446, "learning_rate": 8.087575033926267e-06, "loss": 0.536, "step": 1392 }, { "epoch": 0.3099688473520249, "grad_norm": 0.9432515783309082, "learning_rate": 8.084739811215672e-06, "loss": 0.546, "step": 1393 }, { "epoch": 0.31019136626613264, "grad_norm": 0.8847421146446793, "learning_rate": 8.081902986202517e-06, "loss": 0.5181, "step": 1394 }, { "epoch": 0.3104138851802403, "grad_norm": 0.959867665505344, "learning_rate": 8.07906456036033e-06, "loss": 0.5093, "step": 1395 }, { "epoch": 0.310636404094348, "grad_norm": 0.9077268413035757, "learning_rate": 8.076224535163468e-06, "loss": 0.5124, "step": 1396 }, { "epoch": 0.31085892300845575, "grad_norm": 0.8257730135061107, "learning_rate": 8.073382912087124e-06, "loss": 0.5132, "step": 1397 }, { "epoch": 0.31108144192256343, "grad_norm": 0.8818182160239487, "learning_rate": 8.07053969260732e-06, "loss": 0.5204, "step": 1398 }, { "epoch": 0.3113039608366711, "grad_norm": 0.9051941240846735, "learning_rate": 8.067694878200903e-06, "loss": 0.5327, "step": 1399 }, { "epoch": 0.3115264797507788, "grad_norm": 0.8963674017629025, "learning_rate": 8.064848470345551e-06, "loss": 0.4962, "step": 1400 }, { "epoch": 0.31174899866488653, "grad_norm": 0.851604272195562, "learning_rate": 8.062000470519775e-06, "loss": 0.5037, "step": 1401 }, { "epoch": 0.3119715175789942, "grad_norm": 0.9442187074079312, "learning_rate": 8.059150880202902e-06, "loss": 0.5354, "step": 1402 }, { "epoch": 0.3121940364931019, "grad_norm": 0.9004073985063836, "learning_rate": 8.056299700875096e-06, "loss": 0.5278, "step": 1403 }, { "epoch": 0.31241655540720964, "grad_norm": 0.9076609444334955, "learning_rate": 8.05344693401734e-06, "loss": 0.5211, "step": 1404 }, { "epoch": 0.3126390743213173, "grad_norm": 0.987603666451543, "learning_rate": 8.050592581111441e-06, "loss": 0.5285, "step": 1405 }, { "epoch": 0.312861593235425, "grad_norm": 0.935267154306952, "learning_rate": 8.047736643640034e-06, "loss": 0.5287, "step": 1406 }, { "epoch": 0.3130841121495327, "grad_norm": 0.9260767787205283, "learning_rate": 8.044879123086575e-06, "loss": 0.5216, "step": 1407 }, { "epoch": 0.31330663106364043, "grad_norm": 0.9091840189069964, "learning_rate": 8.04202002093534e-06, "loss": 0.5172, "step": 1408 }, { "epoch": 0.3135291499777481, "grad_norm": 0.8924158189051365, "learning_rate": 8.039159338671437e-06, "loss": 0.5213, "step": 1409 }, { "epoch": 0.3137516688918558, "grad_norm": 0.9998142983775506, "learning_rate": 8.036297077780775e-06, "loss": 0.523, "step": 1410 }, { "epoch": 0.31397418780596353, "grad_norm": 0.9798697538807276, "learning_rate": 8.033433239750101e-06, "loss": 0.5225, "step": 1411 }, { "epoch": 0.3141967067200712, "grad_norm": 0.9521323755652437, "learning_rate": 8.030567826066975e-06, "loss": 0.5057, "step": 1412 }, { "epoch": 0.3144192256341789, "grad_norm": 0.9904949942269291, "learning_rate": 8.027700838219774e-06, "loss": 0.5304, "step": 1413 }, { "epoch": 0.3146417445482866, "grad_norm": 0.9295038817773738, "learning_rate": 8.024832277697692e-06, "loss": 0.5136, "step": 1414 }, { "epoch": 0.3148642634623943, "grad_norm": 0.9806322730542638, "learning_rate": 8.021962145990746e-06, "loss": 0.5156, "step": 1415 }, { "epoch": 0.315086782376502, "grad_norm": 0.9328580359690276, "learning_rate": 8.019090444589762e-06, "loss": 0.5211, "step": 1416 }, { "epoch": 0.3153093012906097, "grad_norm": 1.054877576165002, "learning_rate": 8.016217174986388e-06, "loss": 0.5384, "step": 1417 }, { "epoch": 0.31553182020471743, "grad_norm": 0.8872279931751127, "learning_rate": 8.013342338673078e-06, "loss": 0.5197, "step": 1418 }, { "epoch": 0.3157543391188251, "grad_norm": 0.9622839832300981, "learning_rate": 8.010465937143106e-06, "loss": 0.5287, "step": 1419 }, { "epoch": 0.3159768580329328, "grad_norm": 0.9491844454075008, "learning_rate": 8.00758797189056e-06, "loss": 0.5173, "step": 1420 }, { "epoch": 0.3161993769470405, "grad_norm": 0.9654592905275879, "learning_rate": 8.004708444410334e-06, "loss": 0.5166, "step": 1421 }, { "epoch": 0.3164218958611482, "grad_norm": 0.9441653129155797, "learning_rate": 8.001827356198141e-06, "loss": 0.543, "step": 1422 }, { "epoch": 0.3166444147752559, "grad_norm": 0.9133474995640725, "learning_rate": 7.9989447087505e-06, "loss": 0.5144, "step": 1423 }, { "epoch": 0.3168669336893636, "grad_norm": 0.9076232456038157, "learning_rate": 7.996060503564741e-06, "loss": 0.5256, "step": 1424 }, { "epoch": 0.3170894526034713, "grad_norm": 0.8921858382360232, "learning_rate": 7.993174742139003e-06, "loss": 0.5208, "step": 1425 }, { "epoch": 0.317311971517579, "grad_norm": 0.8995859447172939, "learning_rate": 7.990287425972232e-06, "loss": 0.5196, "step": 1426 }, { "epoch": 0.3175344904316867, "grad_norm": 0.8722755987093703, "learning_rate": 7.987398556564186e-06, "loss": 0.5203, "step": 1427 }, { "epoch": 0.3177570093457944, "grad_norm": 0.9453290181166469, "learning_rate": 7.984508135415423e-06, "loss": 0.5211, "step": 1428 }, { "epoch": 0.3179795282599021, "grad_norm": 0.8619728538578476, "learning_rate": 7.981616164027316e-06, "loss": 0.5113, "step": 1429 }, { "epoch": 0.3182020471740098, "grad_norm": 0.9595866666502092, "learning_rate": 7.978722643902031e-06, "loss": 0.5202, "step": 1430 }, { "epoch": 0.3184245660881175, "grad_norm": 0.9390174588676369, "learning_rate": 7.975827576542552e-06, "loss": 0.5304, "step": 1431 }, { "epoch": 0.3186470850022252, "grad_norm": 0.977373725887712, "learning_rate": 7.972930963452659e-06, "loss": 0.5491, "step": 1432 }, { "epoch": 0.3188696039163329, "grad_norm": 0.9282581263406573, "learning_rate": 7.970032806136932e-06, "loss": 0.5327, "step": 1433 }, { "epoch": 0.3190921228304406, "grad_norm": 0.9180846078180884, "learning_rate": 7.967133106100762e-06, "loss": 0.5101, "step": 1434 }, { "epoch": 0.31931464174454827, "grad_norm": 0.9834762865930471, "learning_rate": 7.964231864850337e-06, "loss": 0.5246, "step": 1435 }, { "epoch": 0.319537160658656, "grad_norm": 0.8982646091253704, "learning_rate": 7.961329083892639e-06, "loss": 0.533, "step": 1436 }, { "epoch": 0.3197596795727637, "grad_norm": 0.867900916080965, "learning_rate": 7.958424764735462e-06, "loss": 0.5274, "step": 1437 }, { "epoch": 0.31998219848687137, "grad_norm": 0.890398172652515, "learning_rate": 7.955518908887392e-06, "loss": 0.5217, "step": 1438 }, { "epoch": 0.3202047174009791, "grad_norm": 0.8873164615507222, "learning_rate": 7.952611517857811e-06, "loss": 0.5101, "step": 1439 }, { "epoch": 0.3204272363150868, "grad_norm": 0.8866114766216252, "learning_rate": 7.949702593156905e-06, "loss": 0.5308, "step": 1440 }, { "epoch": 0.3206497552291945, "grad_norm": 0.8852475269310545, "learning_rate": 7.94679213629565e-06, "loss": 0.5088, "step": 1441 }, { "epoch": 0.32087227414330216, "grad_norm": 0.9006641085410773, "learning_rate": 7.943880148785824e-06, "loss": 0.5097, "step": 1442 }, { "epoch": 0.3210947930574099, "grad_norm": 0.9323578710826199, "learning_rate": 7.940966632139993e-06, "loss": 0.5203, "step": 1443 }, { "epoch": 0.3213173119715176, "grad_norm": 0.9016065844463955, "learning_rate": 7.938051587871527e-06, "loss": 0.533, "step": 1444 }, { "epoch": 0.32153983088562527, "grad_norm": 0.9136604910581234, "learning_rate": 7.935135017494578e-06, "loss": 0.5097, "step": 1445 }, { "epoch": 0.32176234979973295, "grad_norm": 0.9839149488626672, "learning_rate": 7.9322169225241e-06, "loss": 0.5094, "step": 1446 }, { "epoch": 0.3219848687138407, "grad_norm": 0.9363031314036719, "learning_rate": 7.929297304475836e-06, "loss": 0.5074, "step": 1447 }, { "epoch": 0.32220738762794837, "grad_norm": 0.9275270120076312, "learning_rate": 7.926376164866317e-06, "loss": 0.5134, "step": 1448 }, { "epoch": 0.32242990654205606, "grad_norm": 1.025305916258728, "learning_rate": 7.923453505212869e-06, "loss": 0.5273, "step": 1449 }, { "epoch": 0.3226524254561638, "grad_norm": 0.996869488584279, "learning_rate": 7.920529327033604e-06, "loss": 0.5105, "step": 1450 }, { "epoch": 0.3228749443702715, "grad_norm": 0.8956811682828125, "learning_rate": 7.917603631847425e-06, "loss": 0.5111, "step": 1451 }, { "epoch": 0.32309746328437916, "grad_norm": 0.8660452526812829, "learning_rate": 7.914676421174023e-06, "loss": 0.5163, "step": 1452 }, { "epoch": 0.32331998219848684, "grad_norm": 0.895231132199203, "learning_rate": 7.911747696533874e-06, "loss": 0.5427, "step": 1453 }, { "epoch": 0.3235425011125946, "grad_norm": 0.930103244851309, "learning_rate": 7.90881745944824e-06, "loss": 0.508, "step": 1454 }, { "epoch": 0.32376502002670227, "grad_norm": 0.9673457225387763, "learning_rate": 7.905885711439175e-06, "loss": 0.515, "step": 1455 }, { "epoch": 0.32398753894080995, "grad_norm": 1.0006672349862904, "learning_rate": 7.902952454029512e-06, "loss": 0.5146, "step": 1456 }, { "epoch": 0.3242100578549177, "grad_norm": 0.9205440269992142, "learning_rate": 7.900017688742864e-06, "loss": 0.5349, "step": 1457 }, { "epoch": 0.32443257676902537, "grad_norm": 0.9016279801426516, "learning_rate": 7.89708141710364e-06, "loss": 0.5009, "step": 1458 }, { "epoch": 0.32465509568313305, "grad_norm": 0.8756731757168372, "learning_rate": 7.894143640637019e-06, "loss": 0.5231, "step": 1459 }, { "epoch": 0.32487761459724074, "grad_norm": 0.8624948975923743, "learning_rate": 7.891204360868969e-06, "loss": 0.5191, "step": 1460 }, { "epoch": 0.3251001335113485, "grad_norm": 0.857699124238838, "learning_rate": 7.888263579326237e-06, "loss": 0.529, "step": 1461 }, { "epoch": 0.32532265242545616, "grad_norm": 0.8862887283188678, "learning_rate": 7.885321297536347e-06, "loss": 0.5167, "step": 1462 }, { "epoch": 0.32554517133956384, "grad_norm": 0.8529789613419072, "learning_rate": 7.882377517027605e-06, "loss": 0.508, "step": 1463 }, { "epoch": 0.3257676902536716, "grad_norm": 0.9599563398511498, "learning_rate": 7.879432239329095e-06, "loss": 0.5175, "step": 1464 }, { "epoch": 0.32599020916777927, "grad_norm": 0.8964366623981532, "learning_rate": 7.87648546597068e-06, "loss": 0.515, "step": 1465 }, { "epoch": 0.32621272808188695, "grad_norm": 0.8683920290858457, "learning_rate": 7.873537198483e-06, "loss": 0.5261, "step": 1466 }, { "epoch": 0.32643524699599463, "grad_norm": 0.95734068867324, "learning_rate": 7.870587438397469e-06, "loss": 0.5292, "step": 1467 }, { "epoch": 0.32665776591010237, "grad_norm": 0.9533135840130484, "learning_rate": 7.867636187246276e-06, "loss": 0.5407, "step": 1468 }, { "epoch": 0.32688028482421005, "grad_norm": 0.881905803544768, "learning_rate": 7.864683446562388e-06, "loss": 0.5113, "step": 1469 }, { "epoch": 0.32710280373831774, "grad_norm": 0.9232370937672819, "learning_rate": 7.861729217879541e-06, "loss": 0.5057, "step": 1470 }, { "epoch": 0.3273253226524255, "grad_norm": 1.013401265790807, "learning_rate": 7.858773502732248e-06, "loss": 0.5305, "step": 1471 }, { "epoch": 0.32754784156653316, "grad_norm": 0.9782067473418843, "learning_rate": 7.855816302655791e-06, "loss": 0.5195, "step": 1472 }, { "epoch": 0.32777036048064084, "grad_norm": 1.0476276145850343, "learning_rate": 7.852857619186226e-06, "loss": 0.5145, "step": 1473 }, { "epoch": 0.3279928793947485, "grad_norm": 0.8790364141454244, "learning_rate": 7.849897453860378e-06, "loss": 0.5272, "step": 1474 }, { "epoch": 0.32821539830885627, "grad_norm": 0.9744242620251231, "learning_rate": 7.846935808215841e-06, "loss": 0.5342, "step": 1475 }, { "epoch": 0.32843791722296395, "grad_norm": 0.8857419239556126, "learning_rate": 7.843972683790982e-06, "loss": 0.5138, "step": 1476 }, { "epoch": 0.32866043613707163, "grad_norm": 0.8845857466846334, "learning_rate": 7.841008082124928e-06, "loss": 0.5301, "step": 1477 }, { "epoch": 0.32888295505117937, "grad_norm": 1.0004332740034343, "learning_rate": 7.838042004757583e-06, "loss": 0.5126, "step": 1478 }, { "epoch": 0.32910547396528705, "grad_norm": 0.8991609632287016, "learning_rate": 7.83507445322961e-06, "loss": 0.5225, "step": 1479 }, { "epoch": 0.32932799287939474, "grad_norm": 0.9672367070043393, "learning_rate": 7.832105429082442e-06, "loss": 0.531, "step": 1480 }, { "epoch": 0.3295505117935024, "grad_norm": 3.6731304446351127, "learning_rate": 7.829134933858275e-06, "loss": 0.5166, "step": 1481 }, { "epoch": 0.32977303070761016, "grad_norm": 0.9501949227321724, "learning_rate": 7.826162969100069e-06, "loss": 0.5195, "step": 1482 }, { "epoch": 0.32999554962171784, "grad_norm": 0.9943038650679786, "learning_rate": 7.82318953635155e-06, "loss": 0.5342, "step": 1483 }, { "epoch": 0.3302180685358255, "grad_norm": 0.8839118961415694, "learning_rate": 7.820214637157202e-06, "loss": 0.5307, "step": 1484 }, { "epoch": 0.33044058744993327, "grad_norm": 1.0103985200613694, "learning_rate": 7.817238273062276e-06, "loss": 0.5527, "step": 1485 }, { "epoch": 0.33066310636404095, "grad_norm": 0.9991559250183386, "learning_rate": 7.81426044561278e-06, "loss": 0.5306, "step": 1486 }, { "epoch": 0.33088562527814863, "grad_norm": 0.8824358575279955, "learning_rate": 7.811281156355481e-06, "loss": 0.5336, "step": 1487 }, { "epoch": 0.3311081441922563, "grad_norm": 0.9512160758183117, "learning_rate": 7.808300406837908e-06, "loss": 0.4993, "step": 1488 }, { "epoch": 0.33133066310636405, "grad_norm": 0.9215051547707004, "learning_rate": 7.80531819860835e-06, "loss": 0.5176, "step": 1489 }, { "epoch": 0.33155318202047174, "grad_norm": 0.8253262453215231, "learning_rate": 7.802334533215854e-06, "loss": 0.5197, "step": 1490 }, { "epoch": 0.3317757009345794, "grad_norm": 0.9237601677000308, "learning_rate": 7.799349412210216e-06, "loss": 0.5308, "step": 1491 }, { "epoch": 0.33199821984868716, "grad_norm": 0.8944480270476948, "learning_rate": 7.796362837141996e-06, "loss": 0.5166, "step": 1492 }, { "epoch": 0.33222073876279484, "grad_norm": 0.9040405211583119, "learning_rate": 7.793374809562508e-06, "loss": 0.5058, "step": 1493 }, { "epoch": 0.3324432576769025, "grad_norm": 0.8910059775172197, "learning_rate": 7.790385331023818e-06, "loss": 0.52, "step": 1494 }, { "epoch": 0.3326657765910102, "grad_norm": 0.9107566809364451, "learning_rate": 7.787394403078747e-06, "loss": 0.5373, "step": 1495 }, { "epoch": 0.33288829550511795, "grad_norm": 0.9674794439634451, "learning_rate": 7.784402027280873e-06, "loss": 0.5456, "step": 1496 }, { "epoch": 0.33311081441922563, "grad_norm": 0.9074384409448379, "learning_rate": 7.781408205184516e-06, "loss": 0.5388, "step": 1497 }, { "epoch": 0.3333333333333333, "grad_norm": 0.9124237359119463, "learning_rate": 7.778412938344755e-06, "loss": 0.5141, "step": 1498 }, { "epoch": 0.33355585224744105, "grad_norm": 0.9074649822340237, "learning_rate": 7.775416228317421e-06, "loss": 0.5072, "step": 1499 }, { "epoch": 0.33377837116154874, "grad_norm": 0.8593985232579898, "learning_rate": 7.77241807665909e-06, "loss": 0.5197, "step": 1500 }, { "epoch": 0.3340008900756564, "grad_norm": 0.8726719472346535, "learning_rate": 7.769418484927085e-06, "loss": 0.5155, "step": 1501 }, { "epoch": 0.3342234089897641, "grad_norm": 0.898101463775657, "learning_rate": 7.766417454679484e-06, "loss": 0.5245, "step": 1502 }, { "epoch": 0.33444592790387184, "grad_norm": 0.8699584037663097, "learning_rate": 7.763414987475107e-06, "loss": 0.5228, "step": 1503 }, { "epoch": 0.3346684468179795, "grad_norm": 0.8675181936724293, "learning_rate": 7.76041108487352e-06, "loss": 0.505, "step": 1504 }, { "epoch": 0.3348909657320872, "grad_norm": 0.9097609218560371, "learning_rate": 7.75740574843504e-06, "loss": 0.517, "step": 1505 }, { "epoch": 0.33511348464619495, "grad_norm": 1.0166292508173622, "learning_rate": 7.754398979720722e-06, "loss": 0.5314, "step": 1506 }, { "epoch": 0.33533600356030263, "grad_norm": 0.9869807898736861, "learning_rate": 7.75139078029237e-06, "loss": 0.5371, "step": 1507 }, { "epoch": 0.3355585224744103, "grad_norm": 0.9446836855059713, "learning_rate": 7.748381151712527e-06, "loss": 0.5285, "step": 1508 }, { "epoch": 0.335781041388518, "grad_norm": 0.9183667439190066, "learning_rate": 7.745370095544485e-06, "loss": 0.5134, "step": 1509 }, { "epoch": 0.33600356030262574, "grad_norm": 0.9185246141205242, "learning_rate": 7.74235761335227e-06, "loss": 0.519, "step": 1510 }, { "epoch": 0.3362260792167334, "grad_norm": 0.9394322671543629, "learning_rate": 7.739343706700652e-06, "loss": 0.5206, "step": 1511 }, { "epoch": 0.3364485981308411, "grad_norm": 0.9721960012359475, "learning_rate": 7.73632837715514e-06, "loss": 0.5215, "step": 1512 }, { "epoch": 0.33667111704494884, "grad_norm": 0.8817048989119176, "learning_rate": 7.733311626281985e-06, "loss": 0.5011, "step": 1513 }, { "epoch": 0.3368936359590565, "grad_norm": 0.8864822331337687, "learning_rate": 7.730293455648174e-06, "loss": 0.4886, "step": 1514 }, { "epoch": 0.3371161548731642, "grad_norm": 0.8887858670238162, "learning_rate": 7.72727386682143e-06, "loss": 0.4937, "step": 1515 }, { "epoch": 0.3373386737872719, "grad_norm": 0.8437320771742681, "learning_rate": 7.724252861370215e-06, "loss": 0.5083, "step": 1516 }, { "epoch": 0.33756119270137963, "grad_norm": 0.9255829845344076, "learning_rate": 7.721230440863727e-06, "loss": 0.5338, "step": 1517 }, { "epoch": 0.3377837116154873, "grad_norm": 0.8974656221572701, "learning_rate": 7.718206606871896e-06, "loss": 0.5192, "step": 1518 }, { "epoch": 0.338006230529595, "grad_norm": 0.9102888448140941, "learning_rate": 7.715181360965391e-06, "loss": 0.5305, "step": 1519 }, { "epoch": 0.33822874944370274, "grad_norm": 0.9472474472275928, "learning_rate": 7.71215470471561e-06, "loss": 0.5199, "step": 1520 }, { "epoch": 0.3384512683578104, "grad_norm": 0.8404781910769548, "learning_rate": 7.709126639694684e-06, "loss": 0.5254, "step": 1521 }, { "epoch": 0.3386737872719181, "grad_norm": 0.871797004057594, "learning_rate": 7.706097167475479e-06, "loss": 0.5023, "step": 1522 }, { "epoch": 0.3388963061860258, "grad_norm": 0.8746926113494895, "learning_rate": 7.703066289631591e-06, "loss": 0.5412, "step": 1523 }, { "epoch": 0.3391188251001335, "grad_norm": 0.9236860342323117, "learning_rate": 7.700034007737345e-06, "loss": 0.5248, "step": 1524 }, { "epoch": 0.3393413440142412, "grad_norm": 0.9888654889349535, "learning_rate": 7.69700032336779e-06, "loss": 0.5248, "step": 1525 }, { "epoch": 0.3395638629283489, "grad_norm": 0.9876494097197134, "learning_rate": 7.693965238098717e-06, "loss": 0.529, "step": 1526 }, { "epoch": 0.33978638184245663, "grad_norm": 0.9487121562429434, "learning_rate": 7.690928753506632e-06, "loss": 0.4946, "step": 1527 }, { "epoch": 0.3400089007565643, "grad_norm": 0.8990789968477947, "learning_rate": 7.68789087116877e-06, "loss": 0.52, "step": 1528 }, { "epoch": 0.340231419670672, "grad_norm": 0.996582149269616, "learning_rate": 7.684851592663101e-06, "loss": 0.5193, "step": 1529 }, { "epoch": 0.3404539385847797, "grad_norm": 0.9329677242906669, "learning_rate": 7.68181091956831e-06, "loss": 0.5246, "step": 1530 }, { "epoch": 0.3406764574988874, "grad_norm": 1.10107385421633, "learning_rate": 7.678768853463807e-06, "loss": 0.5339, "step": 1531 }, { "epoch": 0.3408989764129951, "grad_norm": 0.8681742452852501, "learning_rate": 7.675725395929734e-06, "loss": 0.5163, "step": 1532 }, { "epoch": 0.3411214953271028, "grad_norm": 0.9585267326222981, "learning_rate": 7.67268054854695e-06, "loss": 0.5208, "step": 1533 }, { "epoch": 0.3413440142412105, "grad_norm": 0.9158696056388327, "learning_rate": 7.669634312897032e-06, "loss": 0.5118, "step": 1534 }, { "epoch": 0.3415665331553182, "grad_norm": 1.0214116396757882, "learning_rate": 7.666586690562284e-06, "loss": 0.5037, "step": 1535 }, { "epoch": 0.3417890520694259, "grad_norm": 0.9443551546538748, "learning_rate": 7.663537683125731e-06, "loss": 0.543, "step": 1536 }, { "epoch": 0.3420115709835336, "grad_norm": 0.8738825659344445, "learning_rate": 7.660487292171115e-06, "loss": 0.5045, "step": 1537 }, { "epoch": 0.3422340898976413, "grad_norm": 0.944936859261511, "learning_rate": 7.657435519282892e-06, "loss": 0.5054, "step": 1538 }, { "epoch": 0.342456608811749, "grad_norm": 0.8964160032236518, "learning_rate": 7.654382366046247e-06, "loss": 0.5274, "step": 1539 }, { "epoch": 0.3426791277258567, "grad_norm": 0.9041743905542183, "learning_rate": 7.651327834047071e-06, "loss": 0.5226, "step": 1540 }, { "epoch": 0.3429016466399644, "grad_norm": 0.9341493043769469, "learning_rate": 7.648271924871977e-06, "loss": 0.5163, "step": 1541 }, { "epoch": 0.3431241655540721, "grad_norm": 0.8889468508906698, "learning_rate": 7.645214640108295e-06, "loss": 0.5216, "step": 1542 }, { "epoch": 0.3433466844681798, "grad_norm": 0.929252905760759, "learning_rate": 7.642155981344063e-06, "loss": 0.5077, "step": 1543 }, { "epoch": 0.34356920338228747, "grad_norm": 0.8917976044172635, "learning_rate": 7.639095950168035e-06, "loss": 0.5111, "step": 1544 }, { "epoch": 0.3437917222963952, "grad_norm": 0.8887367036211921, "learning_rate": 7.636034548169683e-06, "loss": 0.515, "step": 1545 }, { "epoch": 0.3440142412105029, "grad_norm": 0.9264336939553903, "learning_rate": 7.632971776939189e-06, "loss": 0.5381, "step": 1546 }, { "epoch": 0.3442367601246106, "grad_norm": 0.9944509537995486, "learning_rate": 7.629907638067438e-06, "loss": 0.5139, "step": 1547 }, { "epoch": 0.3444592790387183, "grad_norm": 0.9155479260631787, "learning_rate": 7.626842133146035e-06, "loss": 0.5191, "step": 1548 }, { "epoch": 0.344681797952826, "grad_norm": 0.9428549028054951, "learning_rate": 7.623775263767294e-06, "loss": 0.5353, "step": 1549 }, { "epoch": 0.3449043168669337, "grad_norm": 0.8770508041717435, "learning_rate": 7.620707031524231e-06, "loss": 0.5153, "step": 1550 }, { "epoch": 0.34512683578104136, "grad_norm": 1.0025421554077336, "learning_rate": 7.6176374380105746e-06, "loss": 0.5299, "step": 1551 }, { "epoch": 0.3453493546951491, "grad_norm": 0.9653573842502841, "learning_rate": 7.614566484820762e-06, "loss": 0.5197, "step": 1552 }, { "epoch": 0.3455718736092568, "grad_norm": 1.030847220045888, "learning_rate": 7.611494173549933e-06, "loss": 0.5322, "step": 1553 }, { "epoch": 0.34579439252336447, "grad_norm": 0.982551139882616, "learning_rate": 7.608420505793937e-06, "loss": 0.5211, "step": 1554 }, { "epoch": 0.3460169114374722, "grad_norm": 1.0158198322269996, "learning_rate": 7.605345483149322e-06, "loss": 0.5275, "step": 1555 }, { "epoch": 0.3462394303515799, "grad_norm": 0.9915979221136074, "learning_rate": 7.602269107213344e-06, "loss": 0.5441, "step": 1556 }, { "epoch": 0.3464619492656876, "grad_norm": 0.9749238960571068, "learning_rate": 7.599191379583963e-06, "loss": 0.5049, "step": 1557 }, { "epoch": 0.34668446817979526, "grad_norm": 0.971637187633321, "learning_rate": 7.596112301859838e-06, "loss": 0.5135, "step": 1558 }, { "epoch": 0.346906987093903, "grad_norm": 0.9169704039032889, "learning_rate": 7.593031875640331e-06, "loss": 0.5214, "step": 1559 }, { "epoch": 0.3471295060080107, "grad_norm": 1.0142005315258404, "learning_rate": 7.589950102525503e-06, "loss": 0.5306, "step": 1560 }, { "epoch": 0.34735202492211836, "grad_norm": 0.9215025283480036, "learning_rate": 7.5868669841161145e-06, "loss": 0.5109, "step": 1561 }, { "epoch": 0.3475745438362261, "grad_norm": 1.039120968745362, "learning_rate": 7.58378252201363e-06, "loss": 0.5233, "step": 1562 }, { "epoch": 0.3477970627503338, "grad_norm": 0.965242453609822, "learning_rate": 7.580696717820204e-06, "loss": 0.5117, "step": 1563 }, { "epoch": 0.34801958166444147, "grad_norm": 0.9273504384210476, "learning_rate": 7.577609573138693e-06, "loss": 0.5095, "step": 1564 }, { "epoch": 0.34824210057854915, "grad_norm": 1.0137887572272433, "learning_rate": 7.574521089572648e-06, "loss": 0.529, "step": 1565 }, { "epoch": 0.3484646194926569, "grad_norm": 0.9961106998283356, "learning_rate": 7.571431268726319e-06, "loss": 0.5248, "step": 1566 }, { "epoch": 0.3486871384067646, "grad_norm": 0.9583332444845641, "learning_rate": 7.568340112204646e-06, "loss": 0.5285, "step": 1567 }, { "epoch": 0.34890965732087226, "grad_norm": 0.9186578207295524, "learning_rate": 7.565247621613263e-06, "loss": 0.504, "step": 1568 }, { "epoch": 0.34913217623498, "grad_norm": 0.9475544987395236, "learning_rate": 7.5621537985585e-06, "loss": 0.5224, "step": 1569 }, { "epoch": 0.3493546951490877, "grad_norm": 0.9730606166794001, "learning_rate": 7.559058644647379e-06, "loss": 0.5108, "step": 1570 }, { "epoch": 0.34957721406319536, "grad_norm": 0.9500011891243121, "learning_rate": 7.55596216148761e-06, "loss": 0.5036, "step": 1571 }, { "epoch": 0.34979973297730305, "grad_norm": 0.8912215656256381, "learning_rate": 7.552864350687596e-06, "loss": 0.5142, "step": 1572 }, { "epoch": 0.3500222518914108, "grad_norm": 0.9070387869873245, "learning_rate": 7.549765213856428e-06, "loss": 0.5177, "step": 1573 }, { "epoch": 0.35024477080551847, "grad_norm": 0.940673444794229, "learning_rate": 7.54666475260389e-06, "loss": 0.5169, "step": 1574 }, { "epoch": 0.35046728971962615, "grad_norm": 0.9452740195963746, "learning_rate": 7.543562968540446e-06, "loss": 0.5133, "step": 1575 }, { "epoch": 0.3506898086337339, "grad_norm": 0.9306440214681468, "learning_rate": 7.540459863277257e-06, "loss": 0.5394, "step": 1576 }, { "epoch": 0.3509123275478416, "grad_norm": 0.9054265400762684, "learning_rate": 7.5373554384261604e-06, "loss": 0.4976, "step": 1577 }, { "epoch": 0.35113484646194926, "grad_norm": 0.8734545061204714, "learning_rate": 7.534249695599686e-06, "loss": 0.5219, "step": 1578 }, { "epoch": 0.35135736537605694, "grad_norm": 0.872074965520361, "learning_rate": 7.531142636411046e-06, "loss": 0.5285, "step": 1579 }, { "epoch": 0.3515798842901647, "grad_norm": 0.957246910079552, "learning_rate": 7.5280342624741374e-06, "loss": 0.5291, "step": 1580 }, { "epoch": 0.35180240320427236, "grad_norm": 0.9422301785963997, "learning_rate": 7.524924575403536e-06, "loss": 0.5299, "step": 1581 }, { "epoch": 0.35202492211838005, "grad_norm": 0.8362492253749035, "learning_rate": 7.521813576814504e-06, "loss": 0.495, "step": 1582 }, { "epoch": 0.3522474410324878, "grad_norm": 0.8989711058848997, "learning_rate": 7.518701268322985e-06, "loss": 0.5124, "step": 1583 }, { "epoch": 0.35246995994659547, "grad_norm": 0.8656750082777009, "learning_rate": 7.515587651545599e-06, "loss": 0.5107, "step": 1584 }, { "epoch": 0.35269247886070315, "grad_norm": 0.9566680667479797, "learning_rate": 7.5124727280996516e-06, "loss": 0.5034, "step": 1585 }, { "epoch": 0.35291499777481083, "grad_norm": 0.8857745000046786, "learning_rate": 7.50935649960312e-06, "loss": 0.5083, "step": 1586 }, { "epoch": 0.3531375166889186, "grad_norm": 1.060975747072023, "learning_rate": 7.506238967674664e-06, "loss": 0.5176, "step": 1587 }, { "epoch": 0.35336003560302626, "grad_norm": 0.9895728576254834, "learning_rate": 7.503120133933621e-06, "loss": 0.5221, "step": 1588 }, { "epoch": 0.35358255451713394, "grad_norm": 0.9540947584434901, "learning_rate": 7.500000000000001e-06, "loss": 0.5248, "step": 1589 }, { "epoch": 0.3538050734312417, "grad_norm": 0.9548393192684516, "learning_rate": 7.496878567494492e-06, "loss": 0.5015, "step": 1590 }, { "epoch": 0.35402759234534936, "grad_norm": 0.9531998764473016, "learning_rate": 7.4937558380384555e-06, "loss": 0.5186, "step": 1591 }, { "epoch": 0.35425011125945705, "grad_norm": 0.9973298124439858, "learning_rate": 7.490631813253927e-06, "loss": 0.5116, "step": 1592 }, { "epoch": 0.35447263017356473, "grad_norm": 0.9041394266055213, "learning_rate": 7.4875064947636145e-06, "loss": 0.5183, "step": 1593 }, { "epoch": 0.35469514908767247, "grad_norm": 1.0320194107271026, "learning_rate": 7.4843798841909e-06, "loss": 0.5199, "step": 1594 }, { "epoch": 0.35491766800178015, "grad_norm": 0.913037006898029, "learning_rate": 7.481251983159834e-06, "loss": 0.4951, "step": 1595 }, { "epoch": 0.35514018691588783, "grad_norm": 1.0299087382529528, "learning_rate": 7.478122793295136e-06, "loss": 0.5016, "step": 1596 }, { "epoch": 0.3553627058299956, "grad_norm": 1.0476393734098857, "learning_rate": 7.4749923162222005e-06, "loss": 0.4998, "step": 1597 }, { "epoch": 0.35558522474410326, "grad_norm": 0.9443102307701943, "learning_rate": 7.471860553567086e-06, "loss": 0.5028, "step": 1598 }, { "epoch": 0.35580774365821094, "grad_norm": 0.9826035236516196, "learning_rate": 7.468727506956519e-06, "loss": 0.5122, "step": 1599 }, { "epoch": 0.3560302625723186, "grad_norm": 0.8962561362726711, "learning_rate": 7.465593178017897e-06, "loss": 0.5165, "step": 1600 }, { "epoch": 0.35625278148642636, "grad_norm": 1.0769873062123407, "learning_rate": 7.462457568379278e-06, "loss": 0.5217, "step": 1601 }, { "epoch": 0.35647530040053405, "grad_norm": 1.090154788767294, "learning_rate": 7.459320679669387e-06, "loss": 0.5173, "step": 1602 }, { "epoch": 0.35669781931464173, "grad_norm": 0.9866299265535768, "learning_rate": 7.456182513517616e-06, "loss": 0.5097, "step": 1603 }, { "epoch": 0.35692033822874947, "grad_norm": 1.019623884789022, "learning_rate": 7.45304307155402e-06, "loss": 0.5157, "step": 1604 }, { "epoch": 0.35714285714285715, "grad_norm": 1.0838812792321426, "learning_rate": 7.449902355409312e-06, "loss": 0.5213, "step": 1605 }, { "epoch": 0.35736537605696483, "grad_norm": 0.99222166556799, "learning_rate": 7.446760366714874e-06, "loss": 0.5232, "step": 1606 }, { "epoch": 0.3575878949710725, "grad_norm": 0.9870490104289543, "learning_rate": 7.4436171071027405e-06, "loss": 0.5313, "step": 1607 }, { "epoch": 0.35781041388518026, "grad_norm": 1.0121107780788485, "learning_rate": 7.440472578205618e-06, "loss": 0.5129, "step": 1608 }, { "epoch": 0.35803293279928794, "grad_norm": 0.9231932517215824, "learning_rate": 7.4373267816568575e-06, "loss": 0.5091, "step": 1609 }, { "epoch": 0.3582554517133956, "grad_norm": 0.9316581074041435, "learning_rate": 7.434179719090481e-06, "loss": 0.5243, "step": 1610 }, { "epoch": 0.35847797062750336, "grad_norm": 1.0463120081868467, "learning_rate": 7.431031392141162e-06, "loss": 0.5064, "step": 1611 }, { "epoch": 0.35870048954161105, "grad_norm": 0.8769144882646546, "learning_rate": 7.427881802444233e-06, "loss": 0.5227, "step": 1612 }, { "epoch": 0.35892300845571873, "grad_norm": 0.9206610970979135, "learning_rate": 7.424730951635677e-06, "loss": 0.5167, "step": 1613 }, { "epoch": 0.3591455273698264, "grad_norm": 0.936639015627384, "learning_rate": 7.421578841352144e-06, "loss": 0.5082, "step": 1614 }, { "epoch": 0.35936804628393415, "grad_norm": 1.0080837696410967, "learning_rate": 7.418425473230924e-06, "loss": 0.5118, "step": 1615 }, { "epoch": 0.35959056519804183, "grad_norm": 0.9249527921790011, "learning_rate": 7.415270848909973e-06, "loss": 0.5047, "step": 1616 }, { "epoch": 0.3598130841121495, "grad_norm": 0.9482014590714005, "learning_rate": 7.412114970027886e-06, "loss": 0.5173, "step": 1617 }, { "epoch": 0.36003560302625726, "grad_norm": 0.974471203204913, "learning_rate": 7.4089578382239245e-06, "loss": 0.5134, "step": 1618 }, { "epoch": 0.36025812194036494, "grad_norm": 0.9961343981931978, "learning_rate": 7.405799455137991e-06, "loss": 0.4885, "step": 1619 }, { "epoch": 0.3604806408544726, "grad_norm": 0.9853169022797397, "learning_rate": 7.40263982241064e-06, "loss": 0.5095, "step": 1620 }, { "epoch": 0.3607031597685803, "grad_norm": 1.0339701777115253, "learning_rate": 7.399478941683075e-06, "loss": 0.5233, "step": 1621 }, { "epoch": 0.36092567868268804, "grad_norm": 1.0319802797870479, "learning_rate": 7.39631681459715e-06, "loss": 0.5294, "step": 1622 }, { "epoch": 0.36114819759679573, "grad_norm": 0.8767662947345212, "learning_rate": 7.393153442795362e-06, "loss": 0.5055, "step": 1623 }, { "epoch": 0.3613707165109034, "grad_norm": 0.9372472247439745, "learning_rate": 7.38998882792086e-06, "loss": 0.5087, "step": 1624 }, { "epoch": 0.36159323542501115, "grad_norm": 0.9891941532456152, "learning_rate": 7.386822971617437e-06, "loss": 0.5144, "step": 1625 }, { "epoch": 0.36181575433911883, "grad_norm": 0.9100536733494057, "learning_rate": 7.383655875529526e-06, "loss": 0.5051, "step": 1626 }, { "epoch": 0.3620382732532265, "grad_norm": 0.9576744814523507, "learning_rate": 7.380487541302211e-06, "loss": 0.5315, "step": 1627 }, { "epoch": 0.3622607921673342, "grad_norm": 0.9223553080148866, "learning_rate": 7.377317970581212e-06, "loss": 0.5078, "step": 1628 }, { "epoch": 0.36248331108144194, "grad_norm": 0.9323401956326454, "learning_rate": 7.3741471650129005e-06, "loss": 0.5075, "step": 1629 }, { "epoch": 0.3627058299955496, "grad_norm": 1.029436233832955, "learning_rate": 7.37097512624428e-06, "loss": 0.5243, "step": 1630 }, { "epoch": 0.3629283489096573, "grad_norm": 0.88070510090318, "learning_rate": 7.367801855923001e-06, "loss": 0.5241, "step": 1631 }, { "epoch": 0.36315086782376504, "grad_norm": 0.8787857457255553, "learning_rate": 7.36462735569735e-06, "loss": 0.5102, "step": 1632 }, { "epoch": 0.36337338673787273, "grad_norm": 0.9543418929220086, "learning_rate": 7.361451627216254e-06, "loss": 0.5141, "step": 1633 }, { "epoch": 0.3635959056519804, "grad_norm": 1.091090977794647, "learning_rate": 7.3582746721292775e-06, "loss": 0.5237, "step": 1634 }, { "epoch": 0.3638184245660881, "grad_norm": 0.8704248489225266, "learning_rate": 7.355096492086623e-06, "loss": 0.5106, "step": 1635 }, { "epoch": 0.36404094348019583, "grad_norm": 0.9116854011730063, "learning_rate": 7.351917088739128e-06, "loss": 0.5125, "step": 1636 }, { "epoch": 0.3642634623943035, "grad_norm": 0.9460133916467309, "learning_rate": 7.348736463738267e-06, "loss": 0.5147, "step": 1637 }, { "epoch": 0.3644859813084112, "grad_norm": 0.9477362361907203, "learning_rate": 7.345554618736146e-06, "loss": 0.5125, "step": 1638 }, { "epoch": 0.36470850022251894, "grad_norm": 0.9553191852685702, "learning_rate": 7.342371555385508e-06, "loss": 0.5154, "step": 1639 }, { "epoch": 0.3649310191366266, "grad_norm": 0.9038123387686351, "learning_rate": 7.3391872753397285e-06, "loss": 0.5174, "step": 1640 }, { "epoch": 0.3651535380507343, "grad_norm": 0.9437791648196479, "learning_rate": 7.336001780252814e-06, "loss": 0.5222, "step": 1641 }, { "epoch": 0.365376056964842, "grad_norm": 0.9318435522219531, "learning_rate": 7.3328150717794e-06, "loss": 0.5104, "step": 1642 }, { "epoch": 0.3655985758789497, "grad_norm": 0.9668771288065621, "learning_rate": 7.3296271515747585e-06, "loss": 0.5364, "step": 1643 }, { "epoch": 0.3658210947930574, "grad_norm": 1.0690231722941907, "learning_rate": 7.3264380212947815e-06, "loss": 0.5158, "step": 1644 }, { "epoch": 0.3660436137071651, "grad_norm": 0.8969290265571781, "learning_rate": 7.323247682596001e-06, "loss": 0.52, "step": 1645 }, { "epoch": 0.36626613262127283, "grad_norm": 1.0095261128559565, "learning_rate": 7.320056137135565e-06, "loss": 0.5048, "step": 1646 }, { "epoch": 0.3664886515353805, "grad_norm": 0.9082916854708433, "learning_rate": 7.316863386571259e-06, "loss": 0.5276, "step": 1647 }, { "epoch": 0.3667111704494882, "grad_norm": 0.9471118270142587, "learning_rate": 7.3136694325614855e-06, "loss": 0.5057, "step": 1648 }, { "epoch": 0.3669336893635959, "grad_norm": 1.1409743851427359, "learning_rate": 7.310474276765278e-06, "loss": 0.5114, "step": 1649 }, { "epoch": 0.3671562082777036, "grad_norm": 0.9035735907104848, "learning_rate": 7.307277920842293e-06, "loss": 0.513, "step": 1650 }, { "epoch": 0.3673787271918113, "grad_norm": 0.9252900312952365, "learning_rate": 7.304080366452808e-06, "loss": 0.511, "step": 1651 }, { "epoch": 0.367601246105919, "grad_norm": 0.9005508451828431, "learning_rate": 7.300881615257725e-06, "loss": 0.5293, "step": 1652 }, { "epoch": 0.3678237650200267, "grad_norm": 0.870774591095475, "learning_rate": 7.297681668918568e-06, "loss": 0.5072, "step": 1653 }, { "epoch": 0.3680462839341344, "grad_norm": 0.896403687628883, "learning_rate": 7.294480529097481e-06, "loss": 0.5274, "step": 1654 }, { "epoch": 0.3682688028482421, "grad_norm": 0.9109021079998301, "learning_rate": 7.291278197457228e-06, "loss": 0.5221, "step": 1655 }, { "epoch": 0.3684913217623498, "grad_norm": 0.9204754348768388, "learning_rate": 7.288074675661192e-06, "loss": 0.5067, "step": 1656 }, { "epoch": 0.3687138406764575, "grad_norm": 0.904440229595502, "learning_rate": 7.284869965373374e-06, "loss": 0.4912, "step": 1657 }, { "epoch": 0.3689363595905652, "grad_norm": 0.9133599405934246, "learning_rate": 7.281664068258394e-06, "loss": 0.5211, "step": 1658 }, { "epoch": 0.3691588785046729, "grad_norm": 0.9453894558215878, "learning_rate": 7.278456985981485e-06, "loss": 0.5155, "step": 1659 }, { "epoch": 0.3693813974187806, "grad_norm": 0.9386627288955148, "learning_rate": 7.2752487202085e-06, "loss": 0.5254, "step": 1660 }, { "epoch": 0.3696039163328883, "grad_norm": 0.9308532309464431, "learning_rate": 7.272039272605902e-06, "loss": 0.5083, "step": 1661 }, { "epoch": 0.369826435246996, "grad_norm": 1.0280854310317649, "learning_rate": 7.268828644840774e-06, "loss": 0.5148, "step": 1662 }, { "epoch": 0.37004895416110367, "grad_norm": 0.945797430232987, "learning_rate": 7.265616838580806e-06, "loss": 0.5074, "step": 1663 }, { "epoch": 0.3702714730752114, "grad_norm": 0.980323491976007, "learning_rate": 7.262403855494301e-06, "loss": 0.51, "step": 1664 }, { "epoch": 0.3704939919893191, "grad_norm": 0.9462489457290725, "learning_rate": 7.259189697250177e-06, "loss": 0.5192, "step": 1665 }, { "epoch": 0.3707165109034268, "grad_norm": 0.975994957319842, "learning_rate": 7.255974365517961e-06, "loss": 0.51, "step": 1666 }, { "epoch": 0.3709390298175345, "grad_norm": 0.9949209632859821, "learning_rate": 7.2527578619677866e-06, "loss": 0.5217, "step": 1667 }, { "epoch": 0.3711615487316422, "grad_norm": 0.9377071465464574, "learning_rate": 7.2495401882703995e-06, "loss": 0.5166, "step": 1668 }, { "epoch": 0.3713840676457499, "grad_norm": 0.9215340914775618, "learning_rate": 7.246321346097152e-06, "loss": 0.5141, "step": 1669 }, { "epoch": 0.37160658655985757, "grad_norm": 0.9265144243925332, "learning_rate": 7.243101337120002e-06, "loss": 0.5083, "step": 1670 }, { "epoch": 0.3718291054739653, "grad_norm": 0.9597297463670964, "learning_rate": 7.239880163011517e-06, "loss": 0.5027, "step": 1671 }, { "epoch": 0.372051624388073, "grad_norm": 1.0410404644319544, "learning_rate": 7.236657825444866e-06, "loss": 0.5176, "step": 1672 }, { "epoch": 0.37227414330218067, "grad_norm": 0.9980036039296207, "learning_rate": 7.233434326093822e-06, "loss": 0.5141, "step": 1673 }, { "epoch": 0.3724966622162884, "grad_norm": 1.1913162884400952, "learning_rate": 7.230209666632768e-06, "loss": 0.4973, "step": 1674 }, { "epoch": 0.3727191811303961, "grad_norm": 0.9957027471620402, "learning_rate": 7.226983848736679e-06, "loss": 0.5278, "step": 1675 }, { "epoch": 0.3729417000445038, "grad_norm": 0.939444903369153, "learning_rate": 7.223756874081143e-06, "loss": 0.5045, "step": 1676 }, { "epoch": 0.37316421895861146, "grad_norm": 0.9976100278827672, "learning_rate": 7.220528744342341e-06, "loss": 0.5326, "step": 1677 }, { "epoch": 0.3733867378727192, "grad_norm": 1.091528099318341, "learning_rate": 7.217299461197056e-06, "loss": 0.5273, "step": 1678 }, { "epoch": 0.3736092567868269, "grad_norm": 0.960829569294914, "learning_rate": 7.21406902632267e-06, "loss": 0.5319, "step": 1679 }, { "epoch": 0.37383177570093457, "grad_norm": 0.9503663779104387, "learning_rate": 7.210837441397165e-06, "loss": 0.528, "step": 1680 }, { "epoch": 0.3740542946150423, "grad_norm": 0.9559626605066275, "learning_rate": 7.207604708099121e-06, "loss": 0.513, "step": 1681 }, { "epoch": 0.37427681352915, "grad_norm": 0.9147399386638592, "learning_rate": 7.2043708281077075e-06, "loss": 0.5016, "step": 1682 }, { "epoch": 0.37449933244325767, "grad_norm": 0.8760739537958754, "learning_rate": 7.2011358031027e-06, "loss": 0.5215, "step": 1683 }, { "epoch": 0.37472185135736535, "grad_norm": 0.9124472437154337, "learning_rate": 7.197899634764461e-06, "loss": 0.5197, "step": 1684 }, { "epoch": 0.3749443702714731, "grad_norm": 0.937717957728742, "learning_rate": 7.194662324773949e-06, "loss": 0.5083, "step": 1685 }, { "epoch": 0.3751668891855808, "grad_norm": 0.9487995479122466, "learning_rate": 7.1914238748127165e-06, "loss": 0.5205, "step": 1686 }, { "epoch": 0.37538940809968846, "grad_norm": 0.9450442624987366, "learning_rate": 7.1881842865629085e-06, "loss": 0.5225, "step": 1687 }, { "epoch": 0.3756119270137962, "grad_norm": 0.9141101718420763, "learning_rate": 7.184943561707259e-06, "loss": 0.513, "step": 1688 }, { "epoch": 0.3758344459279039, "grad_norm": 0.9537552093800669, "learning_rate": 7.181701701929094e-06, "loss": 0.52, "step": 1689 }, { "epoch": 0.37605696484201157, "grad_norm": 0.9263560156586146, "learning_rate": 7.178458708912328e-06, "loss": 0.5083, "step": 1690 }, { "epoch": 0.37627948375611925, "grad_norm": 0.8855105948090252, "learning_rate": 7.175214584341467e-06, "loss": 0.4853, "step": 1691 }, { "epoch": 0.376502002670227, "grad_norm": 0.993428383110455, "learning_rate": 7.171969329901601e-06, "loss": 0.5199, "step": 1692 }, { "epoch": 0.37672452158433467, "grad_norm": 0.9614762208942464, "learning_rate": 7.168722947278408e-06, "loss": 0.5364, "step": 1693 }, { "epoch": 0.37694704049844235, "grad_norm": 0.9672900928760333, "learning_rate": 7.165475438158154e-06, "loss": 0.5378, "step": 1694 }, { "epoch": 0.3771695594125501, "grad_norm": 0.8963931284680182, "learning_rate": 7.162226804227687e-06, "loss": 0.5057, "step": 1695 }, { "epoch": 0.3773920783266578, "grad_norm": 0.9153516325465885, "learning_rate": 7.158977047174441e-06, "loss": 0.4966, "step": 1696 }, { "epoch": 0.37761459724076546, "grad_norm": 0.9776625325401475, "learning_rate": 7.1557261686864355e-06, "loss": 0.4847, "step": 1697 }, { "epoch": 0.37783711615487314, "grad_norm": 0.9582101279271157, "learning_rate": 7.152474170452268e-06, "loss": 0.5327, "step": 1698 }, { "epoch": 0.3780596350689809, "grad_norm": 0.939738295408548, "learning_rate": 7.1492210541611205e-06, "loss": 0.5196, "step": 1699 }, { "epoch": 0.37828215398308856, "grad_norm": 1.0112318555281943, "learning_rate": 7.145966821502755e-06, "loss": 0.5101, "step": 1700 }, { "epoch": 0.37850467289719625, "grad_norm": 1.0011130170962095, "learning_rate": 7.1427114741675145e-06, "loss": 0.5075, "step": 1701 }, { "epoch": 0.378727191811304, "grad_norm": 0.8784637831727126, "learning_rate": 7.139455013846319e-06, "loss": 0.5296, "step": 1702 }, { "epoch": 0.37894971072541167, "grad_norm": 1.025090237461226, "learning_rate": 7.136197442230668e-06, "loss": 0.5189, "step": 1703 }, { "epoch": 0.37917222963951935, "grad_norm": 1.004911028597702, "learning_rate": 7.132938761012638e-06, "loss": 0.5171, "step": 1704 }, { "epoch": 0.37939474855362704, "grad_norm": 0.9855437092788373, "learning_rate": 7.1296789718848815e-06, "loss": 0.5175, "step": 1705 }, { "epoch": 0.3796172674677348, "grad_norm": 0.953414985352527, "learning_rate": 7.1264180765406256e-06, "loss": 0.5071, "step": 1706 }, { "epoch": 0.37983978638184246, "grad_norm": 0.9055157544815094, "learning_rate": 7.123156076673674e-06, "loss": 0.5054, "step": 1707 }, { "epoch": 0.38006230529595014, "grad_norm": 0.9474463129459504, "learning_rate": 7.119892973978405e-06, "loss": 0.5183, "step": 1708 }, { "epoch": 0.3802848242100579, "grad_norm": 0.8945653347415883, "learning_rate": 7.116628770149767e-06, "loss": 0.5059, "step": 1709 }, { "epoch": 0.38050734312416556, "grad_norm": 0.8918967628196641, "learning_rate": 7.113363466883278e-06, "loss": 0.5138, "step": 1710 }, { "epoch": 0.38072986203827325, "grad_norm": 0.8674621131643296, "learning_rate": 7.110097065875036e-06, "loss": 0.4946, "step": 1711 }, { "epoch": 0.38095238095238093, "grad_norm": 0.9676967630990165, "learning_rate": 7.106829568821699e-06, "loss": 0.5311, "step": 1712 }, { "epoch": 0.38117489986648867, "grad_norm": 0.9431249210216, "learning_rate": 7.103560977420501e-06, "loss": 0.5178, "step": 1713 }, { "epoch": 0.38139741878059635, "grad_norm": 0.9738891655337171, "learning_rate": 7.100291293369244e-06, "loss": 0.5205, "step": 1714 }, { "epoch": 0.38161993769470404, "grad_norm": 1.0395854063232959, "learning_rate": 7.097020518366292e-06, "loss": 0.5191, "step": 1715 }, { "epoch": 0.3818424566088118, "grad_norm": 0.9802547930128194, "learning_rate": 7.093748654110582e-06, "loss": 0.5292, "step": 1716 }, { "epoch": 0.38206497552291946, "grad_norm": 1.0085082725962893, "learning_rate": 7.0904757023016135e-06, "loss": 0.517, "step": 1717 }, { "epoch": 0.38228749443702714, "grad_norm": 0.9469699708836767, "learning_rate": 7.087201664639454e-06, "loss": 0.5007, "step": 1718 }, { "epoch": 0.3825100133511348, "grad_norm": 0.9357074974373795, "learning_rate": 7.083926542824728e-06, "loss": 0.5007, "step": 1719 }, { "epoch": 0.38273253226524256, "grad_norm": 1.0250393292513085, "learning_rate": 7.080650338558634e-06, "loss": 0.5022, "step": 1720 }, { "epoch": 0.38295505117935025, "grad_norm": 0.9305526241305793, "learning_rate": 7.077373053542922e-06, "loss": 0.5017, "step": 1721 }, { "epoch": 0.38317757009345793, "grad_norm": 0.915264216658998, "learning_rate": 7.074094689479911e-06, "loss": 0.51, "step": 1722 }, { "epoch": 0.38340008900756567, "grad_norm": 0.9736331779961643, "learning_rate": 7.070815248072476e-06, "loss": 0.5172, "step": 1723 }, { "epoch": 0.38362260792167335, "grad_norm": 0.9745510577760587, "learning_rate": 7.067534731024054e-06, "loss": 0.5014, "step": 1724 }, { "epoch": 0.38384512683578104, "grad_norm": 0.9255815004257248, "learning_rate": 7.064253140038639e-06, "loss": 0.5082, "step": 1725 }, { "epoch": 0.3840676457498887, "grad_norm": 0.9603373994199943, "learning_rate": 7.060970476820783e-06, "loss": 0.5097, "step": 1726 }, { "epoch": 0.38429016466399646, "grad_norm": 1.0089161095085002, "learning_rate": 7.057686743075598e-06, "loss": 0.5108, "step": 1727 }, { "epoch": 0.38451268357810414, "grad_norm": 1.000990314413955, "learning_rate": 7.054401940508748e-06, "loss": 0.5131, "step": 1728 }, { "epoch": 0.3847352024922118, "grad_norm": 0.9298826131859594, "learning_rate": 7.0511160708264545e-06, "loss": 0.5237, "step": 1729 }, { "epoch": 0.38495772140631956, "grad_norm": 0.9571018414935645, "learning_rate": 7.047829135735493e-06, "loss": 0.5183, "step": 1730 }, { "epoch": 0.38518024032042725, "grad_norm": 0.8775602003362057, "learning_rate": 7.044541136943192e-06, "loss": 0.5077, "step": 1731 }, { "epoch": 0.38540275923453493, "grad_norm": 0.9576024530999799, "learning_rate": 7.041252076157431e-06, "loss": 0.5203, "step": 1732 }, { "epoch": 0.3856252781486426, "grad_norm": 0.9031055223910067, "learning_rate": 7.037961955086645e-06, "loss": 0.5083, "step": 1733 }, { "epoch": 0.38584779706275035, "grad_norm": 0.901790679233153, "learning_rate": 7.034670775439818e-06, "loss": 0.5057, "step": 1734 }, { "epoch": 0.38607031597685804, "grad_norm": 0.8849319617131345, "learning_rate": 7.031378538926481e-06, "loss": 0.5123, "step": 1735 }, { "epoch": 0.3862928348909657, "grad_norm": 0.9327533338085033, "learning_rate": 7.028085247256717e-06, "loss": 0.5058, "step": 1736 }, { "epoch": 0.38651535380507346, "grad_norm": 1.1708577450894178, "learning_rate": 7.024790902141157e-06, "loss": 0.4927, "step": 1737 }, { "epoch": 0.38673787271918114, "grad_norm": 0.9693391842999642, "learning_rate": 7.021495505290976e-06, "loss": 0.5193, "step": 1738 }, { "epoch": 0.3869603916332888, "grad_norm": 0.9506281880324016, "learning_rate": 7.018199058417904e-06, "loss": 0.5157, "step": 1739 }, { "epoch": 0.3871829105473965, "grad_norm": 0.9178307595958549, "learning_rate": 7.014901563234204e-06, "loss": 0.5109, "step": 1740 }, { "epoch": 0.38740542946150425, "grad_norm": 1.1289699035857677, "learning_rate": 7.011603021452693e-06, "loss": 0.4928, "step": 1741 }, { "epoch": 0.38762794837561193, "grad_norm": 0.9684817870227882, "learning_rate": 7.0083034347867274e-06, "loss": 0.497, "step": 1742 }, { "epoch": 0.3878504672897196, "grad_norm": 0.9131449657012728, "learning_rate": 7.005002804950209e-06, "loss": 0.4988, "step": 1743 }, { "epoch": 0.38807298620382735, "grad_norm": 1.0289621978100967, "learning_rate": 7.001701133657577e-06, "loss": 0.5168, "step": 1744 }, { "epoch": 0.38829550511793504, "grad_norm": 0.9190429001170846, "learning_rate": 6.998398422623816e-06, "loss": 0.5058, "step": 1745 }, { "epoch": 0.3885180240320427, "grad_norm": 0.9699716670097563, "learning_rate": 6.995094673564451e-06, "loss": 0.5101, "step": 1746 }, { "epoch": 0.3887405429461504, "grad_norm": 0.9165402287262794, "learning_rate": 6.9917898881955395e-06, "loss": 0.4984, "step": 1747 }, { "epoch": 0.38896306186025814, "grad_norm": 0.9544119139520814, "learning_rate": 6.9884840682336865e-06, "loss": 0.509, "step": 1748 }, { "epoch": 0.3891855807743658, "grad_norm": 0.9932131066985761, "learning_rate": 6.985177215396028e-06, "loss": 0.5159, "step": 1749 }, { "epoch": 0.3894080996884735, "grad_norm": 0.9990474929752858, "learning_rate": 6.981869331400238e-06, "loss": 0.5315, "step": 1750 }, { "epoch": 0.38963061860258125, "grad_norm": 0.9841814905115275, "learning_rate": 6.978560417964529e-06, "loss": 0.5087, "step": 1751 }, { "epoch": 0.38985313751668893, "grad_norm": 0.8992113260731326, "learning_rate": 6.975250476807644e-06, "loss": 0.5052, "step": 1752 }, { "epoch": 0.3900756564307966, "grad_norm": 0.9294116215848829, "learning_rate": 6.97193950964886e-06, "loss": 0.5325, "step": 1753 }, { "epoch": 0.3902981753449043, "grad_norm": 0.9467825933882769, "learning_rate": 6.968627518207992e-06, "loss": 0.5177, "step": 1754 }, { "epoch": 0.39052069425901204, "grad_norm": 1.0041559555579134, "learning_rate": 6.965314504205382e-06, "loss": 0.494, "step": 1755 }, { "epoch": 0.3907432131731197, "grad_norm": 0.9288791480428314, "learning_rate": 6.962000469361904e-06, "loss": 0.5166, "step": 1756 }, { "epoch": 0.3909657320872274, "grad_norm": 0.9735208818598275, "learning_rate": 6.958685415398964e-06, "loss": 0.5059, "step": 1757 }, { "epoch": 0.39118825100133514, "grad_norm": 0.9407797278022317, "learning_rate": 6.955369344038495e-06, "loss": 0.5051, "step": 1758 }, { "epoch": 0.3914107699154428, "grad_norm": 0.9911501199890254, "learning_rate": 6.952052257002961e-06, "loss": 0.5133, "step": 1759 }, { "epoch": 0.3916332888295505, "grad_norm": 0.9512840844215209, "learning_rate": 6.948734156015353e-06, "loss": 0.5249, "step": 1760 }, { "epoch": 0.3918558077436582, "grad_norm": 0.9084350085396328, "learning_rate": 6.945415042799187e-06, "loss": 0.5094, "step": 1761 }, { "epoch": 0.39207832665776593, "grad_norm": 0.9728840211566436, "learning_rate": 6.942094919078506e-06, "loss": 0.5246, "step": 1762 }, { "epoch": 0.3923008455718736, "grad_norm": 0.8948204356447368, "learning_rate": 6.938773786577877e-06, "loss": 0.4921, "step": 1763 }, { "epoch": 0.3925233644859813, "grad_norm": 0.9682454513627182, "learning_rate": 6.935451647022394e-06, "loss": 0.5244, "step": 1764 }, { "epoch": 0.39274588340008904, "grad_norm": 0.9567383796028608, "learning_rate": 6.93212850213767e-06, "loss": 0.5083, "step": 1765 }, { "epoch": 0.3929684023141967, "grad_norm": 1.007578148360916, "learning_rate": 6.9288043536498425e-06, "loss": 0.5006, "step": 1766 }, { "epoch": 0.3931909212283044, "grad_norm": 0.9310882258553042, "learning_rate": 6.92547920328557e-06, "loss": 0.5051, "step": 1767 }, { "epoch": 0.3934134401424121, "grad_norm": 0.9493514236996378, "learning_rate": 6.922153052772029e-06, "loss": 0.5115, "step": 1768 }, { "epoch": 0.3936359590565198, "grad_norm": 0.9914503667439694, "learning_rate": 6.918825903836921e-06, "loss": 0.5115, "step": 1769 }, { "epoch": 0.3938584779706275, "grad_norm": 1.0010531179322955, "learning_rate": 6.915497758208462e-06, "loss": 0.5161, "step": 1770 }, { "epoch": 0.3940809968847352, "grad_norm": 0.9761092734588673, "learning_rate": 6.912168617615387e-06, "loss": 0.5132, "step": 1771 }, { "epoch": 0.3943035157988429, "grad_norm": 1.495823065668476, "learning_rate": 6.908838483786944e-06, "loss": 0.5072, "step": 1772 }, { "epoch": 0.3945260347129506, "grad_norm": 0.9182482661193143, "learning_rate": 6.9055073584529034e-06, "loss": 0.4993, "step": 1773 }, { "epoch": 0.3947485536270583, "grad_norm": 0.9395771650052857, "learning_rate": 6.902175243343546e-06, "loss": 0.5196, "step": 1774 }, { "epoch": 0.394971072541166, "grad_norm": 0.957742199006918, "learning_rate": 6.89884214018967e-06, "loss": 0.4884, "step": 1775 }, { "epoch": 0.3951935914552737, "grad_norm": 0.9542163992251576, "learning_rate": 6.895508050722585e-06, "loss": 0.5255, "step": 1776 }, { "epoch": 0.3954161103693814, "grad_norm": 0.969734446374198, "learning_rate": 6.89217297667411e-06, "loss": 0.5261, "step": 1777 }, { "epoch": 0.3956386292834891, "grad_norm": 0.9538807848309444, "learning_rate": 6.888836919776582e-06, "loss": 0.5099, "step": 1778 }, { "epoch": 0.39586114819759677, "grad_norm": 0.9141303429981741, "learning_rate": 6.885499881762841e-06, "loss": 0.5112, "step": 1779 }, { "epoch": 0.3960836671117045, "grad_norm": 0.9324785156756868, "learning_rate": 6.882161864366243e-06, "loss": 0.5061, "step": 1780 }, { "epoch": 0.3963061860258122, "grad_norm": 0.956112623469775, "learning_rate": 6.878822869320652e-06, "loss": 0.5207, "step": 1781 }, { "epoch": 0.3965287049399199, "grad_norm": 1.0846300512700684, "learning_rate": 6.875482898360435e-06, "loss": 0.5113, "step": 1782 }, { "epoch": 0.3967512238540276, "grad_norm": 0.992049458565361, "learning_rate": 6.872141953220468e-06, "loss": 0.5122, "step": 1783 }, { "epoch": 0.3969737427681353, "grad_norm": 0.9317096954237425, "learning_rate": 6.8688000356361374e-06, "loss": 0.505, "step": 1784 }, { "epoch": 0.397196261682243, "grad_norm": 1.0449776649894322, "learning_rate": 6.865457147343331e-06, "loss": 0.5031, "step": 1785 }, { "epoch": 0.39741878059635066, "grad_norm": 0.9827051563709696, "learning_rate": 6.862113290078438e-06, "loss": 0.5049, "step": 1786 }, { "epoch": 0.3976412995104584, "grad_norm": 1.0293708634730876, "learning_rate": 6.858768465578356e-06, "loss": 0.505, "step": 1787 }, { "epoch": 0.3978638184245661, "grad_norm": 0.9743800891486436, "learning_rate": 6.855422675580484e-06, "loss": 0.487, "step": 1788 }, { "epoch": 0.39808633733867377, "grad_norm": 1.0001329063274458, "learning_rate": 6.85207592182272e-06, "loss": 0.5166, "step": 1789 }, { "epoch": 0.3983088562527815, "grad_norm": 0.9796085637971632, "learning_rate": 6.848728206043463e-06, "loss": 0.4968, "step": 1790 }, { "epoch": 0.3985313751668892, "grad_norm": 0.9095503625449062, "learning_rate": 6.845379529981616e-06, "loss": 0.4869, "step": 1791 }, { "epoch": 0.3987538940809969, "grad_norm": 1.002576025062211, "learning_rate": 6.842029895376576e-06, "loss": 0.4894, "step": 1792 }, { "epoch": 0.39897641299510456, "grad_norm": 1.0689398248333066, "learning_rate": 6.838679303968239e-06, "loss": 0.4946, "step": 1793 }, { "epoch": 0.3991989319092123, "grad_norm": 1.0299136128240232, "learning_rate": 6.835327757497e-06, "loss": 0.5217, "step": 1794 }, { "epoch": 0.39942145082332, "grad_norm": 0.9793845248974179, "learning_rate": 6.831975257703748e-06, "loss": 0.5162, "step": 1795 }, { "epoch": 0.39964396973742766, "grad_norm": 0.9392719425847265, "learning_rate": 6.828621806329867e-06, "loss": 0.4905, "step": 1796 }, { "epoch": 0.3998664886515354, "grad_norm": 1.0170736976533232, "learning_rate": 6.825267405117235e-06, "loss": 0.4975, "step": 1797 }, { "epoch": 0.4000890075656431, "grad_norm": 1.0285047766047022, "learning_rate": 6.821912055808229e-06, "loss": 0.5131, "step": 1798 }, { "epoch": 0.40031152647975077, "grad_norm": 0.9064648101933526, "learning_rate": 6.818555760145709e-06, "loss": 0.5114, "step": 1799 }, { "epoch": 0.40053404539385845, "grad_norm": 0.9512872255282571, "learning_rate": 6.815198519873033e-06, "loss": 0.5111, "step": 1800 }, { "epoch": 0.4007565643079662, "grad_norm": 1.07358518430897, "learning_rate": 6.81184033673405e-06, "loss": 0.5014, "step": 1801 }, { "epoch": 0.4009790832220739, "grad_norm": 0.982735888227728, "learning_rate": 6.808481212473096e-06, "loss": 0.4938, "step": 1802 }, { "epoch": 0.40120160213618156, "grad_norm": 0.9777721138458596, "learning_rate": 6.805121148834994e-06, "loss": 0.5192, "step": 1803 }, { "epoch": 0.4014241210502893, "grad_norm": 0.9712258084401173, "learning_rate": 6.801760147565061e-06, "loss": 0.5011, "step": 1804 }, { "epoch": 0.401646639964397, "grad_norm": 1.0444552231235316, "learning_rate": 6.7983982104090975e-06, "loss": 0.5051, "step": 1805 }, { "epoch": 0.40186915887850466, "grad_norm": 0.980116259348569, "learning_rate": 6.795035339113387e-06, "loss": 0.5031, "step": 1806 }, { "epoch": 0.40209167779261235, "grad_norm": 0.999348795430473, "learning_rate": 6.791671535424707e-06, "loss": 0.5237, "step": 1807 }, { "epoch": 0.4023141967067201, "grad_norm": 1.0968816275014381, "learning_rate": 6.78830680109031e-06, "loss": 0.5188, "step": 1808 }, { "epoch": 0.40253671562082777, "grad_norm": 0.9624355485747612, "learning_rate": 6.784941137857935e-06, "loss": 0.5052, "step": 1809 }, { "epoch": 0.40275923453493545, "grad_norm": 1.0244363142727486, "learning_rate": 6.781574547475807e-06, "loss": 0.5177, "step": 1810 }, { "epoch": 0.4029817534490432, "grad_norm": 1.0563071134731465, "learning_rate": 6.7782070316926285e-06, "loss": 0.5078, "step": 1811 }, { "epoch": 0.4032042723631509, "grad_norm": 0.9542389023270607, "learning_rate": 6.774838592257583e-06, "loss": 0.5002, "step": 1812 }, { "epoch": 0.40342679127725856, "grad_norm": 1.0593431830824154, "learning_rate": 6.771469230920339e-06, "loss": 0.5264, "step": 1813 }, { "epoch": 0.40364931019136624, "grad_norm": 1.602452341457606, "learning_rate": 6.768098949431035e-06, "loss": 0.521, "step": 1814 }, { "epoch": 0.403871829105474, "grad_norm": 1.0822403205100284, "learning_rate": 6.764727749540293e-06, "loss": 0.5101, "step": 1815 }, { "epoch": 0.40409434801958166, "grad_norm": 0.9940160057438393, "learning_rate": 6.761355632999214e-06, "loss": 0.5171, "step": 1816 }, { "epoch": 0.40431686693368935, "grad_norm": 0.9794357001972331, "learning_rate": 6.757982601559369e-06, "loss": 0.4996, "step": 1817 }, { "epoch": 0.4045393858477971, "grad_norm": 0.9383068326951409, "learning_rate": 6.754608656972809e-06, "loss": 0.4944, "step": 1818 }, { "epoch": 0.40476190476190477, "grad_norm": 0.9266271385631147, "learning_rate": 6.751233800992059e-06, "loss": 0.5038, "step": 1819 }, { "epoch": 0.40498442367601245, "grad_norm": 0.9882707146088728, "learning_rate": 6.747858035370112e-06, "loss": 0.5091, "step": 1820 }, { "epoch": 0.40520694259012013, "grad_norm": 1.0162747608299063, "learning_rate": 6.744481361860442e-06, "loss": 0.5045, "step": 1821 }, { "epoch": 0.4054294615042279, "grad_norm": 1.091655857264674, "learning_rate": 6.74110378221699e-06, "loss": 0.523, "step": 1822 }, { "epoch": 0.40565198041833556, "grad_norm": 1.0241699529078094, "learning_rate": 6.737725298194166e-06, "loss": 0.5066, "step": 1823 }, { "epoch": 0.40587449933244324, "grad_norm": 1.5028545631718437, "learning_rate": 6.734345911546853e-06, "loss": 0.5213, "step": 1824 }, { "epoch": 0.406097018246551, "grad_norm": 1.0484253660580634, "learning_rate": 6.730965624030401e-06, "loss": 0.4989, "step": 1825 }, { "epoch": 0.40631953716065866, "grad_norm": 0.9715371049271198, "learning_rate": 6.72758443740063e-06, "loss": 0.4983, "step": 1826 }, { "epoch": 0.40654205607476634, "grad_norm": 1.0140509326775762, "learning_rate": 6.724202353413823e-06, "loss": 0.5257, "step": 1827 }, { "epoch": 0.40676457498887403, "grad_norm": 0.9329353158144232, "learning_rate": 6.720819373826735e-06, "loss": 0.5078, "step": 1828 }, { "epoch": 0.40698709390298177, "grad_norm": 0.979652447902449, "learning_rate": 6.71743550039658e-06, "loss": 0.5049, "step": 1829 }, { "epoch": 0.40720961281708945, "grad_norm": 0.989199362927149, "learning_rate": 6.714050734881038e-06, "loss": 0.5001, "step": 1830 }, { "epoch": 0.40743213173119713, "grad_norm": 0.9358098044410006, "learning_rate": 6.710665079038258e-06, "loss": 0.4944, "step": 1831 }, { "epoch": 0.4076546506453049, "grad_norm": 1.0285305007324443, "learning_rate": 6.707278534626845e-06, "loss": 0.5115, "step": 1832 }, { "epoch": 0.40787716955941256, "grad_norm": 1.0310047890043315, "learning_rate": 6.703891103405866e-06, "loss": 0.496, "step": 1833 }, { "epoch": 0.40809968847352024, "grad_norm": 1.0353608662182474, "learning_rate": 6.7005027871348516e-06, "loss": 0.5201, "step": 1834 }, { "epoch": 0.4083222073876279, "grad_norm": 1.0253661574953572, "learning_rate": 6.69711358757379e-06, "loss": 0.5017, "step": 1835 }, { "epoch": 0.40854472630173566, "grad_norm": 0.9540048079625912, "learning_rate": 6.693723506483132e-06, "loss": 0.5043, "step": 1836 }, { "epoch": 0.40876724521584334, "grad_norm": 0.9286932183881416, "learning_rate": 6.690332545623778e-06, "loss": 0.5027, "step": 1837 }, { "epoch": 0.40898976412995103, "grad_norm": 1.011824895463246, "learning_rate": 6.6869407067570945e-06, "loss": 0.5021, "step": 1838 }, { "epoch": 0.40921228304405877, "grad_norm": 1.0499464676374894, "learning_rate": 6.6835479916448984e-06, "loss": 0.4879, "step": 1839 }, { "epoch": 0.40943480195816645, "grad_norm": 0.9801571536413262, "learning_rate": 6.680154402049464e-06, "loss": 0.5003, "step": 1840 }, { "epoch": 0.40965732087227413, "grad_norm": 1.1933587304673396, "learning_rate": 6.6767599397335185e-06, "loss": 0.5214, "step": 1841 }, { "epoch": 0.4098798397863818, "grad_norm": 1.319548758469923, "learning_rate": 6.673364606460246e-06, "loss": 0.502, "step": 1842 }, { "epoch": 0.41010235870048956, "grad_norm": 0.9284614731251787, "learning_rate": 6.669968403993275e-06, "loss": 0.4978, "step": 1843 }, { "epoch": 0.41032487761459724, "grad_norm": 1.0449761906822084, "learning_rate": 6.666571334096697e-06, "loss": 0.5146, "step": 1844 }, { "epoch": 0.4105473965287049, "grad_norm": 0.9710415309729923, "learning_rate": 6.663173398535043e-06, "loss": 0.5134, "step": 1845 }, { "epoch": 0.41076991544281266, "grad_norm": 2.9063267533755464, "learning_rate": 6.6597745990733006e-06, "loss": 0.5097, "step": 1846 }, { "epoch": 0.41099243435692034, "grad_norm": 1.0217118185531682, "learning_rate": 6.656374937476904e-06, "loss": 0.5326, "step": 1847 }, { "epoch": 0.411214953271028, "grad_norm": 0.9488847843141686, "learning_rate": 6.652974415511735e-06, "loss": 0.5112, "step": 1848 }, { "epoch": 0.4114374721851357, "grad_norm": 0.950745305478498, "learning_rate": 6.649573034944122e-06, "loss": 0.5096, "step": 1849 }, { "epoch": 0.41165999109924345, "grad_norm": 0.9337726286443452, "learning_rate": 6.64617079754084e-06, "loss": 0.5098, "step": 1850 }, { "epoch": 0.41188251001335113, "grad_norm": 0.8993776844156247, "learning_rate": 6.642767705069109e-06, "loss": 0.5083, "step": 1851 }, { "epoch": 0.4121050289274588, "grad_norm": 0.9384578538160697, "learning_rate": 6.639363759296592e-06, "loss": 0.4988, "step": 1852 }, { "epoch": 0.41232754784156656, "grad_norm": 1.0275694632730668, "learning_rate": 6.635958961991399e-06, "loss": 0.4974, "step": 1853 }, { "epoch": 0.41255006675567424, "grad_norm": 0.936314113159998, "learning_rate": 6.632553314922078e-06, "loss": 0.4954, "step": 1854 }, { "epoch": 0.4127725856697819, "grad_norm": 0.983328440295307, "learning_rate": 6.6291468198576195e-06, "loss": 0.5079, "step": 1855 }, { "epoch": 0.4129951045838896, "grad_norm": 1.0486123627009871, "learning_rate": 6.6257394785674555e-06, "loss": 0.5162, "step": 1856 }, { "epoch": 0.41321762349799734, "grad_norm": 0.999840047589728, "learning_rate": 6.622331292821458e-06, "loss": 0.4807, "step": 1857 }, { "epoch": 0.413440142412105, "grad_norm": 0.9217104167230975, "learning_rate": 6.6189222643899356e-06, "loss": 0.5095, "step": 1858 }, { "epoch": 0.4136626613262127, "grad_norm": 0.9464020070259233, "learning_rate": 6.615512395043635e-06, "loss": 0.5156, "step": 1859 }, { "epoch": 0.41388518024032045, "grad_norm": 0.92806827306718, "learning_rate": 6.612101686553742e-06, "loss": 0.4894, "step": 1860 }, { "epoch": 0.41410769915442813, "grad_norm": 1.0132966306726856, "learning_rate": 6.608690140691876e-06, "loss": 0.4925, "step": 1861 }, { "epoch": 0.4143302180685358, "grad_norm": 1.0343798240266187, "learning_rate": 6.605277759230092e-06, "loss": 0.5121, "step": 1862 }, { "epoch": 0.4145527369826435, "grad_norm": 1.403795615712794, "learning_rate": 6.60186454394088e-06, "loss": 0.5054, "step": 1863 }, { "epoch": 0.41477525589675124, "grad_norm": 0.9939901757937781, "learning_rate": 6.5984504965971595e-06, "loss": 0.5052, "step": 1864 }, { "epoch": 0.4149977748108589, "grad_norm": 1.0297534449369268, "learning_rate": 6.595035618972288e-06, "loss": 0.5013, "step": 1865 }, { "epoch": 0.4152202937249666, "grad_norm": 1.0129984449479519, "learning_rate": 6.591619912840048e-06, "loss": 0.5207, "step": 1866 }, { "epoch": 0.41544281263907434, "grad_norm": 0.9566638349763548, "learning_rate": 6.588203379974658e-06, "loss": 0.5048, "step": 1867 }, { "epoch": 0.415665331553182, "grad_norm": 0.9100756154596323, "learning_rate": 6.584786022150765e-06, "loss": 0.5004, "step": 1868 }, { "epoch": 0.4158878504672897, "grad_norm": 0.9283967619217091, "learning_rate": 6.581367841143438e-06, "loss": 0.5001, "step": 1869 }, { "epoch": 0.4161103693813974, "grad_norm": 0.9462541887233074, "learning_rate": 6.577948838728183e-06, "loss": 0.4946, "step": 1870 }, { "epoch": 0.41633288829550513, "grad_norm": 1.0003986428531297, "learning_rate": 6.5745290166809236e-06, "loss": 0.5128, "step": 1871 }, { "epoch": 0.4165554072096128, "grad_norm": 1.0224429995877555, "learning_rate": 6.571108376778017e-06, "loss": 0.5077, "step": 1872 }, { "epoch": 0.4167779261237205, "grad_norm": 0.9163334016538828, "learning_rate": 6.567686920796244e-06, "loss": 0.5057, "step": 1873 }, { "epoch": 0.41700044503782824, "grad_norm": 0.8904449237310106, "learning_rate": 6.564264650512802e-06, "loss": 0.5045, "step": 1874 }, { "epoch": 0.4172229639519359, "grad_norm": 0.9556988257514117, "learning_rate": 6.56084156770532e-06, "loss": 0.5152, "step": 1875 }, { "epoch": 0.4174454828660436, "grad_norm": 1.035518807423699, "learning_rate": 6.557417674151842e-06, "loss": 0.508, "step": 1876 }, { "epoch": 0.4176680017801513, "grad_norm": 0.9475817182201567, "learning_rate": 6.5539929716308425e-06, "loss": 0.5016, "step": 1877 }, { "epoch": 0.417890520694259, "grad_norm": 0.9429972411864409, "learning_rate": 6.550567461921207e-06, "loss": 0.505, "step": 1878 }, { "epoch": 0.4181130396083667, "grad_norm": 0.9423615899477095, "learning_rate": 6.547141146802245e-06, "loss": 0.4792, "step": 1879 }, { "epoch": 0.4183355585224744, "grad_norm": 1.0099019518123296, "learning_rate": 6.54371402805368e-06, "loss": 0.5089, "step": 1880 }, { "epoch": 0.41855807743658213, "grad_norm": 1.102939590063504, "learning_rate": 6.54028610745566e-06, "loss": 0.5148, "step": 1881 }, { "epoch": 0.4187805963506898, "grad_norm": 1.0623606889657498, "learning_rate": 6.536857386788743e-06, "loss": 0.5181, "step": 1882 }, { "epoch": 0.4190031152647975, "grad_norm": 1.058324363211464, "learning_rate": 6.533427867833906e-06, "loss": 0.514, "step": 1883 }, { "epoch": 0.4192256341789052, "grad_norm": 0.9507712458452279, "learning_rate": 6.529997552372541e-06, "loss": 0.508, "step": 1884 }, { "epoch": 0.4194481530930129, "grad_norm": 0.9951353399018629, "learning_rate": 6.526566442186451e-06, "loss": 0.514, "step": 1885 }, { "epoch": 0.4196706720071206, "grad_norm": 0.9931409767535173, "learning_rate": 6.523134539057853e-06, "loss": 0.4926, "step": 1886 }, { "epoch": 0.4198931909212283, "grad_norm": 0.9531521873653338, "learning_rate": 6.519701844769376e-06, "loss": 0.5201, "step": 1887 }, { "epoch": 0.420115709835336, "grad_norm": 0.9651336193812907, "learning_rate": 6.516268361104062e-06, "loss": 0.4815, "step": 1888 }, { "epoch": 0.4203382287494437, "grad_norm": 0.9744026705086778, "learning_rate": 6.512834089845359e-06, "loss": 0.5133, "step": 1889 }, { "epoch": 0.4205607476635514, "grad_norm": 0.9612985313226985, "learning_rate": 6.509399032777129e-06, "loss": 0.4961, "step": 1890 }, { "epoch": 0.4207832665776591, "grad_norm": 0.9557787027979092, "learning_rate": 6.505963191683636e-06, "loss": 0.5002, "step": 1891 }, { "epoch": 0.4210057854917668, "grad_norm": 1.0020076768037498, "learning_rate": 6.502526568349555e-06, "loss": 0.5145, "step": 1892 }, { "epoch": 0.4212283044058745, "grad_norm": 1.0550565381627857, "learning_rate": 6.49908916455997e-06, "loss": 0.5227, "step": 1893 }, { "epoch": 0.4214508233199822, "grad_norm": 0.9165573471103208, "learning_rate": 6.495650982100368e-06, "loss": 0.505, "step": 1894 }, { "epoch": 0.4216733422340899, "grad_norm": 0.9722979405523946, "learning_rate": 6.492212022756634e-06, "loss": 0.5096, "step": 1895 }, { "epoch": 0.4218958611481976, "grad_norm": 1.2454877002315863, "learning_rate": 6.488772288315067e-06, "loss": 0.5049, "step": 1896 }, { "epoch": 0.4221183800623053, "grad_norm": 0.9320707309822202, "learning_rate": 6.485331780562363e-06, "loss": 0.4994, "step": 1897 }, { "epoch": 0.42234089897641297, "grad_norm": 0.9555758863782521, "learning_rate": 6.481890501285619e-06, "loss": 0.4912, "step": 1898 }, { "epoch": 0.4225634178905207, "grad_norm": 1.0136628404153887, "learning_rate": 6.478448452272339e-06, "loss": 0.5074, "step": 1899 }, { "epoch": 0.4227859368046284, "grad_norm": 0.952646044016949, "learning_rate": 6.475005635310417e-06, "loss": 0.4776, "step": 1900 }, { "epoch": 0.4230084557187361, "grad_norm": 1.0084726765653707, "learning_rate": 6.471562052188154e-06, "loss": 0.4941, "step": 1901 }, { "epoch": 0.4232309746328438, "grad_norm": 1.1028448242362463, "learning_rate": 6.468117704694244e-06, "loss": 0.5126, "step": 1902 }, { "epoch": 0.4234534935469515, "grad_norm": 1.0029535327694512, "learning_rate": 6.464672594617784e-06, "loss": 0.4875, "step": 1903 }, { "epoch": 0.4236760124610592, "grad_norm": 0.9950831241826033, "learning_rate": 6.4612267237482584e-06, "loss": 0.5081, "step": 1904 }, { "epoch": 0.42389853137516686, "grad_norm": 1.0188358592728997, "learning_rate": 6.457780093875555e-06, "loss": 0.5137, "step": 1905 }, { "epoch": 0.4241210502892746, "grad_norm": 1.0335767519464714, "learning_rate": 6.454332706789952e-06, "loss": 0.506, "step": 1906 }, { "epoch": 0.4243435692033823, "grad_norm": 1.0336667869608978, "learning_rate": 6.45088456428212e-06, "loss": 0.5217, "step": 1907 }, { "epoch": 0.42456608811748997, "grad_norm": 1.0059273627417133, "learning_rate": 6.447435668143124e-06, "loss": 0.5006, "step": 1908 }, { "epoch": 0.4247886070315977, "grad_norm": 1.1165873815365144, "learning_rate": 6.443986020164421e-06, "loss": 0.5076, "step": 1909 }, { "epoch": 0.4250111259457054, "grad_norm": 0.9369529511641114, "learning_rate": 6.4405356221378566e-06, "loss": 0.5135, "step": 1910 }, { "epoch": 0.4252336448598131, "grad_norm": 1.0019647572973287, "learning_rate": 6.437084475855665e-06, "loss": 0.5114, "step": 1911 }, { "epoch": 0.42545616377392076, "grad_norm": 1.0538869988549036, "learning_rate": 6.433632583110474e-06, "loss": 0.5172, "step": 1912 }, { "epoch": 0.4256786826880285, "grad_norm": 0.9173719331632345, "learning_rate": 6.430179945695293e-06, "loss": 0.4891, "step": 1913 }, { "epoch": 0.4259012016021362, "grad_norm": 0.9976887098491979, "learning_rate": 6.426726565403523e-06, "loss": 0.4862, "step": 1914 }, { "epoch": 0.42612372051624386, "grad_norm": 0.9823515394425055, "learning_rate": 6.423272444028949e-06, "loss": 0.4863, "step": 1915 }, { "epoch": 0.4263462394303516, "grad_norm": 1.0229264901624002, "learning_rate": 6.419817583365739e-06, "loss": 0.4977, "step": 1916 }, { "epoch": 0.4265687583444593, "grad_norm": 1.0181229728135563, "learning_rate": 6.41636198520845e-06, "loss": 0.5119, "step": 1917 }, { "epoch": 0.42679127725856697, "grad_norm": 1.8170771050937666, "learning_rate": 6.412905651352016e-06, "loss": 0.4918, "step": 1918 }, { "epoch": 0.42701379617267465, "grad_norm": 1.0136573386618393, "learning_rate": 6.40944858359176e-06, "loss": 0.5025, "step": 1919 }, { "epoch": 0.4272363150867824, "grad_norm": 0.9885717051094173, "learning_rate": 6.4059907837233774e-06, "loss": 0.4993, "step": 1920 }, { "epoch": 0.4274588340008901, "grad_norm": 1.080571573211567, "learning_rate": 6.402532253542953e-06, "loss": 0.4927, "step": 1921 }, { "epoch": 0.42768135291499776, "grad_norm": 1.0325946228454268, "learning_rate": 6.399072994846947e-06, "loss": 0.5144, "step": 1922 }, { "epoch": 0.4279038718291055, "grad_norm": 0.9661014029963613, "learning_rate": 6.395613009432191e-06, "loss": 0.5063, "step": 1923 }, { "epoch": 0.4281263907432132, "grad_norm": 1.1020337639474262, "learning_rate": 6.392152299095911e-06, "loss": 0.5005, "step": 1924 }, { "epoch": 0.42834890965732086, "grad_norm": 0.9567877884480647, "learning_rate": 6.388690865635694e-06, "loss": 0.5095, "step": 1925 }, { "epoch": 0.42857142857142855, "grad_norm": 1.0164626529241971, "learning_rate": 6.385228710849507e-06, "loss": 0.4922, "step": 1926 }, { "epoch": 0.4287939474855363, "grad_norm": 0.9878669372745648, "learning_rate": 6.3817658365356964e-06, "loss": 0.4879, "step": 1927 }, { "epoch": 0.42901646639964397, "grad_norm": 0.9434727867420117, "learning_rate": 6.3783022444929745e-06, "loss": 0.494, "step": 1928 }, { "epoch": 0.42923898531375165, "grad_norm": 0.897276615617273, "learning_rate": 6.374837936520434e-06, "loss": 0.5024, "step": 1929 }, { "epoch": 0.4294615042278594, "grad_norm": 0.9139139436054887, "learning_rate": 6.371372914417535e-06, "loss": 0.4955, "step": 1930 }, { "epoch": 0.4296840231419671, "grad_norm": 0.8703755284172839, "learning_rate": 6.3679071799841095e-06, "loss": 0.4828, "step": 1931 }, { "epoch": 0.42990654205607476, "grad_norm": 1.0177595924121607, "learning_rate": 6.36444073502036e-06, "loss": 0.5203, "step": 1932 }, { "epoch": 0.43012906097018244, "grad_norm": 0.9755941210479535, "learning_rate": 6.360973581326857e-06, "loss": 0.5141, "step": 1933 }, { "epoch": 0.4303515798842902, "grad_norm": 0.9401630095106616, "learning_rate": 6.35750572070454e-06, "loss": 0.4852, "step": 1934 }, { "epoch": 0.43057409879839786, "grad_norm": 0.9945158159002839, "learning_rate": 6.354037154954715e-06, "loss": 0.5049, "step": 1935 }, { "epoch": 0.43079661771250555, "grad_norm": 0.9122213489259284, "learning_rate": 6.350567885879058e-06, "loss": 0.4918, "step": 1936 }, { "epoch": 0.4310191366266133, "grad_norm": 0.9678492121372808, "learning_rate": 6.347097915279603e-06, "loss": 0.4957, "step": 1937 }, { "epoch": 0.43124165554072097, "grad_norm": 1.0155323351222365, "learning_rate": 6.343627244958755e-06, "loss": 0.502, "step": 1938 }, { "epoch": 0.43146417445482865, "grad_norm": 0.9861212230040557, "learning_rate": 6.340155876719276e-06, "loss": 0.4993, "step": 1939 }, { "epoch": 0.43168669336893634, "grad_norm": 1.0256311535430043, "learning_rate": 6.336683812364301e-06, "loss": 0.5051, "step": 1940 }, { "epoch": 0.4319092122830441, "grad_norm": 1.0845160393236752, "learning_rate": 6.333211053697316e-06, "loss": 0.5053, "step": 1941 }, { "epoch": 0.43213173119715176, "grad_norm": 1.0589233624824572, "learning_rate": 6.3297376025221725e-06, "loss": 0.5159, "step": 1942 }, { "epoch": 0.43235425011125944, "grad_norm": 0.9773628213225745, "learning_rate": 6.326263460643081e-06, "loss": 0.5001, "step": 1943 }, { "epoch": 0.4325767690253672, "grad_norm": 1.0430579972256846, "learning_rate": 6.322788629864609e-06, "loss": 0.5277, "step": 1944 }, { "epoch": 0.43279928793947486, "grad_norm": 1.0214257585481905, "learning_rate": 6.3193131119916854e-06, "loss": 0.5195, "step": 1945 }, { "epoch": 0.43302180685358255, "grad_norm": 1.019847627597177, "learning_rate": 6.315836908829594e-06, "loss": 0.5187, "step": 1946 }, { "epoch": 0.43324432576769023, "grad_norm": 1.010708707250912, "learning_rate": 6.3123600221839755e-06, "loss": 0.4997, "step": 1947 }, { "epoch": 0.43346684468179797, "grad_norm": 1.1334245068493312, "learning_rate": 6.308882453860821e-06, "loss": 0.4868, "step": 1948 }, { "epoch": 0.43368936359590565, "grad_norm": 1.0434407871989713, "learning_rate": 6.305404205666484e-06, "loss": 0.5067, "step": 1949 }, { "epoch": 0.43391188251001334, "grad_norm": 0.9702369154914667, "learning_rate": 6.301925279407666e-06, "loss": 0.5052, "step": 1950 }, { "epoch": 0.4341344014241211, "grad_norm": 0.939409157347434, "learning_rate": 6.298445676891418e-06, "loss": 0.4983, "step": 1951 }, { "epoch": 0.43435692033822876, "grad_norm": 1.0277053113495975, "learning_rate": 6.294965399925149e-06, "loss": 0.4939, "step": 1952 }, { "epoch": 0.43457943925233644, "grad_norm": 1.0448983866421626, "learning_rate": 6.291484450316614e-06, "loss": 0.5068, "step": 1953 }, { "epoch": 0.4348019581664441, "grad_norm": 0.9748221358279066, "learning_rate": 6.288002829873916e-06, "loss": 0.4815, "step": 1954 }, { "epoch": 0.43502447708055186, "grad_norm": 1.068743331692257, "learning_rate": 6.284520540405513e-06, "loss": 0.4945, "step": 1955 }, { "epoch": 0.43524699599465955, "grad_norm": 1.0235379523363448, "learning_rate": 6.281037583720204e-06, "loss": 0.514, "step": 1956 }, { "epoch": 0.43546951490876723, "grad_norm": 1.1488847004676999, "learning_rate": 6.2775539616271364e-06, "loss": 0.5186, "step": 1957 }, { "epoch": 0.43569203382287497, "grad_norm": 1.0426698122641207, "learning_rate": 6.2740696759358045e-06, "loss": 0.4982, "step": 1958 }, { "epoch": 0.43591455273698265, "grad_norm": 1.0582315815443974, "learning_rate": 6.270584728456046e-06, "loss": 0.5016, "step": 1959 }, { "epoch": 0.43613707165109034, "grad_norm": 1.3568671395494398, "learning_rate": 6.267099120998043e-06, "loss": 0.4845, "step": 1960 }, { "epoch": 0.436359590565198, "grad_norm": 0.9797398368526014, "learning_rate": 6.263612855372321e-06, "loss": 0.5058, "step": 1961 }, { "epoch": 0.43658210947930576, "grad_norm": 1.0656836328551742, "learning_rate": 6.260125933389745e-06, "loss": 0.5037, "step": 1962 }, { "epoch": 0.43680462839341344, "grad_norm": 0.994147509702688, "learning_rate": 6.256638356861524e-06, "loss": 0.4921, "step": 1963 }, { "epoch": 0.4370271473075211, "grad_norm": 1.1427572144979823, "learning_rate": 6.2531501275992035e-06, "loss": 0.5108, "step": 1964 }, { "epoch": 0.43724966622162886, "grad_norm": 1.1296064662728722, "learning_rate": 6.249661247414674e-06, "loss": 0.5056, "step": 1965 }, { "epoch": 0.43747218513573655, "grad_norm": 0.9932196399754556, "learning_rate": 6.2461717181201556e-06, "loss": 0.5081, "step": 1966 }, { "epoch": 0.43769470404984423, "grad_norm": 0.9267083693408519, "learning_rate": 6.242681541528214e-06, "loss": 0.4964, "step": 1967 }, { "epoch": 0.4379172229639519, "grad_norm": 1.0897078754397924, "learning_rate": 6.239190719451746e-06, "loss": 0.5017, "step": 1968 }, { "epoch": 0.43813974187805965, "grad_norm": 1.0041047899744817, "learning_rate": 6.235699253703983e-06, "loss": 0.4882, "step": 1969 }, { "epoch": 0.43836226079216734, "grad_norm": 0.9806338404816154, "learning_rate": 6.232207146098494e-06, "loss": 0.5269, "step": 1970 }, { "epoch": 0.438584779706275, "grad_norm": 1.053729726748909, "learning_rate": 6.2287143984491825e-06, "loss": 0.5143, "step": 1971 }, { "epoch": 0.43880729862038276, "grad_norm": 1.0738815956947139, "learning_rate": 6.225221012570278e-06, "loss": 0.5044, "step": 1972 }, { "epoch": 0.43902981753449044, "grad_norm": 1.005869040449588, "learning_rate": 6.221726990276349e-06, "loss": 0.5132, "step": 1973 }, { "epoch": 0.4392523364485981, "grad_norm": 1.1296889689183247, "learning_rate": 6.218232333382288e-06, "loss": 0.4935, "step": 1974 }, { "epoch": 0.4394748553627058, "grad_norm": 1.0134644686237082, "learning_rate": 6.2147370437033226e-06, "loss": 0.4872, "step": 1975 }, { "epoch": 0.43969737427681355, "grad_norm": 0.9798485857276173, "learning_rate": 6.211241123055007e-06, "loss": 0.4855, "step": 1976 }, { "epoch": 0.43991989319092123, "grad_norm": 1.02014028378511, "learning_rate": 6.207744573253223e-06, "loss": 0.5066, "step": 1977 }, { "epoch": 0.4401424121050289, "grad_norm": 1.0530472148756103, "learning_rate": 6.204247396114177e-06, "loss": 0.5021, "step": 1978 }, { "epoch": 0.44036493101913665, "grad_norm": 1.017666410815338, "learning_rate": 6.200749593454405e-06, "loss": 0.5192, "step": 1979 }, { "epoch": 0.44058744993324434, "grad_norm": 1.3457099163980506, "learning_rate": 6.197251167090764e-06, "loss": 0.4863, "step": 1980 }, { "epoch": 0.440809968847352, "grad_norm": 0.914220443957893, "learning_rate": 6.193752118840441e-06, "loss": 0.4884, "step": 1981 }, { "epoch": 0.4410324877614597, "grad_norm": 0.9599262467039413, "learning_rate": 6.19025245052094e-06, "loss": 0.4951, "step": 1982 }, { "epoch": 0.44125500667556744, "grad_norm": 1.019845270079923, "learning_rate": 6.18675216395009e-06, "loss": 0.4988, "step": 1983 }, { "epoch": 0.4414775255896751, "grad_norm": 1.065145182972958, "learning_rate": 6.18325126094604e-06, "loss": 0.4879, "step": 1984 }, { "epoch": 0.4417000445037828, "grad_norm": 0.99375060402499, "learning_rate": 6.179749743327258e-06, "loss": 0.494, "step": 1985 }, { "epoch": 0.44192256341789055, "grad_norm": 1.0553175611456231, "learning_rate": 6.176247612912537e-06, "loss": 0.4804, "step": 1986 }, { "epoch": 0.44214508233199823, "grad_norm": 0.9929493543318975, "learning_rate": 6.172744871520983e-06, "loss": 0.4904, "step": 1987 }, { "epoch": 0.4423676012461059, "grad_norm": 1.0008652737530934, "learning_rate": 6.169241520972017e-06, "loss": 0.5005, "step": 1988 }, { "epoch": 0.4425901201602136, "grad_norm": 1.048621762649114, "learning_rate": 6.165737563085384e-06, "loss": 0.511, "step": 1989 }, { "epoch": 0.44281263907432133, "grad_norm": 1.070109831453564, "learning_rate": 6.162232999681139e-06, "loss": 0.4916, "step": 1990 }, { "epoch": 0.443035157988429, "grad_norm": 0.9844843562111817, "learning_rate": 6.158727832579653e-06, "loss": 0.498, "step": 1991 }, { "epoch": 0.4432576769025367, "grad_norm": 1.0485182689004708, "learning_rate": 6.155222063601611e-06, "loss": 0.4933, "step": 1992 }, { "epoch": 0.44348019581664444, "grad_norm": 1.0000737315000867, "learning_rate": 6.151715694568012e-06, "loss": 0.4945, "step": 1993 }, { "epoch": 0.4437027147307521, "grad_norm": 1.0401329799037107, "learning_rate": 6.14820872730016e-06, "loss": 0.4997, "step": 1994 }, { "epoch": 0.4439252336448598, "grad_norm": 1.1584987345720557, "learning_rate": 6.144701163619678e-06, "loss": 0.4823, "step": 1995 }, { "epoch": 0.4441477525589675, "grad_norm": 1.0223330426819504, "learning_rate": 6.141193005348497e-06, "loss": 0.4963, "step": 1996 }, { "epoch": 0.44437027147307523, "grad_norm": 1.01189572965033, "learning_rate": 6.1376842543088515e-06, "loss": 0.4936, "step": 1997 }, { "epoch": 0.4445927903871829, "grad_norm": 1.0036445174696467, "learning_rate": 6.1341749123232906e-06, "loss": 0.495, "step": 1998 }, { "epoch": 0.4448153093012906, "grad_norm": 1.058619671774459, "learning_rate": 6.1306649812146665e-06, "loss": 0.4998, "step": 1999 }, { "epoch": 0.44503782821539833, "grad_norm": 0.9914391480076384, "learning_rate": 6.127154462806136e-06, "loss": 0.4953, "step": 2000 }, { "epoch": 0.445260347129506, "grad_norm": 1.1404759742631252, "learning_rate": 6.123643358921168e-06, "loss": 0.5087, "step": 2001 }, { "epoch": 0.4454828660436137, "grad_norm": 0.9846507043539914, "learning_rate": 6.120131671383527e-06, "loss": 0.4999, "step": 2002 }, { "epoch": 0.4457053849577214, "grad_norm": 1.0026031233559676, "learning_rate": 6.116619402017285e-06, "loss": 0.4853, "step": 2003 }, { "epoch": 0.4459279038718291, "grad_norm": 1.0134279496362086, "learning_rate": 6.113106552646818e-06, "loss": 0.487, "step": 2004 }, { "epoch": 0.4461504227859368, "grad_norm": 0.9928419580364871, "learning_rate": 6.109593125096799e-06, "loss": 0.4958, "step": 2005 }, { "epoch": 0.4463729417000445, "grad_norm": 1.049809486594739, "learning_rate": 6.106079121192202e-06, "loss": 0.4826, "step": 2006 }, { "epoch": 0.44659546061415223, "grad_norm": 1.0511021013250523, "learning_rate": 6.1025645427583055e-06, "loss": 0.4893, "step": 2007 }, { "epoch": 0.4468179795282599, "grad_norm": 1.009964281523944, "learning_rate": 6.099049391620682e-06, "loss": 0.4986, "step": 2008 }, { "epoch": 0.4470404984423676, "grad_norm": 0.9834315898580688, "learning_rate": 6.095533669605198e-06, "loss": 0.4973, "step": 2009 }, { "epoch": 0.4472630173564753, "grad_norm": 1.006009931964054, "learning_rate": 6.092017378538025e-06, "loss": 0.4945, "step": 2010 }, { "epoch": 0.447485536270583, "grad_norm": 1.0243861651747517, "learning_rate": 6.088500520245621e-06, "loss": 0.5052, "step": 2011 }, { "epoch": 0.4477080551846907, "grad_norm": 1.0112915300745602, "learning_rate": 6.084983096554749e-06, "loss": 0.4945, "step": 2012 }, { "epoch": 0.4479305740987984, "grad_norm": 0.9814824377923299, "learning_rate": 6.081465109292456e-06, "loss": 0.5079, "step": 2013 }, { "epoch": 0.4481530930129061, "grad_norm": 0.9644589324927368, "learning_rate": 6.077946560286087e-06, "loss": 0.5223, "step": 2014 }, { "epoch": 0.4483756119270138, "grad_norm": 1.0251819715000463, "learning_rate": 6.0744274513632784e-06, "loss": 0.5086, "step": 2015 }, { "epoch": 0.4485981308411215, "grad_norm": 1.056995626224044, "learning_rate": 6.070907784351955e-06, "loss": 0.5003, "step": 2016 }, { "epoch": 0.4488206497552292, "grad_norm": 1.0995794538451893, "learning_rate": 6.067387561080335e-06, "loss": 0.5046, "step": 2017 }, { "epoch": 0.4490431686693369, "grad_norm": 1.078785986749171, "learning_rate": 6.063866783376921e-06, "loss": 0.4966, "step": 2018 }, { "epoch": 0.4492656875834446, "grad_norm": 1.1588969724407254, "learning_rate": 6.0603454530705086e-06, "loss": 0.503, "step": 2019 }, { "epoch": 0.4494882064975523, "grad_norm": 0.9295638184172147, "learning_rate": 6.056823571990177e-06, "loss": 0.4901, "step": 2020 }, { "epoch": 0.44971072541166, "grad_norm": 1.0890678482329228, "learning_rate": 6.0533011419652905e-06, "loss": 0.4959, "step": 2021 }, { "epoch": 0.4499332443257677, "grad_norm": 1.0142337362445795, "learning_rate": 6.049778164825504e-06, "loss": 0.4896, "step": 2022 }, { "epoch": 0.4501557632398754, "grad_norm": 1.0809519776350738, "learning_rate": 6.046254642400752e-06, "loss": 0.5082, "step": 2023 }, { "epoch": 0.45037828215398307, "grad_norm": 1.0931728759730859, "learning_rate": 6.042730576521253e-06, "loss": 0.489, "step": 2024 }, { "epoch": 0.4506008010680908, "grad_norm": 1.0053703702834806, "learning_rate": 6.039205969017508e-06, "loss": 0.4902, "step": 2025 }, { "epoch": 0.4508233199821985, "grad_norm": 1.0211648719233732, "learning_rate": 6.035680821720298e-06, "loss": 0.484, "step": 2026 }, { "epoch": 0.4510458388963062, "grad_norm": 1.1040625334599192, "learning_rate": 6.032155136460689e-06, "loss": 0.5216, "step": 2027 }, { "epoch": 0.4512683578104139, "grad_norm": 0.9995141831620313, "learning_rate": 6.028628915070022e-06, "loss": 0.4817, "step": 2028 }, { "epoch": 0.4514908767245216, "grad_norm": 1.1463498147514062, "learning_rate": 6.025102159379917e-06, "loss": 0.5068, "step": 2029 }, { "epoch": 0.4517133956386293, "grad_norm": 1.015878984346934, "learning_rate": 6.021574871222274e-06, "loss": 0.4997, "step": 2030 }, { "epoch": 0.45193591455273696, "grad_norm": 1.0443187306899284, "learning_rate": 6.018047052429266e-06, "loss": 0.4748, "step": 2031 }, { "epoch": 0.4521584334668447, "grad_norm": 1.1486212260599091, "learning_rate": 6.014518704833344e-06, "loss": 0.5081, "step": 2032 }, { "epoch": 0.4523809523809524, "grad_norm": 1.0340284378546547, "learning_rate": 6.010989830267234e-06, "loss": 0.4954, "step": 2033 }, { "epoch": 0.45260347129506007, "grad_norm": 0.9762869131715155, "learning_rate": 6.007460430563935e-06, "loss": 0.4981, "step": 2034 }, { "epoch": 0.4528259902091678, "grad_norm": 1.012782375909152, "learning_rate": 6.0039305075567175e-06, "loss": 0.4808, "step": 2035 }, { "epoch": 0.4530485091232755, "grad_norm": 1.0236226728536297, "learning_rate": 6.000400063079126e-06, "loss": 0.4963, "step": 2036 }, { "epoch": 0.4532710280373832, "grad_norm": 1.1035776580493724, "learning_rate": 5.9968690989649734e-06, "loss": 0.4953, "step": 2037 }, { "epoch": 0.45349354695149086, "grad_norm": 1.009822878611809, "learning_rate": 5.993337617048347e-06, "loss": 0.507, "step": 2038 }, { "epoch": 0.4537160658655986, "grad_norm": 1.0474372150546483, "learning_rate": 5.989805619163599e-06, "loss": 0.4896, "step": 2039 }, { "epoch": 0.4539385847797063, "grad_norm": 1.0953158941394676, "learning_rate": 5.986273107145348e-06, "loss": 0.4829, "step": 2040 }, { "epoch": 0.45416110369381396, "grad_norm": 1.016605480406379, "learning_rate": 5.982740082828485e-06, "loss": 0.4822, "step": 2041 }, { "epoch": 0.4543836226079217, "grad_norm": 1.0113613058944393, "learning_rate": 5.979206548048163e-06, "loss": 0.5046, "step": 2042 }, { "epoch": 0.4546061415220294, "grad_norm": 1.067860545695911, "learning_rate": 5.9756725046398025e-06, "loss": 0.4959, "step": 2043 }, { "epoch": 0.45482866043613707, "grad_norm": 1.0339476000955619, "learning_rate": 5.972137954439088e-06, "loss": 0.5001, "step": 2044 }, { "epoch": 0.45505117935024475, "grad_norm": 1.1965129194177078, "learning_rate": 5.968602899281964e-06, "loss": 0.5029, "step": 2045 }, { "epoch": 0.4552736982643525, "grad_norm": 1.108317387453635, "learning_rate": 5.9650673410046425e-06, "loss": 0.483, "step": 2046 }, { "epoch": 0.45549621717846017, "grad_norm": 1.0029894772526389, "learning_rate": 5.961531281443592e-06, "loss": 0.5034, "step": 2047 }, { "epoch": 0.45571873609256786, "grad_norm": 1.2176352298313784, "learning_rate": 5.957994722435545e-06, "loss": 0.503, "step": 2048 }, { "epoch": 0.4559412550066756, "grad_norm": 0.9900321575671479, "learning_rate": 5.954457665817491e-06, "loss": 0.4785, "step": 2049 }, { "epoch": 0.4561637739207833, "grad_norm": 1.046442834852293, "learning_rate": 5.950920113426681e-06, "loss": 0.5152, "step": 2050 }, { "epoch": 0.45638629283489096, "grad_norm": 1.0761542051248263, "learning_rate": 5.9473820671006185e-06, "loss": 0.4919, "step": 2051 }, { "epoch": 0.45660881174899864, "grad_norm": 1.0808815816432777, "learning_rate": 5.9438435286770666e-06, "loss": 0.5111, "step": 2052 }, { "epoch": 0.4568313306631064, "grad_norm": 1.056799348132215, "learning_rate": 5.9403044999940465e-06, "loss": 0.4953, "step": 2053 }, { "epoch": 0.45705384957721407, "grad_norm": 1.0752569273105035, "learning_rate": 5.93676498288983e-06, "loss": 0.4959, "step": 2054 }, { "epoch": 0.45727636849132175, "grad_norm": 1.0010917017180196, "learning_rate": 5.933224979202945e-06, "loss": 0.4829, "step": 2055 }, { "epoch": 0.4574988874054295, "grad_norm": 0.9692583375276034, "learning_rate": 5.929684490772168e-06, "loss": 0.4915, "step": 2056 }, { "epoch": 0.45772140631953717, "grad_norm": 1.1098861604156505, "learning_rate": 5.9261435194365336e-06, "loss": 0.5005, "step": 2057 }, { "epoch": 0.45794392523364486, "grad_norm": 0.9299283508594708, "learning_rate": 5.9226020670353245e-06, "loss": 0.488, "step": 2058 }, { "epoch": 0.45816644414775254, "grad_norm": 1.0774060320978847, "learning_rate": 5.9190601354080705e-06, "loss": 0.5182, "step": 2059 }, { "epoch": 0.4583889630618603, "grad_norm": 0.9965030234535174, "learning_rate": 5.915517726394555e-06, "loss": 0.4906, "step": 2060 }, { "epoch": 0.45861148197596796, "grad_norm": 0.9981229130226773, "learning_rate": 5.911974841834807e-06, "loss": 0.4987, "step": 2061 }, { "epoch": 0.45883400089007564, "grad_norm": 1.0052101981129478, "learning_rate": 5.908431483569099e-06, "loss": 0.5162, "step": 2062 }, { "epoch": 0.4590565198041834, "grad_norm": 1.0302952310100872, "learning_rate": 5.9048876534379575e-06, "loss": 0.5088, "step": 2063 }, { "epoch": 0.45927903871829107, "grad_norm": 1.03300806795728, "learning_rate": 5.901343353282147e-06, "loss": 0.4803, "step": 2064 }, { "epoch": 0.45950155763239875, "grad_norm": 0.9644807784691894, "learning_rate": 5.897798584942681e-06, "loss": 0.4902, "step": 2065 }, { "epoch": 0.45972407654650643, "grad_norm": 1.0586220528280321, "learning_rate": 5.894253350260814e-06, "loss": 0.4825, "step": 2066 }, { "epoch": 0.45994659546061417, "grad_norm": 1.0346507603231858, "learning_rate": 5.8907076510780415e-06, "loss": 0.498, "step": 2067 }, { "epoch": 0.46016911437472185, "grad_norm": 0.9770712562771081, "learning_rate": 5.887161489236102e-06, "loss": 0.4844, "step": 2068 }, { "epoch": 0.46039163328882954, "grad_norm": 1.0505308335633778, "learning_rate": 5.8836148665769745e-06, "loss": 0.5083, "step": 2069 }, { "epoch": 0.4606141522029373, "grad_norm": 1.0662946043965404, "learning_rate": 5.8800677849428754e-06, "loss": 0.4998, "step": 2070 }, { "epoch": 0.46083667111704496, "grad_norm": 0.935371459003832, "learning_rate": 5.876520246176263e-06, "loss": 0.4775, "step": 2071 }, { "epoch": 0.46105919003115264, "grad_norm": 0.974847500392733, "learning_rate": 5.87297225211983e-06, "loss": 0.4941, "step": 2072 }, { "epoch": 0.4612817089452603, "grad_norm": 1.4638211781029626, "learning_rate": 5.869423804616504e-06, "loss": 0.4753, "step": 2073 }, { "epoch": 0.46150422785936807, "grad_norm": 0.9715232570835499, "learning_rate": 5.865874905509455e-06, "loss": 0.5051, "step": 2074 }, { "epoch": 0.46172674677347575, "grad_norm": 0.9996122589777866, "learning_rate": 5.862325556642081e-06, "loss": 0.5047, "step": 2075 }, { "epoch": 0.46194926568758343, "grad_norm": 1.0671290042186954, "learning_rate": 5.858775759858018e-06, "loss": 0.5118, "step": 2076 }, { "epoch": 0.46217178460169117, "grad_norm": 0.9534554644419381, "learning_rate": 5.855225517001128e-06, "loss": 0.4869, "step": 2077 }, { "epoch": 0.46239430351579885, "grad_norm": 1.0224348892355117, "learning_rate": 5.851674829915512e-06, "loss": 0.4958, "step": 2078 }, { "epoch": 0.46261682242990654, "grad_norm": 0.9839558022153396, "learning_rate": 5.8481237004455e-06, "loss": 0.4851, "step": 2079 }, { "epoch": 0.4628393413440142, "grad_norm": 1.036012944838104, "learning_rate": 5.844572130435649e-06, "loss": 0.5067, "step": 2080 }, { "epoch": 0.46306186025812196, "grad_norm": 1.0238932399918366, "learning_rate": 5.841020121730747e-06, "loss": 0.4954, "step": 2081 }, { "epoch": 0.46328437917222964, "grad_norm": 0.9948019918835129, "learning_rate": 5.837467676175812e-06, "loss": 0.502, "step": 2082 }, { "epoch": 0.4635068980863373, "grad_norm": 0.9854812061179837, "learning_rate": 5.8339147956160815e-06, "loss": 0.4738, "step": 2083 }, { "epoch": 0.46372941700044507, "grad_norm": 1.0952015759753495, "learning_rate": 5.830361481897027e-06, "loss": 0.5103, "step": 2084 }, { "epoch": 0.46395193591455275, "grad_norm": 1.0461668034780702, "learning_rate": 5.826807736864342e-06, "loss": 0.4998, "step": 2085 }, { "epoch": 0.46417445482866043, "grad_norm": 1.0448589806754738, "learning_rate": 5.823253562363942e-06, "loss": 0.4919, "step": 2086 }, { "epoch": 0.4643969737427681, "grad_norm": 1.0879394720184572, "learning_rate": 5.819698960241969e-06, "loss": 0.5089, "step": 2087 }, { "epoch": 0.46461949265687585, "grad_norm": 1.0104233210928977, "learning_rate": 5.8161439323447846e-06, "loss": 0.5088, "step": 2088 }, { "epoch": 0.46484201157098354, "grad_norm": 1.0118039039303732, "learning_rate": 5.8125884805189735e-06, "loss": 0.5001, "step": 2089 }, { "epoch": 0.4650645304850912, "grad_norm": 0.9637846580231889, "learning_rate": 5.80903260661134e-06, "loss": 0.4867, "step": 2090 }, { "epoch": 0.4652870493991989, "grad_norm": 1.0486825000299855, "learning_rate": 5.805476312468906e-06, "loss": 0.5162, "step": 2091 }, { "epoch": 0.46550956831330664, "grad_norm": 0.9545660597952677, "learning_rate": 5.801919599938913e-06, "loss": 0.48, "step": 2092 }, { "epoch": 0.4657320872274143, "grad_norm": 1.0176882969091223, "learning_rate": 5.798362470868819e-06, "loss": 0.4913, "step": 2093 }, { "epoch": 0.465954606141522, "grad_norm": 1.0488867322192743, "learning_rate": 5.7948049271063015e-06, "loss": 0.4945, "step": 2094 }, { "epoch": 0.46617712505562975, "grad_norm": 0.9430040248210529, "learning_rate": 5.79124697049925e-06, "loss": 0.4865, "step": 2095 }, { "epoch": 0.46639964396973743, "grad_norm": 1.0428871415560834, "learning_rate": 5.787688602895768e-06, "loss": 0.5131, "step": 2096 }, { "epoch": 0.4666221628838451, "grad_norm": 1.0278761716048481, "learning_rate": 5.784129826144176e-06, "loss": 0.493, "step": 2097 }, { "epoch": 0.4668446817979528, "grad_norm": 1.035711034473737, "learning_rate": 5.780570642093004e-06, "loss": 0.4681, "step": 2098 }, { "epoch": 0.46706720071206054, "grad_norm": 1.0412966586624604, "learning_rate": 5.777011052590994e-06, "loss": 0.4906, "step": 2099 }, { "epoch": 0.4672897196261682, "grad_norm": 1.0404946873264644, "learning_rate": 5.773451059487101e-06, "loss": 0.4956, "step": 2100 }, { "epoch": 0.4675122385402759, "grad_norm": 1.022554905690725, "learning_rate": 5.769890664630486e-06, "loss": 0.4808, "step": 2101 }, { "epoch": 0.46773475745438364, "grad_norm": 1.0403142260493816, "learning_rate": 5.76632986987052e-06, "loss": 0.4975, "step": 2102 }, { "epoch": 0.4679572763684913, "grad_norm": 1.0110529420125667, "learning_rate": 5.762768677056786e-06, "loss": 0.483, "step": 2103 }, { "epoch": 0.468179795282599, "grad_norm": 1.1007198804043195, "learning_rate": 5.759207088039065e-06, "loss": 0.4882, "step": 2104 }, { "epoch": 0.4684023141967067, "grad_norm": 1.109133017648297, "learning_rate": 5.7556451046673516e-06, "loss": 0.4895, "step": 2105 }, { "epoch": 0.46862483311081443, "grad_norm": 1.118831893572354, "learning_rate": 5.752082728791842e-06, "loss": 0.4754, "step": 2106 }, { "epoch": 0.4688473520249221, "grad_norm": 1.0661696328833479, "learning_rate": 5.748519962262937e-06, "loss": 0.5017, "step": 2107 }, { "epoch": 0.4690698709390298, "grad_norm": 1.0414913449059169, "learning_rate": 5.744956806931238e-06, "loss": 0.4961, "step": 2108 }, { "epoch": 0.46929238985313754, "grad_norm": 1.168714894067726, "learning_rate": 5.741393264647552e-06, "loss": 0.5049, "step": 2109 }, { "epoch": 0.4695149087672452, "grad_norm": 1.0692562288331817, "learning_rate": 5.737829337262887e-06, "loss": 0.5051, "step": 2110 }, { "epoch": 0.4697374276813529, "grad_norm": 1.13153049612546, "learning_rate": 5.734265026628445e-06, "loss": 0.4958, "step": 2111 }, { "epoch": 0.4699599465954606, "grad_norm": 1.1138993332484646, "learning_rate": 5.730700334595636e-06, "loss": 0.5151, "step": 2112 }, { "epoch": 0.4701824655095683, "grad_norm": 1.101521088222484, "learning_rate": 5.72713526301606e-06, "loss": 0.4802, "step": 2113 }, { "epoch": 0.470404984423676, "grad_norm": 1.0518780801186185, "learning_rate": 5.72356981374152e-06, "loss": 0.4929, "step": 2114 }, { "epoch": 0.4706275033377837, "grad_norm": 1.0398082909600974, "learning_rate": 5.720003988624011e-06, "loss": 0.5026, "step": 2115 }, { "epoch": 0.47085002225189143, "grad_norm": 1.0370470650893948, "learning_rate": 5.716437789515728e-06, "loss": 0.4901, "step": 2116 }, { "epoch": 0.4710725411659991, "grad_norm": 1.0482124382281643, "learning_rate": 5.712871218269053e-06, "loss": 0.5047, "step": 2117 }, { "epoch": 0.4712950600801068, "grad_norm": 0.9675004179416621, "learning_rate": 5.709304276736569e-06, "loss": 0.4745, "step": 2118 }, { "epoch": 0.4715175789942145, "grad_norm": 1.0581828984421193, "learning_rate": 5.705736966771047e-06, "loss": 0.4852, "step": 2119 }, { "epoch": 0.4717400979083222, "grad_norm": 1.174733318166757, "learning_rate": 5.702169290225451e-06, "loss": 0.5014, "step": 2120 }, { "epoch": 0.4719626168224299, "grad_norm": 1.0469054571338172, "learning_rate": 5.698601248952935e-06, "loss": 0.48, "step": 2121 }, { "epoch": 0.4721851357365376, "grad_norm": 1.0334464746358316, "learning_rate": 5.695032844806842e-06, "loss": 0.5079, "step": 2122 }, { "epoch": 0.4724076546506453, "grad_norm": 1.0200682734371351, "learning_rate": 5.691464079640702e-06, "loss": 0.4911, "step": 2123 }, { "epoch": 0.472630173564753, "grad_norm": 1.030475477972892, "learning_rate": 5.687894955308236e-06, "loss": 0.4841, "step": 2124 }, { "epoch": 0.4728526924788607, "grad_norm": 1.0398564212282884, "learning_rate": 5.684325473663349e-06, "loss": 0.4972, "step": 2125 }, { "epoch": 0.4730752113929684, "grad_norm": 1.0868515664098763, "learning_rate": 5.680755636560132e-06, "loss": 0.4971, "step": 2126 }, { "epoch": 0.4732977303070761, "grad_norm": 0.998560096240494, "learning_rate": 5.677185445852864e-06, "loss": 0.4781, "step": 2127 }, { "epoch": 0.4735202492211838, "grad_norm": 1.0158795624146857, "learning_rate": 5.673614903396001e-06, "loss": 0.4925, "step": 2128 }, { "epoch": 0.4737427681352915, "grad_norm": 0.9867947718744213, "learning_rate": 5.670044011044187e-06, "loss": 0.4861, "step": 2129 }, { "epoch": 0.4739652870493992, "grad_norm": 1.0284545194959422, "learning_rate": 5.666472770652246e-06, "loss": 0.4871, "step": 2130 }, { "epoch": 0.4741878059635069, "grad_norm": 1.0010968085506815, "learning_rate": 5.662901184075185e-06, "loss": 0.4798, "step": 2131 }, { "epoch": 0.4744103248776146, "grad_norm": 1.087573518783587, "learning_rate": 5.6593292531681855e-06, "loss": 0.494, "step": 2132 }, { "epoch": 0.47463284379172227, "grad_norm": 0.9996824752739114, "learning_rate": 5.655756979786611e-06, "loss": 0.4848, "step": 2133 }, { "epoch": 0.47485536270583, "grad_norm": 1.231139063371983, "learning_rate": 5.6521843657860066e-06, "loss": 0.5151, "step": 2134 }, { "epoch": 0.4750778816199377, "grad_norm": 1.1181991224810108, "learning_rate": 5.6486114130220875e-06, "loss": 0.4964, "step": 2135 }, { "epoch": 0.4753004005340454, "grad_norm": 0.9513467808320346, "learning_rate": 5.645038123350749e-06, "loss": 0.4788, "step": 2136 }, { "epoch": 0.4755229194481531, "grad_norm": 1.2100078468551272, "learning_rate": 5.641464498628062e-06, "loss": 0.4873, "step": 2137 }, { "epoch": 0.4757454383622608, "grad_norm": 1.173824267303197, "learning_rate": 5.637890540710268e-06, "loss": 0.4966, "step": 2138 }, { "epoch": 0.4759679572763685, "grad_norm": 1.1659555213960957, "learning_rate": 5.6343162514537845e-06, "loss": 0.5004, "step": 2139 }, { "epoch": 0.47619047619047616, "grad_norm": 1.2891547266230345, "learning_rate": 5.630741632715198e-06, "loss": 0.4783, "step": 2140 }, { "epoch": 0.4764129951045839, "grad_norm": 0.9506723908714358, "learning_rate": 5.627166686351272e-06, "loss": 0.4862, "step": 2141 }, { "epoch": 0.4766355140186916, "grad_norm": 1.0649384468259082, "learning_rate": 5.623591414218934e-06, "loss": 0.5055, "step": 2142 }, { "epoch": 0.47685803293279927, "grad_norm": 1.1595638403493047, "learning_rate": 5.620015818175284e-06, "loss": 0.4933, "step": 2143 }, { "epoch": 0.477080551846907, "grad_norm": 1.1128150465226536, "learning_rate": 5.6164399000775895e-06, "loss": 0.5006, "step": 2144 }, { "epoch": 0.4773030707610147, "grad_norm": 1.0292761099320906, "learning_rate": 5.612863661783283e-06, "loss": 0.5026, "step": 2145 }, { "epoch": 0.4775255896751224, "grad_norm": 1.1009085491350428, "learning_rate": 5.609287105149969e-06, "loss": 0.4849, "step": 2146 }, { "epoch": 0.47774810858923006, "grad_norm": 1.171902325116806, "learning_rate": 5.605710232035412e-06, "loss": 0.5173, "step": 2147 }, { "epoch": 0.4779706275033378, "grad_norm": 1.0193184984888413, "learning_rate": 5.602133044297542e-06, "loss": 0.4872, "step": 2148 }, { "epoch": 0.4781931464174455, "grad_norm": 1.0874439735573125, "learning_rate": 5.598555543794455e-06, "loss": 0.4976, "step": 2149 }, { "epoch": 0.47841566533155316, "grad_norm": 1.0565284264774326, "learning_rate": 5.5949777323844055e-06, "loss": 0.4777, "step": 2150 }, { "epoch": 0.4786381842456609, "grad_norm": 1.0498072771235283, "learning_rate": 5.591399611925813e-06, "loss": 0.5007, "step": 2151 }, { "epoch": 0.4788607031597686, "grad_norm": 1.0347795467890333, "learning_rate": 5.5878211842772575e-06, "loss": 0.4757, "step": 2152 }, { "epoch": 0.47908322207387627, "grad_norm": 1.0499108774054575, "learning_rate": 5.584242451297476e-06, "loss": 0.497, "step": 2153 }, { "epoch": 0.47930574098798395, "grad_norm": 1.1201317172516851, "learning_rate": 5.580663414845363e-06, "loss": 0.4864, "step": 2154 }, { "epoch": 0.4795282599020917, "grad_norm": 1.1022821157711815, "learning_rate": 5.5770840767799765e-06, "loss": 0.5168, "step": 2155 }, { "epoch": 0.4797507788161994, "grad_norm": 1.025662866681872, "learning_rate": 5.573504438960524e-06, "loss": 0.4811, "step": 2156 }, { "epoch": 0.47997329773030706, "grad_norm": 1.0410756688414338, "learning_rate": 5.569924503246376e-06, "loss": 0.5067, "step": 2157 }, { "epoch": 0.4801958166444148, "grad_norm": 1.0456480955167895, "learning_rate": 5.566344271497053e-06, "loss": 0.4736, "step": 2158 }, { "epoch": 0.4804183355585225, "grad_norm": 1.1708065663512397, "learning_rate": 5.56276374557223e-06, "loss": 0.5044, "step": 2159 }, { "epoch": 0.48064085447263016, "grad_norm": 1.029735856496578, "learning_rate": 5.5591829273317325e-06, "loss": 0.4848, "step": 2160 }, { "epoch": 0.48086337338673785, "grad_norm": 1.0176088718183207, "learning_rate": 5.555601818635544e-06, "loss": 0.4925, "step": 2161 }, { "epoch": 0.4810858923008456, "grad_norm": 0.9660181908809662, "learning_rate": 5.552020421343795e-06, "loss": 0.4967, "step": 2162 }, { "epoch": 0.48130841121495327, "grad_norm": 0.9884539006430942, "learning_rate": 5.548438737316764e-06, "loss": 0.484, "step": 2163 }, { "epoch": 0.48153093012906095, "grad_norm": 0.9658985918980676, "learning_rate": 5.544856768414884e-06, "loss": 0.492, "step": 2164 }, { "epoch": 0.4817534490431687, "grad_norm": 1.0705279502497786, "learning_rate": 5.541274516498731e-06, "loss": 0.4826, "step": 2165 }, { "epoch": 0.4819759679572764, "grad_norm": 1.011460555172384, "learning_rate": 5.537691983429028e-06, "loss": 0.4865, "step": 2166 }, { "epoch": 0.48219848687138406, "grad_norm": 1.0499675421602228, "learning_rate": 5.5341091710666475e-06, "loss": 0.4922, "step": 2167 }, { "epoch": 0.48242100578549174, "grad_norm": 1.0394359821464467, "learning_rate": 5.530526081272605e-06, "loss": 0.491, "step": 2168 }, { "epoch": 0.4826435246995995, "grad_norm": 0.9652520718405374, "learning_rate": 5.526942715908061e-06, "loss": 0.481, "step": 2169 }, { "epoch": 0.48286604361370716, "grad_norm": 1.0242262064366978, "learning_rate": 5.523359076834316e-06, "loss": 0.5039, "step": 2170 }, { "epoch": 0.48308856252781485, "grad_norm": 1.035124409415937, "learning_rate": 5.5197751659128174e-06, "loss": 0.503, "step": 2171 }, { "epoch": 0.4833110814419226, "grad_norm": 0.9927741087391407, "learning_rate": 5.516190985005152e-06, "loss": 0.4765, "step": 2172 }, { "epoch": 0.48353360035603027, "grad_norm": 1.009661969985637, "learning_rate": 5.512606535973045e-06, "loss": 0.4878, "step": 2173 }, { "epoch": 0.48375611927013795, "grad_norm": 1.004193624234214, "learning_rate": 5.509021820678364e-06, "loss": 0.4915, "step": 2174 }, { "epoch": 0.48397863818424564, "grad_norm": 1.0390497775015777, "learning_rate": 5.505436840983112e-06, "loss": 0.5087, "step": 2175 }, { "epoch": 0.4842011570983534, "grad_norm": 0.9887428158301187, "learning_rate": 5.501851598749429e-06, "loss": 0.4865, "step": 2176 }, { "epoch": 0.48442367601246106, "grad_norm": 1.016390817466927, "learning_rate": 5.498266095839595e-06, "loss": 0.5067, "step": 2177 }, { "epoch": 0.48464619492656874, "grad_norm": 1.112125582838502, "learning_rate": 5.494680334116024e-06, "loss": 0.4801, "step": 2178 }, { "epoch": 0.4848687138406765, "grad_norm": 1.0566681597285223, "learning_rate": 5.491094315441262e-06, "loss": 0.4931, "step": 2179 }, { "epoch": 0.48509123275478416, "grad_norm": 1.0371245877314896, "learning_rate": 5.487508041677992e-06, "loss": 0.4949, "step": 2180 }, { "epoch": 0.48531375166889185, "grad_norm": 1.0578566318972136, "learning_rate": 5.483921514689023e-06, "loss": 0.4956, "step": 2181 }, { "epoch": 0.48553627058299953, "grad_norm": 1.024844596342854, "learning_rate": 5.480334736337306e-06, "loss": 0.4791, "step": 2182 }, { "epoch": 0.48575878949710727, "grad_norm": 1.1044355662888643, "learning_rate": 5.476747708485915e-06, "loss": 0.506, "step": 2183 }, { "epoch": 0.48598130841121495, "grad_norm": 1.0855840702957538, "learning_rate": 5.4731604329980555e-06, "loss": 0.4885, "step": 2184 }, { "epoch": 0.48620382732532264, "grad_norm": 1.032942346461599, "learning_rate": 5.46957291173706e-06, "loss": 0.4919, "step": 2185 }, { "epoch": 0.4864263462394304, "grad_norm": 1.0894219345121374, "learning_rate": 5.465985146566392e-06, "loss": 0.4907, "step": 2186 }, { "epoch": 0.48664886515353806, "grad_norm": 1.0417336789284013, "learning_rate": 5.4623971393496386e-06, "loss": 0.499, "step": 2187 }, { "epoch": 0.48687138406764574, "grad_norm": 1.0984731167310708, "learning_rate": 5.4588088919505144e-06, "loss": 0.4938, "step": 2188 }, { "epoch": 0.4870939029817534, "grad_norm": 1.2395294742535166, "learning_rate": 5.45522040623286e-06, "loss": 0.4814, "step": 2189 }, { "epoch": 0.48731642189586116, "grad_norm": 0.9976411113583299, "learning_rate": 5.451631684060635e-06, "loss": 0.4907, "step": 2190 }, { "epoch": 0.48753894080996885, "grad_norm": 1.0505221990282827, "learning_rate": 5.448042727297925e-06, "loss": 0.4933, "step": 2191 }, { "epoch": 0.48776145972407653, "grad_norm": 1.063775743856518, "learning_rate": 5.444453537808941e-06, "loss": 0.5, "step": 2192 }, { "epoch": 0.48798397863818427, "grad_norm": 1.154284275635124, "learning_rate": 5.440864117458008e-06, "loss": 0.5071, "step": 2193 }, { "epoch": 0.48820649755229195, "grad_norm": 1.0051619096329987, "learning_rate": 5.4372744681095744e-06, "loss": 0.4937, "step": 2194 }, { "epoch": 0.48842901646639963, "grad_norm": 0.9740098494760085, "learning_rate": 5.433684591628209e-06, "loss": 0.4893, "step": 2195 }, { "epoch": 0.4886515353805073, "grad_norm": 1.0751805988604566, "learning_rate": 5.4300944898785965e-06, "loss": 0.4807, "step": 2196 }, { "epoch": 0.48887405429461506, "grad_norm": 1.093111658314658, "learning_rate": 5.426504164725537e-06, "loss": 0.4769, "step": 2197 }, { "epoch": 0.48909657320872274, "grad_norm": 1.1447341819124541, "learning_rate": 5.422913618033949e-06, "loss": 0.5093, "step": 2198 }, { "epoch": 0.4893190921228304, "grad_norm": 1.213167225383026, "learning_rate": 5.419322851668869e-06, "loss": 0.4979, "step": 2199 }, { "epoch": 0.48954161103693816, "grad_norm": 1.0079079865714797, "learning_rate": 5.41573186749544e-06, "loss": 0.4824, "step": 2200 }, { "epoch": 0.48976412995104585, "grad_norm": 1.0817401860535487, "learning_rate": 5.412140667378926e-06, "loss": 0.4933, "step": 2201 }, { "epoch": 0.48998664886515353, "grad_norm": 1.157974651770948, "learning_rate": 5.408549253184696e-06, "loss": 0.4936, "step": 2202 }, { "epoch": 0.4902091677792612, "grad_norm": 1.0611036340512947, "learning_rate": 5.404957626778236e-06, "loss": 0.4847, "step": 2203 }, { "epoch": 0.49043168669336895, "grad_norm": 1.1530983892671385, "learning_rate": 5.401365790025143e-06, "loss": 0.5088, "step": 2204 }, { "epoch": 0.49065420560747663, "grad_norm": 1.0566554188259236, "learning_rate": 5.397773744791115e-06, "loss": 0.4688, "step": 2205 }, { "epoch": 0.4908767245215843, "grad_norm": 1.2357666996300374, "learning_rate": 5.394181492941967e-06, "loss": 0.4998, "step": 2206 }, { "epoch": 0.49109924343569206, "grad_norm": 1.0397453269642933, "learning_rate": 5.390589036343614e-06, "loss": 0.4954, "step": 2207 }, { "epoch": 0.49132176234979974, "grad_norm": 0.9802797030324135, "learning_rate": 5.3869963768620845e-06, "loss": 0.4672, "step": 2208 }, { "epoch": 0.4915442812639074, "grad_norm": 1.0438755858188917, "learning_rate": 5.38340351636351e-06, "loss": 0.4924, "step": 2209 }, { "epoch": 0.4917668001780151, "grad_norm": 1.0159904480595108, "learning_rate": 5.379810456714123e-06, "loss": 0.4764, "step": 2210 }, { "epoch": 0.49198931909212285, "grad_norm": 1.059344416538875, "learning_rate": 5.3762171997802636e-06, "loss": 0.4794, "step": 2211 }, { "epoch": 0.49221183800623053, "grad_norm": 0.9585916145204149, "learning_rate": 5.3726237474283715e-06, "loss": 0.48, "step": 2212 }, { "epoch": 0.4924343569203382, "grad_norm": 1.0478392350880805, "learning_rate": 5.36903010152499e-06, "loss": 0.4987, "step": 2213 }, { "epoch": 0.49265687583444595, "grad_norm": 1.073776810911531, "learning_rate": 5.365436263936763e-06, "loss": 0.4894, "step": 2214 }, { "epoch": 0.49287939474855363, "grad_norm": 1.0265025388084388, "learning_rate": 5.361842236530433e-06, "loss": 0.5007, "step": 2215 }, { "epoch": 0.4931019136626613, "grad_norm": 1.1157694181922106, "learning_rate": 5.358248021172838e-06, "loss": 0.4866, "step": 2216 }, { "epoch": 0.493324432576769, "grad_norm": 1.040973972522938, "learning_rate": 5.354653619730921e-06, "loss": 0.4785, "step": 2217 }, { "epoch": 0.49354695149087674, "grad_norm": 1.1468308019610896, "learning_rate": 5.351059034071715e-06, "loss": 0.5023, "step": 2218 }, { "epoch": 0.4937694704049844, "grad_norm": 1.072243378995347, "learning_rate": 5.347464266062351e-06, "loss": 0.5048, "step": 2219 }, { "epoch": 0.4939919893190921, "grad_norm": 1.0143841018374606, "learning_rate": 5.343869317570056e-06, "loss": 0.4997, "step": 2220 }, { "epoch": 0.49421450823319985, "grad_norm": 1.0780887627653486, "learning_rate": 5.3402741904621515e-06, "loss": 0.5082, "step": 2221 }, { "epoch": 0.49443702714730753, "grad_norm": 1.0437593604480744, "learning_rate": 5.336678886606045e-06, "loss": 0.4792, "step": 2222 }, { "epoch": 0.4946595460614152, "grad_norm": 1.068751103031622, "learning_rate": 5.3330834078692424e-06, "loss": 0.4877, "step": 2223 }, { "epoch": 0.4948820649755229, "grad_norm": 1.0315284379030267, "learning_rate": 5.329487756119342e-06, "loss": 0.4838, "step": 2224 }, { "epoch": 0.49510458388963063, "grad_norm": 0.9740227106670263, "learning_rate": 5.325891933224025e-06, "loss": 0.4939, "step": 2225 }, { "epoch": 0.4953271028037383, "grad_norm": 0.9434543605511669, "learning_rate": 5.322295941051069e-06, "loss": 0.4913, "step": 2226 }, { "epoch": 0.495549621717846, "grad_norm": 1.0560179031993608, "learning_rate": 5.3186997814683325e-06, "loss": 0.4971, "step": 2227 }, { "epoch": 0.49577214063195374, "grad_norm": 0.954422152140364, "learning_rate": 5.315103456343764e-06, "loss": 0.4933, "step": 2228 }, { "epoch": 0.4959946595460614, "grad_norm": 1.0143445940945481, "learning_rate": 5.3115069675454e-06, "loss": 0.489, "step": 2229 }, { "epoch": 0.4962171784601691, "grad_norm": 0.9968421257745371, "learning_rate": 5.3079103169413596e-06, "loss": 0.4699, "step": 2230 }, { "epoch": 0.4964396973742768, "grad_norm": 1.0390637956710786, "learning_rate": 5.304313506399845e-06, "loss": 0.4871, "step": 2231 }, { "epoch": 0.49666221628838453, "grad_norm": 1.1040528430240963, "learning_rate": 5.300716537789147e-06, "loss": 0.4889, "step": 2232 }, { "epoch": 0.4968847352024922, "grad_norm": 1.0642844909266007, "learning_rate": 5.2971194129776295e-06, "loss": 0.4776, "step": 2233 }, { "epoch": 0.4971072541165999, "grad_norm": 1.041524839553476, "learning_rate": 5.293522133833745e-06, "loss": 0.4837, "step": 2234 }, { "epoch": 0.49732977303070763, "grad_norm": 1.146491614537761, "learning_rate": 5.289924702226026e-06, "loss": 0.4894, "step": 2235 }, { "epoch": 0.4975522919448153, "grad_norm": 1.0390542113407726, "learning_rate": 5.286327120023079e-06, "loss": 0.4824, "step": 2236 }, { "epoch": 0.497774810858923, "grad_norm": 1.09480282012572, "learning_rate": 5.282729389093591e-06, "loss": 0.4807, "step": 2237 }, { "epoch": 0.4979973297730307, "grad_norm": 1.016682397465586, "learning_rate": 5.27913151130633e-06, "loss": 0.4771, "step": 2238 }, { "epoch": 0.4982198486871384, "grad_norm": 1.155457855433288, "learning_rate": 5.275533488530134e-06, "loss": 0.4684, "step": 2239 }, { "epoch": 0.4984423676012461, "grad_norm": 1.0765623641376143, "learning_rate": 5.271935322633921e-06, "loss": 0.4923, "step": 2240 }, { "epoch": 0.4986648865153538, "grad_norm": 1.0941347119908067, "learning_rate": 5.268337015486683e-06, "loss": 0.4918, "step": 2241 }, { "epoch": 0.49888740542946153, "grad_norm": 1.0895185628239632, "learning_rate": 5.2647385689574826e-06, "loss": 0.5027, "step": 2242 }, { "epoch": 0.4991099243435692, "grad_norm": 1.046100077092167, "learning_rate": 5.261139984915455e-06, "loss": 0.4886, "step": 2243 }, { "epoch": 0.4993324432576769, "grad_norm": 1.0406108563241172, "learning_rate": 5.257541265229812e-06, "loss": 0.4832, "step": 2244 }, { "epoch": 0.4995549621717846, "grad_norm": 1.0969274862915859, "learning_rate": 5.253942411769829e-06, "loss": 0.4777, "step": 2245 }, { "epoch": 0.4997774810858923, "grad_norm": 1.0417056053187344, "learning_rate": 5.250343426404857e-06, "loss": 0.4985, "step": 2246 }, { "epoch": 0.5, "grad_norm": 1.0460118516176613, "learning_rate": 5.2467443110043084e-06, "loss": 0.4815, "step": 2247 }, { "epoch": 0.5002225189141077, "grad_norm": 1.3104478020761645, "learning_rate": 5.24314506743767e-06, "loss": 0.5108, "step": 2248 }, { "epoch": 0.5004450378282154, "grad_norm": 1.0755401417982846, "learning_rate": 5.23954569757449e-06, "loss": 0.4821, "step": 2249 }, { "epoch": 0.5006675567423231, "grad_norm": 1.0726582251063093, "learning_rate": 5.235946203284389e-06, "loss": 0.4837, "step": 2250 }, { "epoch": 0.5008900756564308, "grad_norm": 1.1165532342595843, "learning_rate": 5.2323465864370445e-06, "loss": 0.4647, "step": 2251 }, { "epoch": 0.5011125945705385, "grad_norm": 1.0640335934523046, "learning_rate": 5.228746848902202e-06, "loss": 0.4907, "step": 2252 }, { "epoch": 0.5013351134846462, "grad_norm": 1.0935344484376304, "learning_rate": 5.225146992549668e-06, "loss": 0.497, "step": 2253 }, { "epoch": 0.5015576323987538, "grad_norm": 1.2956246356003436, "learning_rate": 5.221547019249311e-06, "loss": 0.4991, "step": 2254 }, { "epoch": 0.5017801513128616, "grad_norm": 1.1071103219991356, "learning_rate": 5.217946930871063e-06, "loss": 0.4849, "step": 2255 }, { "epoch": 0.5020026702269693, "grad_norm": 1.1827804811843485, "learning_rate": 5.21434672928491e-06, "loss": 0.4965, "step": 2256 }, { "epoch": 0.5022251891410769, "grad_norm": 1.1178926743779853, "learning_rate": 5.210746416360904e-06, "loss": 0.4753, "step": 2257 }, { "epoch": 0.5024477080551847, "grad_norm": 1.0375500748009057, "learning_rate": 5.20714599396915e-06, "loss": 0.4955, "step": 2258 }, { "epoch": 0.5026702269692924, "grad_norm": 1.0527638673679978, "learning_rate": 5.203545463979807e-06, "loss": 0.4733, "step": 2259 }, { "epoch": 0.5028927458834, "grad_norm": 0.9832999477007027, "learning_rate": 5.199944828263099e-06, "loss": 0.4672, "step": 2260 }, { "epoch": 0.5031152647975078, "grad_norm": 1.0111547727185226, "learning_rate": 5.196344088689298e-06, "loss": 0.4897, "step": 2261 }, { "epoch": 0.5033377837116155, "grad_norm": 1.0485049764871037, "learning_rate": 5.19274324712873e-06, "loss": 0.4844, "step": 2262 }, { "epoch": 0.5035603026257232, "grad_norm": 1.0449968405669232, "learning_rate": 5.189142305451777e-06, "loss": 0.4762, "step": 2263 }, { "epoch": 0.5037828215398309, "grad_norm": 1.096602183136455, "learning_rate": 5.185541265528873e-06, "loss": 0.5022, "step": 2264 }, { "epoch": 0.5040053404539386, "grad_norm": 1.0717505585447427, "learning_rate": 5.1819401292304985e-06, "loss": 0.49, "step": 2265 }, { "epoch": 0.5042278593680463, "grad_norm": 1.0699192310788583, "learning_rate": 5.178338898427191e-06, "loss": 0.4893, "step": 2266 }, { "epoch": 0.504450378282154, "grad_norm": 1.0916770961238158, "learning_rate": 5.17473757498953e-06, "loss": 0.4861, "step": 2267 }, { "epoch": 0.5046728971962616, "grad_norm": 1.0805205369289554, "learning_rate": 5.171136160788148e-06, "loss": 0.4831, "step": 2268 }, { "epoch": 0.5048954161103694, "grad_norm": 1.1563377164402584, "learning_rate": 5.167534657693725e-06, "loss": 0.4719, "step": 2269 }, { "epoch": 0.5051179350244771, "grad_norm": 1.0076630051620958, "learning_rate": 5.163933067576981e-06, "loss": 0.4731, "step": 2270 }, { "epoch": 0.5053404539385847, "grad_norm": 1.0054989371140324, "learning_rate": 5.1603313923086875e-06, "loss": 0.4776, "step": 2271 }, { "epoch": 0.5055629728526925, "grad_norm": 1.1220667207814383, "learning_rate": 5.156729633759659e-06, "loss": 0.4773, "step": 2272 }, { "epoch": 0.5057854917668002, "grad_norm": 1.0384787861233045, "learning_rate": 5.153127793800753e-06, "loss": 0.4817, "step": 2273 }, { "epoch": 0.5060080106809078, "grad_norm": 1.050714451497909, "learning_rate": 5.149525874302867e-06, "loss": 0.4832, "step": 2274 }, { "epoch": 0.5062305295950156, "grad_norm": 1.1169643073368143, "learning_rate": 5.14592387713694e-06, "loss": 0.4914, "step": 2275 }, { "epoch": 0.5064530485091233, "grad_norm": 1.0684201047169375, "learning_rate": 5.1423218041739575e-06, "loss": 0.4827, "step": 2276 }, { "epoch": 0.5066755674232309, "grad_norm": 1.0484778855740782, "learning_rate": 5.138719657284936e-06, "loss": 0.4881, "step": 2277 }, { "epoch": 0.5068980863373387, "grad_norm": 1.1245468690940554, "learning_rate": 5.135117438340938e-06, "loss": 0.4923, "step": 2278 }, { "epoch": 0.5071206052514464, "grad_norm": 1.1082818849443086, "learning_rate": 5.131515149213056e-06, "loss": 0.4817, "step": 2279 }, { "epoch": 0.507343124165554, "grad_norm": 1.0114742143106406, "learning_rate": 5.127912791772421e-06, "loss": 0.5023, "step": 2280 }, { "epoch": 0.5075656430796618, "grad_norm": 1.106939704145754, "learning_rate": 5.1243103678902065e-06, "loss": 0.4909, "step": 2281 }, { "epoch": 0.5077881619937694, "grad_norm": 1.0511668967358125, "learning_rate": 5.120707879437612e-06, "loss": 0.4828, "step": 2282 }, { "epoch": 0.5080106809078772, "grad_norm": 1.1317278680828058, "learning_rate": 5.117105328285874e-06, "loss": 0.4995, "step": 2283 }, { "epoch": 0.5082331998219849, "grad_norm": 1.1319306978124264, "learning_rate": 5.11350271630626e-06, "loss": 0.489, "step": 2284 }, { "epoch": 0.5084557187360925, "grad_norm": 1.1124202906916658, "learning_rate": 5.109900045370071e-06, "loss": 0.4896, "step": 2285 }, { "epoch": 0.5086782376502003, "grad_norm": 1.1364765648206019, "learning_rate": 5.10629731734864e-06, "loss": 0.4698, "step": 2286 }, { "epoch": 0.508900756564308, "grad_norm": 1.1014187569598262, "learning_rate": 5.102694534113324e-06, "loss": 0.4787, "step": 2287 }, { "epoch": 0.5091232754784156, "grad_norm": 1.0439983659358378, "learning_rate": 5.099091697535515e-06, "loss": 0.4849, "step": 2288 }, { "epoch": 0.5093457943925234, "grad_norm": 0.9988499450512713, "learning_rate": 5.09548880948663e-06, "loss": 0.4788, "step": 2289 }, { "epoch": 0.5095683133066311, "grad_norm": 1.0262842334540867, "learning_rate": 5.091885871838108e-06, "loss": 0.4849, "step": 2290 }, { "epoch": 0.5097908322207387, "grad_norm": 1.0093729238828004, "learning_rate": 5.088282886461425e-06, "loss": 0.4791, "step": 2291 }, { "epoch": 0.5100133511348465, "grad_norm": 1.0246941305604655, "learning_rate": 5.084679855228072e-06, "loss": 0.4764, "step": 2292 }, { "epoch": 0.5102358700489542, "grad_norm": 1.0281497545391318, "learning_rate": 5.081076780009565e-06, "loss": 0.4983, "step": 2293 }, { "epoch": 0.5104583889630618, "grad_norm": 1.003454275546028, "learning_rate": 5.077473662677449e-06, "loss": 0.4622, "step": 2294 }, { "epoch": 0.5106809078771696, "grad_norm": 1.0357823344219665, "learning_rate": 5.073870505103284e-06, "loss": 0.4872, "step": 2295 }, { "epoch": 0.5109034267912772, "grad_norm": 1.005056635683744, "learning_rate": 5.070267309158654e-06, "loss": 0.4766, "step": 2296 }, { "epoch": 0.5111259457053849, "grad_norm": 1.0465690991376309, "learning_rate": 5.066664076715164e-06, "loss": 0.487, "step": 2297 }, { "epoch": 0.5113484646194927, "grad_norm": 1.0681734580503113, "learning_rate": 5.063060809644436e-06, "loss": 0.4694, "step": 2298 }, { "epoch": 0.5115709835336003, "grad_norm": 1.0940038651172186, "learning_rate": 5.059457509818109e-06, "loss": 0.492, "step": 2299 }, { "epoch": 0.511793502447708, "grad_norm": 1.1349766141602413, "learning_rate": 5.055854179107842e-06, "loss": 0.4614, "step": 2300 }, { "epoch": 0.5120160213618158, "grad_norm": 1.0685331676075414, "learning_rate": 5.052250819385308e-06, "loss": 0.46, "step": 2301 }, { "epoch": 0.5122385402759234, "grad_norm": 1.0987930034626328, "learning_rate": 5.048647432522195e-06, "loss": 0.4693, "step": 2302 }, { "epoch": 0.5124610591900312, "grad_norm": 1.078356856161446, "learning_rate": 5.0450440203902094e-06, "loss": 0.4808, "step": 2303 }, { "epoch": 0.5126835781041389, "grad_norm": 1.1244177860652316, "learning_rate": 5.041440584861064e-06, "loss": 0.4988, "step": 2304 }, { "epoch": 0.5129060970182465, "grad_norm": 1.0978199667835304, "learning_rate": 5.0378371278064854e-06, "loss": 0.4732, "step": 2305 }, { "epoch": 0.5131286159323543, "grad_norm": 1.1400263845588423, "learning_rate": 5.034233651098214e-06, "loss": 0.4791, "step": 2306 }, { "epoch": 0.513351134846462, "grad_norm": 1.114648237983269, "learning_rate": 5.030630156608001e-06, "loss": 0.4839, "step": 2307 }, { "epoch": 0.5135736537605696, "grad_norm": 1.0387536305986707, "learning_rate": 5.027026646207603e-06, "loss": 0.4974, "step": 2308 }, { "epoch": 0.5137961726746774, "grad_norm": 1.0542531683655185, "learning_rate": 5.02342312176879e-06, "loss": 0.4632, "step": 2309 }, { "epoch": 0.514018691588785, "grad_norm": 1.0500862019054145, "learning_rate": 5.019819585163333e-06, "loss": 0.4847, "step": 2310 }, { "epoch": 0.5142412105028927, "grad_norm": 1.0848209760062713, "learning_rate": 5.016216038263012e-06, "loss": 0.4837, "step": 2311 }, { "epoch": 0.5144637294170005, "grad_norm": 1.0880113343729805, "learning_rate": 5.0126124829396164e-06, "loss": 0.4935, "step": 2312 }, { "epoch": 0.5146862483311081, "grad_norm": 1.1128389919070936, "learning_rate": 5.009008921064934e-06, "loss": 0.4738, "step": 2313 }, { "epoch": 0.5149087672452158, "grad_norm": 1.0821595180103458, "learning_rate": 5.0054053545107584e-06, "loss": 0.4974, "step": 2314 }, { "epoch": 0.5151312861593236, "grad_norm": 1.0852820180213132, "learning_rate": 5.0018017851488865e-06, "loss": 0.47, "step": 2315 }, { "epoch": 0.5153538050734312, "grad_norm": 1.0665406996332996, "learning_rate": 4.998198214851115e-06, "loss": 0.4879, "step": 2316 }, { "epoch": 0.5155763239875389, "grad_norm": 1.0748957442487066, "learning_rate": 4.994594645489242e-06, "loss": 0.4799, "step": 2317 }, { "epoch": 0.5157988429016467, "grad_norm": 1.0998063601817567, "learning_rate": 4.990991078935068e-06, "loss": 0.4644, "step": 2318 }, { "epoch": 0.5160213618157543, "grad_norm": 1.114720143633376, "learning_rate": 4.987387517060385e-06, "loss": 0.4844, "step": 2319 }, { "epoch": 0.516243880729862, "grad_norm": 1.0932103925619585, "learning_rate": 4.98378396173699e-06, "loss": 0.476, "step": 2320 }, { "epoch": 0.5164663996439698, "grad_norm": 1.0789146679533954, "learning_rate": 4.980180414836669e-06, "loss": 0.4813, "step": 2321 }, { "epoch": 0.5166889185580774, "grad_norm": 1.0547017237548648, "learning_rate": 4.9765768782312105e-06, "loss": 0.4851, "step": 2322 }, { "epoch": 0.5169114374721852, "grad_norm": 1.0906148320476925, "learning_rate": 4.972973353792397e-06, "loss": 0.4827, "step": 2323 }, { "epoch": 0.5171339563862928, "grad_norm": 1.168675035920748, "learning_rate": 4.969369843392e-06, "loss": 0.5141, "step": 2324 }, { "epoch": 0.5173564753004005, "grad_norm": 1.0448131499328939, "learning_rate": 4.965766348901788e-06, "loss": 0.4644, "step": 2325 }, { "epoch": 0.5175789942145083, "grad_norm": 1.0311480674444164, "learning_rate": 4.962162872193517e-06, "loss": 0.4746, "step": 2326 }, { "epoch": 0.5178015131286159, "grad_norm": 1.1918275405564112, "learning_rate": 4.95855941513894e-06, "loss": 0.4672, "step": 2327 }, { "epoch": 0.5180240320427236, "grad_norm": 1.0560405951634277, "learning_rate": 4.954955979609791e-06, "loss": 0.5047, "step": 2328 }, { "epoch": 0.5182465509568314, "grad_norm": 1.095887962573866, "learning_rate": 4.951352567477805e-06, "loss": 0.4835, "step": 2329 }, { "epoch": 0.518469069870939, "grad_norm": 1.2183651610899766, "learning_rate": 4.947749180614693e-06, "loss": 0.4915, "step": 2330 }, { "epoch": 0.5186915887850467, "grad_norm": 1.0437348740533623, "learning_rate": 4.944145820892159e-06, "loss": 0.4865, "step": 2331 }, { "epoch": 0.5189141076991545, "grad_norm": 1.1107597687603668, "learning_rate": 4.940542490181893e-06, "loss": 0.4777, "step": 2332 }, { "epoch": 0.5191366266132621, "grad_norm": 1.0315934080640308, "learning_rate": 4.936939190355566e-06, "loss": 0.4825, "step": 2333 }, { "epoch": 0.5193591455273698, "grad_norm": 1.02796903082191, "learning_rate": 4.933335923284836e-06, "loss": 0.4783, "step": 2334 }, { "epoch": 0.5195816644414776, "grad_norm": 1.1323484265029022, "learning_rate": 4.929732690841346e-06, "loss": 0.4758, "step": 2335 }, { "epoch": 0.5198041833555852, "grad_norm": 1.0611466357265498, "learning_rate": 4.926129494896717e-06, "loss": 0.4974, "step": 2336 }, { "epoch": 0.5200267022696929, "grad_norm": 1.0588191266030127, "learning_rate": 4.922526337322553e-06, "loss": 0.4701, "step": 2337 }, { "epoch": 0.5202492211838006, "grad_norm": 1.1349832668046422, "learning_rate": 4.9189232199904366e-06, "loss": 0.4741, "step": 2338 }, { "epoch": 0.5204717400979083, "grad_norm": 1.0505262606900927, "learning_rate": 4.915320144771931e-06, "loss": 0.4793, "step": 2339 }, { "epoch": 0.520694259012016, "grad_norm": 1.105523624214175, "learning_rate": 4.911717113538578e-06, "loss": 0.4792, "step": 2340 }, { "epoch": 0.5209167779261237, "grad_norm": 1.3631561301351625, "learning_rate": 4.908114128161892e-06, "loss": 0.4843, "step": 2341 }, { "epoch": 0.5211392968402314, "grad_norm": 1.1013808913507146, "learning_rate": 4.904511190513372e-06, "loss": 0.4962, "step": 2342 }, { "epoch": 0.5213618157543392, "grad_norm": 1.0956693029883116, "learning_rate": 4.900908302464486e-06, "loss": 0.4848, "step": 2343 }, { "epoch": 0.5215843346684468, "grad_norm": 1.0899872636654728, "learning_rate": 4.897305465886678e-06, "loss": 0.4766, "step": 2344 }, { "epoch": 0.5218068535825545, "grad_norm": 1.0365759835142303, "learning_rate": 4.893702682651363e-06, "loss": 0.4883, "step": 2345 }, { "epoch": 0.5220293724966623, "grad_norm": 1.0924871077700384, "learning_rate": 4.89009995462993e-06, "loss": 0.466, "step": 2346 }, { "epoch": 0.5222518914107699, "grad_norm": 1.1747147618818996, "learning_rate": 4.886497283693741e-06, "loss": 0.4979, "step": 2347 }, { "epoch": 0.5224744103248776, "grad_norm": 1.0784954098682111, "learning_rate": 4.882894671714128e-06, "loss": 0.4781, "step": 2348 }, { "epoch": 0.5226969292389854, "grad_norm": 1.0964467084845342, "learning_rate": 4.8792921205623895e-06, "loss": 0.478, "step": 2349 }, { "epoch": 0.522919448153093, "grad_norm": 1.1405255124240807, "learning_rate": 4.875689632109795e-06, "loss": 0.4916, "step": 2350 }, { "epoch": 0.5231419670672007, "grad_norm": 1.1322623635627065, "learning_rate": 4.872087208227581e-06, "loss": 0.4701, "step": 2351 }, { "epoch": 0.5233644859813084, "grad_norm": 1.0835766391049566, "learning_rate": 4.868484850786948e-06, "loss": 0.4788, "step": 2352 }, { "epoch": 0.5235870048954161, "grad_norm": 1.1561587915968383, "learning_rate": 4.864882561659063e-06, "loss": 0.482, "step": 2353 }, { "epoch": 0.5238095238095238, "grad_norm": 1.0668080975129872, "learning_rate": 4.861280342715064e-06, "loss": 0.4794, "step": 2354 }, { "epoch": 0.5240320427236315, "grad_norm": 1.0078402674741502, "learning_rate": 4.857678195826044e-06, "loss": 0.4715, "step": 2355 }, { "epoch": 0.5242545616377392, "grad_norm": 1.1982892626431527, "learning_rate": 4.854076122863062e-06, "loss": 0.5013, "step": 2356 }, { "epoch": 0.5244770805518469, "grad_norm": 1.1164736090811787, "learning_rate": 4.850474125697136e-06, "loss": 0.4903, "step": 2357 }, { "epoch": 0.5246995994659546, "grad_norm": 1.1247471586084588, "learning_rate": 4.8468722061992496e-06, "loss": 0.4811, "step": 2358 }, { "epoch": 0.5249221183800623, "grad_norm": 1.0607490592485609, "learning_rate": 4.8432703662403415e-06, "loss": 0.4821, "step": 2359 }, { "epoch": 0.52514463729417, "grad_norm": 1.166872782736195, "learning_rate": 4.839668607691313e-06, "loss": 0.478, "step": 2360 }, { "epoch": 0.5253671562082777, "grad_norm": 1.0715883921372702, "learning_rate": 4.836066932423021e-06, "loss": 0.4828, "step": 2361 }, { "epoch": 0.5255896751223854, "grad_norm": 1.1792541105812764, "learning_rate": 4.8324653423062775e-06, "loss": 0.4699, "step": 2362 }, { "epoch": 0.5258121940364932, "grad_norm": 1.031503452569524, "learning_rate": 4.828863839211853e-06, "loss": 0.4832, "step": 2363 }, { "epoch": 0.5260347129506008, "grad_norm": 1.0879230478564528, "learning_rate": 4.825262425010472e-06, "loss": 0.4758, "step": 2364 }, { "epoch": 0.5262572318647085, "grad_norm": 1.1292969300244229, "learning_rate": 4.82166110157281e-06, "loss": 0.4955, "step": 2365 }, { "epoch": 0.5264797507788161, "grad_norm": 1.0519309431660466, "learning_rate": 4.818059870769502e-06, "loss": 0.4805, "step": 2366 }, { "epoch": 0.5267022696929239, "grad_norm": 1.0665485978852245, "learning_rate": 4.814458734471129e-06, "loss": 0.4831, "step": 2367 }, { "epoch": 0.5269247886070316, "grad_norm": 1.017717694378482, "learning_rate": 4.8108576945482235e-06, "loss": 0.4554, "step": 2368 }, { "epoch": 0.5271473075211393, "grad_norm": 1.0994663350580063, "learning_rate": 4.807256752871272e-06, "loss": 0.4849, "step": 2369 }, { "epoch": 0.527369826435247, "grad_norm": 1.1100217588968204, "learning_rate": 4.803655911310705e-06, "loss": 0.4702, "step": 2370 }, { "epoch": 0.5275923453493547, "grad_norm": 1.1107825811967293, "learning_rate": 4.800055171736902e-06, "loss": 0.4838, "step": 2371 }, { "epoch": 0.5278148642634624, "grad_norm": 1.0822483236160079, "learning_rate": 4.796454536020193e-06, "loss": 0.4832, "step": 2372 }, { "epoch": 0.5280373831775701, "grad_norm": 1.0654290011456677, "learning_rate": 4.792854006030852e-06, "loss": 0.4802, "step": 2373 }, { "epoch": 0.5282599020916778, "grad_norm": 1.038449740192689, "learning_rate": 4.789253583639097e-06, "loss": 0.491, "step": 2374 }, { "epoch": 0.5284824210057855, "grad_norm": 1.0960197898066646, "learning_rate": 4.785653270715091e-06, "loss": 0.4751, "step": 2375 }, { "epoch": 0.5287049399198932, "grad_norm": 1.0994205553885985, "learning_rate": 4.78205306912894e-06, "loss": 0.4823, "step": 2376 }, { "epoch": 0.5289274588340009, "grad_norm": 1.079232233028082, "learning_rate": 4.77845298075069e-06, "loss": 0.4775, "step": 2377 }, { "epoch": 0.5291499777481086, "grad_norm": 1.094434329491808, "learning_rate": 4.774853007450334e-06, "loss": 0.481, "step": 2378 }, { "epoch": 0.5293724966622163, "grad_norm": 1.074360349474163, "learning_rate": 4.771253151097799e-06, "loss": 0.4919, "step": 2379 }, { "epoch": 0.5295950155763239, "grad_norm": 1.0862590117128, "learning_rate": 4.767653413562956e-06, "loss": 0.4875, "step": 2380 }, { "epoch": 0.5298175344904317, "grad_norm": 1.060397897246627, "learning_rate": 4.764053796715613e-06, "loss": 0.4841, "step": 2381 }, { "epoch": 0.5300400534045394, "grad_norm": 1.153654116720188, "learning_rate": 4.760454302425511e-06, "loss": 0.4838, "step": 2382 }, { "epoch": 0.530262572318647, "grad_norm": 1.0997445914308044, "learning_rate": 4.756854932562332e-06, "loss": 0.4779, "step": 2383 }, { "epoch": 0.5304850912327548, "grad_norm": 1.1398806895437554, "learning_rate": 4.753255688995692e-06, "loss": 0.4818, "step": 2384 }, { "epoch": 0.5307076101468625, "grad_norm": 1.1058023322132688, "learning_rate": 4.749656573595145e-06, "loss": 0.4832, "step": 2385 }, { "epoch": 0.5309301290609701, "grad_norm": 1.1071065901621133, "learning_rate": 4.746057588230172e-06, "loss": 0.4731, "step": 2386 }, { "epoch": 0.5311526479750779, "grad_norm": 1.1701830390556207, "learning_rate": 4.74245873477019e-06, "loss": 0.4765, "step": 2387 }, { "epoch": 0.5313751668891856, "grad_norm": 1.0541526091449749, "learning_rate": 4.738860015084546e-06, "loss": 0.475, "step": 2388 }, { "epoch": 0.5315976858032933, "grad_norm": 1.0818116618726816, "learning_rate": 4.73526143104252e-06, "loss": 0.4849, "step": 2389 }, { "epoch": 0.531820204717401, "grad_norm": 1.1294269586547758, "learning_rate": 4.7316629845133176e-06, "loss": 0.4842, "step": 2390 }, { "epoch": 0.5320427236315087, "grad_norm": 1.0223488343343494, "learning_rate": 4.728064677366079e-06, "loss": 0.4727, "step": 2391 }, { "epoch": 0.5322652425456164, "grad_norm": 1.0423781329848674, "learning_rate": 4.724466511469867e-06, "loss": 0.4661, "step": 2392 }, { "epoch": 0.5324877614597241, "grad_norm": 1.0331479343691092, "learning_rate": 4.720868488693671e-06, "loss": 0.4845, "step": 2393 }, { "epoch": 0.5327102803738317, "grad_norm": 1.0327422702678932, "learning_rate": 4.7172706109064096e-06, "loss": 0.4808, "step": 2394 }, { "epoch": 0.5329327992879395, "grad_norm": 1.0515410302910142, "learning_rate": 4.713672879976924e-06, "loss": 0.4856, "step": 2395 }, { "epoch": 0.5331553182020472, "grad_norm": 1.0988602583776577, "learning_rate": 4.710075297773974e-06, "loss": 0.4828, "step": 2396 }, { "epoch": 0.5333778371161548, "grad_norm": 1.1080647451397243, "learning_rate": 4.7064778661662545e-06, "loss": 0.4644, "step": 2397 }, { "epoch": 0.5336003560302626, "grad_norm": 1.0611738642140471, "learning_rate": 4.702880587022372e-06, "loss": 0.4685, "step": 2398 }, { "epoch": 0.5338228749443703, "grad_norm": 1.2136680202493688, "learning_rate": 4.699283462210855e-06, "loss": 0.4764, "step": 2399 }, { "epoch": 0.5340453938584779, "grad_norm": 1.059845270814101, "learning_rate": 4.6956864936001565e-06, "loss": 0.4854, "step": 2400 }, { "epoch": 0.5342679127725857, "grad_norm": 1.1829743287457746, "learning_rate": 4.692089683058643e-06, "loss": 0.4782, "step": 2401 }, { "epoch": 0.5344904316866934, "grad_norm": 1.151106971555874, "learning_rate": 4.688493032454601e-06, "loss": 0.4794, "step": 2402 }, { "epoch": 0.534712950600801, "grad_norm": 1.1041759872677834, "learning_rate": 4.684896543656237e-06, "loss": 0.4791, "step": 2403 }, { "epoch": 0.5349354695149088, "grad_norm": 1.1145624157994931, "learning_rate": 4.681300218531668e-06, "loss": 0.466, "step": 2404 }, { "epoch": 0.5351579884290165, "grad_norm": 1.1857591715438007, "learning_rate": 4.677704058948932e-06, "loss": 0.4926, "step": 2405 }, { "epoch": 0.5353805073431241, "grad_norm": 1.060685430594946, "learning_rate": 4.6741080667759755e-06, "loss": 0.4675, "step": 2406 }, { "epoch": 0.5356030262572319, "grad_norm": 1.1273088580844244, "learning_rate": 4.67051224388066e-06, "loss": 0.4789, "step": 2407 }, { "epoch": 0.5358255451713395, "grad_norm": 1.1777838658018582, "learning_rate": 4.6669165921307575e-06, "loss": 0.4878, "step": 2408 }, { "epoch": 0.5360480640854473, "grad_norm": 1.1470051952055025, "learning_rate": 4.663321113393957e-06, "loss": 0.477, "step": 2409 }, { "epoch": 0.536270582999555, "grad_norm": 1.1239852905826047, "learning_rate": 4.65972580953785e-06, "loss": 0.4801, "step": 2410 }, { "epoch": 0.5364931019136626, "grad_norm": 1.1151149019568334, "learning_rate": 4.656130682429945e-06, "loss": 0.4651, "step": 2411 }, { "epoch": 0.5367156208277704, "grad_norm": 1.066776814329337, "learning_rate": 4.652535733937651e-06, "loss": 0.4791, "step": 2412 }, { "epoch": 0.5369381397418781, "grad_norm": 1.0880143363002595, "learning_rate": 4.648940965928288e-06, "loss": 0.4766, "step": 2413 }, { "epoch": 0.5371606586559857, "grad_norm": 2.061312379133434, "learning_rate": 4.6453463802690815e-06, "loss": 0.4809, "step": 2414 }, { "epoch": 0.5373831775700935, "grad_norm": 1.1122838798399328, "learning_rate": 4.641751978827162e-06, "loss": 0.4812, "step": 2415 }, { "epoch": 0.5376056964842012, "grad_norm": 1.1066803985920637, "learning_rate": 4.638157763469568e-06, "loss": 0.4666, "step": 2416 }, { "epoch": 0.5378282153983088, "grad_norm": 1.0817352499277921, "learning_rate": 4.634563736063238e-06, "loss": 0.4749, "step": 2417 }, { "epoch": 0.5380507343124166, "grad_norm": 1.2469057387569722, "learning_rate": 4.630969898475011e-06, "loss": 0.4627, "step": 2418 }, { "epoch": 0.5382732532265243, "grad_norm": 1.1736894953716481, "learning_rate": 4.62737625257163e-06, "loss": 0.4649, "step": 2419 }, { "epoch": 0.5384957721406319, "grad_norm": 1.1731121478417943, "learning_rate": 4.623782800219739e-06, "loss": 0.494, "step": 2420 }, { "epoch": 0.5387182910547397, "grad_norm": 1.1763159036505988, "learning_rate": 4.620189543285877e-06, "loss": 0.4866, "step": 2421 }, { "epoch": 0.5389408099688473, "grad_norm": 1.1739808569741839, "learning_rate": 4.616596483636491e-06, "loss": 0.4617, "step": 2422 }, { "epoch": 0.539163328882955, "grad_norm": 1.1359734941639328, "learning_rate": 4.613003623137916e-06, "loss": 0.4754, "step": 2423 }, { "epoch": 0.5393858477970628, "grad_norm": 1.084795290802515, "learning_rate": 4.609410963656387e-06, "loss": 0.4458, "step": 2424 }, { "epoch": 0.5396083667111704, "grad_norm": 1.0574245440968166, "learning_rate": 4.6058185070580365e-06, "loss": 0.4739, "step": 2425 }, { "epoch": 0.5398308856252781, "grad_norm": 1.1640117955096787, "learning_rate": 4.6022262552088874e-06, "loss": 0.4844, "step": 2426 }, { "epoch": 0.5400534045393859, "grad_norm": 1.127141069244046, "learning_rate": 4.598634209974858e-06, "loss": 0.471, "step": 2427 }, { "epoch": 0.5402759234534935, "grad_norm": 1.0596166707753738, "learning_rate": 4.595042373221763e-06, "loss": 0.473, "step": 2428 }, { "epoch": 0.5404984423676013, "grad_norm": 1.065141174881477, "learning_rate": 4.591450746815305e-06, "loss": 0.4793, "step": 2429 }, { "epoch": 0.540720961281709, "grad_norm": 1.1432130809736722, "learning_rate": 4.587859332621076e-06, "loss": 0.4791, "step": 2430 }, { "epoch": 0.5409434801958166, "grad_norm": 1.1360738748807935, "learning_rate": 4.584268132504561e-06, "loss": 0.4738, "step": 2431 }, { "epoch": 0.5411659991099244, "grad_norm": 1.1171858562245152, "learning_rate": 4.580677148331134e-06, "loss": 0.4783, "step": 2432 }, { "epoch": 0.5413885180240321, "grad_norm": 1.1171838799423695, "learning_rate": 4.577086381966051e-06, "loss": 0.4792, "step": 2433 }, { "epoch": 0.5416110369381397, "grad_norm": 1.1112192827095173, "learning_rate": 4.5734958352744655e-06, "loss": 0.4684, "step": 2434 }, { "epoch": 0.5418335558522475, "grad_norm": 1.0766539524401415, "learning_rate": 4.569905510121405e-06, "loss": 0.4737, "step": 2435 }, { "epoch": 0.5420560747663551, "grad_norm": 1.1304006414799657, "learning_rate": 4.566315408371792e-06, "loss": 0.4706, "step": 2436 }, { "epoch": 0.5422785936804628, "grad_norm": 1.1152335287052457, "learning_rate": 4.562725531890427e-06, "loss": 0.476, "step": 2437 }, { "epoch": 0.5425011125945706, "grad_norm": 1.0973235973465967, "learning_rate": 4.559135882541995e-06, "loss": 0.4796, "step": 2438 }, { "epoch": 0.5427236315086782, "grad_norm": 1.1962399817390854, "learning_rate": 4.55554646219106e-06, "loss": 0.4749, "step": 2439 }, { "epoch": 0.5429461504227859, "grad_norm": 1.1131123093296502, "learning_rate": 4.551957272702076e-06, "loss": 0.4693, "step": 2440 }, { "epoch": 0.5431686693368937, "grad_norm": 1.039741877091728, "learning_rate": 4.548368315939366e-06, "loss": 0.4819, "step": 2441 }, { "epoch": 0.5433911882510013, "grad_norm": 1.0379654428167395, "learning_rate": 4.5447795937671426e-06, "loss": 0.4655, "step": 2442 }, { "epoch": 0.543613707165109, "grad_norm": 1.0838912203917335, "learning_rate": 4.541191108049487e-06, "loss": 0.4925, "step": 2443 }, { "epoch": 0.5438362260792168, "grad_norm": 1.1247668968546305, "learning_rate": 4.537602860650364e-06, "loss": 0.4906, "step": 2444 }, { "epoch": 0.5440587449933244, "grad_norm": 1.1249861672813495, "learning_rate": 4.534014853433609e-06, "loss": 0.4889, "step": 2445 }, { "epoch": 0.5442812639074321, "grad_norm": 1.076164464782694, "learning_rate": 4.53042708826294e-06, "loss": 0.4596, "step": 2446 }, { "epoch": 0.5445037828215399, "grad_norm": 1.1075746138617564, "learning_rate": 4.526839567001945e-06, "loss": 0.4779, "step": 2447 }, { "epoch": 0.5447263017356475, "grad_norm": 1.0681475526901059, "learning_rate": 4.523252291514086e-06, "loss": 0.4733, "step": 2448 }, { "epoch": 0.5449488206497553, "grad_norm": 1.1283335065341364, "learning_rate": 4.519665263662696e-06, "loss": 0.4808, "step": 2449 }, { "epoch": 0.5451713395638629, "grad_norm": 1.2274996388466335, "learning_rate": 4.516078485310978e-06, "loss": 0.484, "step": 2450 }, { "epoch": 0.5453938584779706, "grad_norm": 1.056766978867283, "learning_rate": 4.512491958322012e-06, "loss": 0.4682, "step": 2451 }, { "epoch": 0.5456163773920784, "grad_norm": 1.0752082230879212, "learning_rate": 4.5089056845587385e-06, "loss": 0.4939, "step": 2452 }, { "epoch": 0.545838896306186, "grad_norm": 1.1538903809246948, "learning_rate": 4.505319665883977e-06, "loss": 0.4763, "step": 2453 }, { "epoch": 0.5460614152202937, "grad_norm": 1.1223372631590722, "learning_rate": 4.501733904160406e-06, "loss": 0.4642, "step": 2454 }, { "epoch": 0.5462839341344015, "grad_norm": 1.1487236415217426, "learning_rate": 4.4981484012505726e-06, "loss": 0.4853, "step": 2455 }, { "epoch": 0.5465064530485091, "grad_norm": 1.1404046713150457, "learning_rate": 4.494563159016891e-06, "loss": 0.4689, "step": 2456 }, { "epoch": 0.5467289719626168, "grad_norm": 1.1322723276367337, "learning_rate": 4.490978179321638e-06, "loss": 0.49, "step": 2457 }, { "epoch": 0.5469514908767246, "grad_norm": 1.1224378638615502, "learning_rate": 4.487393464026955e-06, "loss": 0.4961, "step": 2458 }, { "epoch": 0.5471740097908322, "grad_norm": 1.1341787306021394, "learning_rate": 4.483809014994849e-06, "loss": 0.4659, "step": 2459 }, { "epoch": 0.5473965287049399, "grad_norm": 1.1242835148540582, "learning_rate": 4.480224834087183e-06, "loss": 0.4644, "step": 2460 }, { "epoch": 0.5476190476190477, "grad_norm": 1.0464878533522821, "learning_rate": 4.476640923165685e-06, "loss": 0.4867, "step": 2461 }, { "epoch": 0.5478415665331553, "grad_norm": 1.0365748321686254, "learning_rate": 4.473057284091942e-06, "loss": 0.479, "step": 2462 }, { "epoch": 0.548064085447263, "grad_norm": 1.174289027536862, "learning_rate": 4.469473918727397e-06, "loss": 0.4791, "step": 2463 }, { "epoch": 0.5482866043613707, "grad_norm": 1.1160025014468489, "learning_rate": 4.465890828933353e-06, "loss": 0.4847, "step": 2464 }, { "epoch": 0.5485091232754784, "grad_norm": 1.045642104166538, "learning_rate": 4.462308016570973e-06, "loss": 0.4554, "step": 2465 }, { "epoch": 0.5487316421895861, "grad_norm": 1.096830122141865, "learning_rate": 4.45872548350127e-06, "loss": 0.4769, "step": 2466 }, { "epoch": 0.5489541611036938, "grad_norm": 1.1858376639817696, "learning_rate": 4.4551432315851165e-06, "loss": 0.4699, "step": 2467 }, { "epoch": 0.5491766800178015, "grad_norm": 1.10059681188174, "learning_rate": 4.451561262683236e-06, "loss": 0.4538, "step": 2468 }, { "epoch": 0.5493991989319092, "grad_norm": 1.1487471234222333, "learning_rate": 4.447979578656207e-06, "loss": 0.4488, "step": 2469 }, { "epoch": 0.5496217178460169, "grad_norm": 1.1138401463582284, "learning_rate": 4.444398181364456e-06, "loss": 0.4583, "step": 2470 }, { "epoch": 0.5498442367601246, "grad_norm": 1.1872464683185615, "learning_rate": 4.440817072668268e-06, "loss": 0.4866, "step": 2471 }, { "epoch": 0.5500667556742324, "grad_norm": 1.1414339608087027, "learning_rate": 4.437236254427772e-06, "loss": 0.4557, "step": 2472 }, { "epoch": 0.55028927458834, "grad_norm": 1.0781868481486994, "learning_rate": 4.4336557285029494e-06, "loss": 0.4717, "step": 2473 }, { "epoch": 0.5505117935024477, "grad_norm": 1.0985004770111635, "learning_rate": 4.430075496753626e-06, "loss": 0.4701, "step": 2474 }, { "epoch": 0.5507343124165555, "grad_norm": 1.209409635983976, "learning_rate": 4.426495561039477e-06, "loss": 0.468, "step": 2475 }, { "epoch": 0.5509568313306631, "grad_norm": 1.1421944767027923, "learning_rate": 4.422915923220025e-06, "loss": 0.4825, "step": 2476 }, { "epoch": 0.5511793502447708, "grad_norm": 1.0879959365641405, "learning_rate": 4.419336585154638e-06, "loss": 0.4736, "step": 2477 }, { "epoch": 0.5514018691588785, "grad_norm": 1.0869284514824378, "learning_rate": 4.4157575487025265e-06, "loss": 0.4691, "step": 2478 }, { "epoch": 0.5516243880729862, "grad_norm": 1.0985964892655833, "learning_rate": 4.412178815722744e-06, "loss": 0.4676, "step": 2479 }, { "epoch": 0.5518469069870939, "grad_norm": 1.092401873674104, "learning_rate": 4.408600388074188e-06, "loss": 0.4837, "step": 2480 }, { "epoch": 0.5520694259012016, "grad_norm": 1.0803215627291358, "learning_rate": 4.405022267615595e-06, "loss": 0.4522, "step": 2481 }, { "epoch": 0.5522919448153093, "grad_norm": 1.1308973314071158, "learning_rate": 4.401444456205546e-06, "loss": 0.4565, "step": 2482 }, { "epoch": 0.552514463729417, "grad_norm": 1.107670766756063, "learning_rate": 4.397866955702458e-06, "loss": 0.4893, "step": 2483 }, { "epoch": 0.5527369826435247, "grad_norm": 1.1448767473670358, "learning_rate": 4.3942897679645895e-06, "loss": 0.4855, "step": 2484 }, { "epoch": 0.5529595015576324, "grad_norm": 1.1439340916895702, "learning_rate": 4.390712894850033e-06, "loss": 0.477, "step": 2485 }, { "epoch": 0.5531820204717401, "grad_norm": 1.1644255716450234, "learning_rate": 4.387136338216718e-06, "loss": 0.4705, "step": 2486 }, { "epoch": 0.5534045393858478, "grad_norm": 1.1337872197284815, "learning_rate": 4.383560099922413e-06, "loss": 0.4652, "step": 2487 }, { "epoch": 0.5536270582999555, "grad_norm": 1.1076036903047013, "learning_rate": 4.379984181824718e-06, "loss": 0.4838, "step": 2488 }, { "epoch": 0.5538495772140632, "grad_norm": 1.1364795138295511, "learning_rate": 4.376408585781067e-06, "loss": 0.4966, "step": 2489 }, { "epoch": 0.5540720961281709, "grad_norm": 1.1491420970147195, "learning_rate": 4.372833313648729e-06, "loss": 0.491, "step": 2490 }, { "epoch": 0.5542946150422786, "grad_norm": 1.1220597275240793, "learning_rate": 4.369258367284803e-06, "loss": 0.4919, "step": 2491 }, { "epoch": 0.5545171339563862, "grad_norm": 1.118638328964577, "learning_rate": 4.365683748546218e-06, "loss": 0.475, "step": 2492 }, { "epoch": 0.554739652870494, "grad_norm": 1.110391431164002, "learning_rate": 4.362109459289734e-06, "loss": 0.4568, "step": 2493 }, { "epoch": 0.5549621717846017, "grad_norm": 1.1546745577443165, "learning_rate": 4.358535501371941e-06, "loss": 0.4645, "step": 2494 }, { "epoch": 0.5551846906987093, "grad_norm": 1.0804492294900698, "learning_rate": 4.354961876649252e-06, "loss": 0.4832, "step": 2495 }, { "epoch": 0.5554072096128171, "grad_norm": 1.112364901295686, "learning_rate": 4.351388586977914e-06, "loss": 0.465, "step": 2496 }, { "epoch": 0.5556297285269248, "grad_norm": 1.1171400650530772, "learning_rate": 4.347815634213995e-06, "loss": 0.4705, "step": 2497 }, { "epoch": 0.5558522474410325, "grad_norm": 1.0805091866516638, "learning_rate": 4.34424302021339e-06, "loss": 0.4705, "step": 2498 }, { "epoch": 0.5560747663551402, "grad_norm": 1.4290657609114583, "learning_rate": 4.340670746831818e-06, "loss": 0.472, "step": 2499 }, { "epoch": 0.5562972852692479, "grad_norm": 1.273694435512728, "learning_rate": 4.3370988159248185e-06, "loss": 0.4642, "step": 2500 }, { "epoch": 0.5565198041833556, "grad_norm": 1.2098103348894886, "learning_rate": 4.333527229347754e-06, "loss": 0.4828, "step": 2501 }, { "epoch": 0.5567423230974633, "grad_norm": 1.1711669049388875, "learning_rate": 4.329955988955814e-06, "loss": 0.4717, "step": 2502 }, { "epoch": 0.556964842011571, "grad_norm": 1.1295470223627069, "learning_rate": 4.326385096604e-06, "loss": 0.4806, "step": 2503 }, { "epoch": 0.5571873609256787, "grad_norm": 1.1198873124641795, "learning_rate": 4.322814554147138e-06, "loss": 0.4849, "step": 2504 }, { "epoch": 0.5574098798397864, "grad_norm": 1.1045423105066325, "learning_rate": 4.3192443634398695e-06, "loss": 0.4693, "step": 2505 }, { "epoch": 0.557632398753894, "grad_norm": 1.1717348870861475, "learning_rate": 4.3156745263366526e-06, "loss": 0.4858, "step": 2506 }, { "epoch": 0.5578549176680018, "grad_norm": 1.087770351364647, "learning_rate": 4.312105044691766e-06, "loss": 0.4792, "step": 2507 }, { "epoch": 0.5580774365821095, "grad_norm": 1.0409039358176668, "learning_rate": 4.308535920359299e-06, "loss": 0.4838, "step": 2508 }, { "epoch": 0.5582999554962171, "grad_norm": 1.132698845142595, "learning_rate": 4.304967155193159e-06, "loss": 0.4901, "step": 2509 }, { "epoch": 0.5585224744103249, "grad_norm": 1.1181375547074373, "learning_rate": 4.3013987510470665e-06, "loss": 0.4774, "step": 2510 }, { "epoch": 0.5587449933244326, "grad_norm": 1.0926036175049192, "learning_rate": 4.297830709774551e-06, "loss": 0.4736, "step": 2511 }, { "epoch": 0.5589675122385402, "grad_norm": 1.1710842908172476, "learning_rate": 4.294263033228954e-06, "loss": 0.4732, "step": 2512 }, { "epoch": 0.559190031152648, "grad_norm": 1.1034888698789478, "learning_rate": 4.290695723263432e-06, "loss": 0.4739, "step": 2513 }, { "epoch": 0.5594125500667557, "grad_norm": 1.2729607829771954, "learning_rate": 4.287128781730947e-06, "loss": 0.4734, "step": 2514 }, { "epoch": 0.5596350689808633, "grad_norm": 1.253195420066453, "learning_rate": 4.283562210484275e-06, "loss": 0.4659, "step": 2515 }, { "epoch": 0.5598575878949711, "grad_norm": 1.2632119275023146, "learning_rate": 4.279996011375991e-06, "loss": 0.4833, "step": 2516 }, { "epoch": 0.5600801068090788, "grad_norm": 1.1848330162791554, "learning_rate": 4.276430186258482e-06, "loss": 0.4741, "step": 2517 }, { "epoch": 0.5603026257231865, "grad_norm": 1.200406173714087, "learning_rate": 4.2728647369839425e-06, "loss": 0.4584, "step": 2518 }, { "epoch": 0.5605251446372942, "grad_norm": 1.1789392194222845, "learning_rate": 4.269299665404365e-06, "loss": 0.4689, "step": 2519 }, { "epoch": 0.5607476635514018, "grad_norm": 1.2082137521342902, "learning_rate": 4.2657349733715555e-06, "loss": 0.4882, "step": 2520 }, { "epoch": 0.5609701824655096, "grad_norm": 1.186790915176405, "learning_rate": 4.262170662737115e-06, "loss": 0.4731, "step": 2521 }, { "epoch": 0.5611927013796173, "grad_norm": 1.2325611660907756, "learning_rate": 4.258606735352449e-06, "loss": 0.4586, "step": 2522 }, { "epoch": 0.5614152202937249, "grad_norm": 1.1160582069875375, "learning_rate": 4.255043193068763e-06, "loss": 0.4509, "step": 2523 }, { "epoch": 0.5616377392078327, "grad_norm": 1.138442498654888, "learning_rate": 4.251480037737065e-06, "loss": 0.4676, "step": 2524 }, { "epoch": 0.5618602581219404, "grad_norm": 1.1815324290500948, "learning_rate": 4.247917271208161e-06, "loss": 0.4724, "step": 2525 }, { "epoch": 0.562082777036048, "grad_norm": 1.1068537675934387, "learning_rate": 4.244354895332649e-06, "loss": 0.4686, "step": 2526 }, { "epoch": 0.5623052959501558, "grad_norm": 1.1427059560363264, "learning_rate": 4.240792911960936e-06, "loss": 0.4703, "step": 2527 }, { "epoch": 0.5625278148642635, "grad_norm": 1.135791659717845, "learning_rate": 4.237231322943216e-06, "loss": 0.4756, "step": 2528 }, { "epoch": 0.5627503337783711, "grad_norm": 1.168810402986495, "learning_rate": 4.2336701301294805e-06, "loss": 0.4816, "step": 2529 }, { "epoch": 0.5629728526924789, "grad_norm": 1.1768412063792142, "learning_rate": 4.2301093353695165e-06, "loss": 0.4732, "step": 2530 }, { "epoch": 0.5631953716065866, "grad_norm": 1.0673900298558363, "learning_rate": 4.2265489405129015e-06, "loss": 0.4757, "step": 2531 }, { "epoch": 0.5634178905206942, "grad_norm": 1.1702337629069137, "learning_rate": 4.222988947409007e-06, "loss": 0.4642, "step": 2532 }, { "epoch": 0.563640409434802, "grad_norm": 1.198260821107904, "learning_rate": 4.219429357906998e-06, "loss": 0.4612, "step": 2533 }, { "epoch": 0.5638629283489096, "grad_norm": 1.2489636553776797, "learning_rate": 4.215870173855825e-06, "loss": 0.4768, "step": 2534 }, { "epoch": 0.5640854472630173, "grad_norm": 1.3159455588451145, "learning_rate": 4.212311397104233e-06, "loss": 0.4769, "step": 2535 }, { "epoch": 0.5643079661771251, "grad_norm": 1.2259708738399198, "learning_rate": 4.208753029500753e-06, "loss": 0.4767, "step": 2536 }, { "epoch": 0.5645304850912327, "grad_norm": 1.139410395140211, "learning_rate": 4.2051950728937e-06, "loss": 0.4697, "step": 2537 }, { "epoch": 0.5647530040053405, "grad_norm": 1.2487473917152012, "learning_rate": 4.2016375291311815e-06, "loss": 0.4763, "step": 2538 }, { "epoch": 0.5649755229194482, "grad_norm": 1.2496237513152364, "learning_rate": 4.198080400061088e-06, "loss": 0.4777, "step": 2539 }, { "epoch": 0.5651980418335558, "grad_norm": 1.1332540636826804, "learning_rate": 4.194523687531096e-06, "loss": 0.4865, "step": 2540 }, { "epoch": 0.5654205607476636, "grad_norm": 1.2259966648824616, "learning_rate": 4.190967393388662e-06, "loss": 0.4754, "step": 2541 }, { "epoch": 0.5656430796617713, "grad_norm": 1.1772076146051436, "learning_rate": 4.187411519481028e-06, "loss": 0.474, "step": 2542 }, { "epoch": 0.5658655985758789, "grad_norm": 1.1925956172883736, "learning_rate": 4.183856067655216e-06, "loss": 0.4811, "step": 2543 }, { "epoch": 0.5660881174899867, "grad_norm": 1.122113926787242, "learning_rate": 4.1803010397580315e-06, "loss": 0.4814, "step": 2544 }, { "epoch": 0.5663106364040944, "grad_norm": 1.0770592953852505, "learning_rate": 4.176746437636059e-06, "loss": 0.475, "step": 2545 }, { "epoch": 0.566533155318202, "grad_norm": 1.128276391049851, "learning_rate": 4.17319226313566e-06, "loss": 0.4643, "step": 2546 }, { "epoch": 0.5667556742323098, "grad_norm": 1.1427038158686733, "learning_rate": 4.169638518102975e-06, "loss": 0.4694, "step": 2547 }, { "epoch": 0.5669781931464174, "grad_norm": 1.0913377236802295, "learning_rate": 4.16608520438392e-06, "loss": 0.4578, "step": 2548 }, { "epoch": 0.5672007120605251, "grad_norm": 1.0551321124161814, "learning_rate": 4.162532323824191e-06, "loss": 0.475, "step": 2549 }, { "epoch": 0.5674232309746329, "grad_norm": 1.1224159650251815, "learning_rate": 4.158979878269253e-06, "loss": 0.4913, "step": 2550 }, { "epoch": 0.5676457498887405, "grad_norm": 1.0867432358956874, "learning_rate": 4.155427869564352e-06, "loss": 0.4834, "step": 2551 }, { "epoch": 0.5678682688028482, "grad_norm": 1.0998617718789945, "learning_rate": 4.151876299554501e-06, "loss": 0.4644, "step": 2552 }, { "epoch": 0.568090787716956, "grad_norm": 1.1457175973289353, "learning_rate": 4.148325170084489e-06, "loss": 0.4929, "step": 2553 }, { "epoch": 0.5683133066310636, "grad_norm": 1.1117338955376161, "learning_rate": 4.144774482998874e-06, "loss": 0.4703, "step": 2554 }, { "epoch": 0.5685358255451713, "grad_norm": 1.3328289729358698, "learning_rate": 4.141224240141986e-06, "loss": 0.4765, "step": 2555 }, { "epoch": 0.5687583444592791, "grad_norm": 1.1205595593000943, "learning_rate": 4.137674443357919e-06, "loss": 0.4831, "step": 2556 }, { "epoch": 0.5689808633733867, "grad_norm": 1.1830108448024192, "learning_rate": 4.134125094490545e-06, "loss": 0.4827, "step": 2557 }, { "epoch": 0.5692033822874945, "grad_norm": 1.1376945684843869, "learning_rate": 4.130576195383497e-06, "loss": 0.4802, "step": 2558 }, { "epoch": 0.5694259012016022, "grad_norm": 1.1740469932791484, "learning_rate": 4.127027747880173e-06, "loss": 0.4767, "step": 2559 }, { "epoch": 0.5696484201157098, "grad_norm": 1.1871880283174672, "learning_rate": 4.123479753823739e-06, "loss": 0.4609, "step": 2560 }, { "epoch": 0.5698709390298176, "grad_norm": 1.095382808236446, "learning_rate": 4.119932215057126e-06, "loss": 0.475, "step": 2561 }, { "epoch": 0.5700934579439252, "grad_norm": 1.1898295139432227, "learning_rate": 4.116385133423029e-06, "loss": 0.464, "step": 2562 }, { "epoch": 0.5703159768580329, "grad_norm": 1.2006399649745045, "learning_rate": 4.112838510763899e-06, "loss": 0.4732, "step": 2563 }, { "epoch": 0.5705384957721407, "grad_norm": 1.1599520226549196, "learning_rate": 4.10929234892196e-06, "loss": 0.4717, "step": 2564 }, { "epoch": 0.5707610146862483, "grad_norm": 1.0928918238497611, "learning_rate": 4.1057466497391875e-06, "loss": 0.4661, "step": 2565 }, { "epoch": 0.570983533600356, "grad_norm": 1.127572050318994, "learning_rate": 4.10220141505732e-06, "loss": 0.4649, "step": 2566 }, { "epoch": 0.5712060525144638, "grad_norm": 1.0889708084931766, "learning_rate": 4.098656646717854e-06, "loss": 0.4732, "step": 2567 }, { "epoch": 0.5714285714285714, "grad_norm": 1.1108899006606068, "learning_rate": 4.095112346562045e-06, "loss": 0.4604, "step": 2568 }, { "epoch": 0.5716510903426791, "grad_norm": 1.303635878976953, "learning_rate": 4.091568516430902e-06, "loss": 0.4657, "step": 2569 }, { "epoch": 0.5718736092567869, "grad_norm": 1.1407864958770655, "learning_rate": 4.088025158165196e-06, "loss": 0.4612, "step": 2570 }, { "epoch": 0.5720961281708945, "grad_norm": 1.1517889105130936, "learning_rate": 4.0844822736054464e-06, "loss": 0.4827, "step": 2571 }, { "epoch": 0.5723186470850022, "grad_norm": 1.1259926755718155, "learning_rate": 4.08093986459193e-06, "loss": 0.4534, "step": 2572 }, { "epoch": 0.5725411659991099, "grad_norm": 1.0721766935544648, "learning_rate": 4.077397932964679e-06, "loss": 0.4666, "step": 2573 }, { "epoch": 0.5727636849132176, "grad_norm": 1.079637951631587, "learning_rate": 4.073856480563468e-06, "loss": 0.4733, "step": 2574 }, { "epoch": 0.5729862038273253, "grad_norm": 1.0939085516507, "learning_rate": 4.0703155092278324e-06, "loss": 0.4745, "step": 2575 }, { "epoch": 0.573208722741433, "grad_norm": 1.1201621429083188, "learning_rate": 4.0667750207970565e-06, "loss": 0.4754, "step": 2576 }, { "epoch": 0.5734312416555407, "grad_norm": 1.0858323353636328, "learning_rate": 4.063235017110172e-06, "loss": 0.4649, "step": 2577 }, { "epoch": 0.5736537605696485, "grad_norm": 1.1703734552761, "learning_rate": 4.059695500005955e-06, "loss": 0.4732, "step": 2578 }, { "epoch": 0.5738762794837561, "grad_norm": 1.1535849724991927, "learning_rate": 4.056156471322934e-06, "loss": 0.462, "step": 2579 }, { "epoch": 0.5740987983978638, "grad_norm": 1.1708280793566441, "learning_rate": 4.052617932899384e-06, "loss": 0.4706, "step": 2580 }, { "epoch": 0.5743213173119716, "grad_norm": 1.1345856597072486, "learning_rate": 4.04907988657332e-06, "loss": 0.4559, "step": 2581 }, { "epoch": 0.5745438362260792, "grad_norm": 1.1184679877642616, "learning_rate": 4.0455423341825094e-06, "loss": 0.455, "step": 2582 }, { "epoch": 0.5747663551401869, "grad_norm": 1.6797949932913259, "learning_rate": 4.042005277564456e-06, "loss": 0.4675, "step": 2583 }, { "epoch": 0.5749888740542947, "grad_norm": 1.0913734996566402, "learning_rate": 4.03846871855641e-06, "loss": 0.4655, "step": 2584 }, { "epoch": 0.5752113929684023, "grad_norm": 1.1299512750729652, "learning_rate": 4.034932658995359e-06, "loss": 0.4628, "step": 2585 }, { "epoch": 0.57543391188251, "grad_norm": 1.152395314505583, "learning_rate": 4.031397100718038e-06, "loss": 0.4486, "step": 2586 }, { "epoch": 0.5756564307966177, "grad_norm": 1.1290611779313677, "learning_rate": 4.027862045560913e-06, "loss": 0.4631, "step": 2587 }, { "epoch": 0.5758789497107254, "grad_norm": 1.1340997893107962, "learning_rate": 4.024327495360198e-06, "loss": 0.4529, "step": 2588 }, { "epoch": 0.5761014686248331, "grad_norm": 1.1675012902233368, "learning_rate": 4.020793451951839e-06, "loss": 0.4807, "step": 2589 }, { "epoch": 0.5763239875389408, "grad_norm": 1.1837526976417534, "learning_rate": 4.017259917171516e-06, "loss": 0.4776, "step": 2590 }, { "epoch": 0.5765465064530485, "grad_norm": 1.2053993330880148, "learning_rate": 4.013726892854654e-06, "loss": 0.4648, "step": 2591 }, { "epoch": 0.5767690253671562, "grad_norm": 1.3757851103181986, "learning_rate": 4.010194380836403e-06, "loss": 0.4767, "step": 2592 }, { "epoch": 0.5769915442812639, "grad_norm": 1.1439864896014003, "learning_rate": 4.006662382951653e-06, "loss": 0.4684, "step": 2593 }, { "epoch": 0.5772140631953716, "grad_norm": 1.168732894807988, "learning_rate": 4.0031309010350265e-06, "loss": 0.4808, "step": 2594 }, { "epoch": 0.5774365821094793, "grad_norm": 1.1240777677166487, "learning_rate": 3.999599936920875e-06, "loss": 0.4702, "step": 2595 }, { "epoch": 0.577659101023587, "grad_norm": 1.1462089775109572, "learning_rate": 3.996069492443283e-06, "loss": 0.4749, "step": 2596 }, { "epoch": 0.5778816199376947, "grad_norm": 1.0980905421262173, "learning_rate": 3.992539569436067e-06, "loss": 0.4813, "step": 2597 }, { "epoch": 0.5781041388518025, "grad_norm": 1.1148037002417788, "learning_rate": 3.9890101697327676e-06, "loss": 0.4722, "step": 2598 }, { "epoch": 0.5783266577659101, "grad_norm": 1.1333090242258534, "learning_rate": 3.985481295166658e-06, "loss": 0.4668, "step": 2599 }, { "epoch": 0.5785491766800178, "grad_norm": 1.0961921797654712, "learning_rate": 3.981952947570735e-06, "loss": 0.4682, "step": 2600 }, { "epoch": 0.5787716955941254, "grad_norm": 1.185838093400848, "learning_rate": 3.9784251287777274e-06, "loss": 0.4525, "step": 2601 }, { "epoch": 0.5789942145082332, "grad_norm": 1.1877829718132746, "learning_rate": 3.974897840620084e-06, "loss": 0.4741, "step": 2602 }, { "epoch": 0.5792167334223409, "grad_norm": 1.1326005987300014, "learning_rate": 3.97137108492998e-06, "loss": 0.469, "step": 2603 }, { "epoch": 0.5794392523364486, "grad_norm": 1.2399303693305317, "learning_rate": 3.967844863539313e-06, "loss": 0.4719, "step": 2604 }, { "epoch": 0.5796617712505563, "grad_norm": 1.1835871304392822, "learning_rate": 3.964319178279703e-06, "loss": 0.4585, "step": 2605 }, { "epoch": 0.579884290164664, "grad_norm": 1.1421471054389785, "learning_rate": 3.960794030982493e-06, "loss": 0.4763, "step": 2606 }, { "epoch": 0.5801068090787717, "grad_norm": 1.2315718929042754, "learning_rate": 3.957269423478748e-06, "loss": 0.4649, "step": 2607 }, { "epoch": 0.5803293279928794, "grad_norm": 1.178304328228628, "learning_rate": 3.95374535759925e-06, "loss": 0.4686, "step": 2608 }, { "epoch": 0.5805518469069871, "grad_norm": 1.18530861815621, "learning_rate": 3.950221835174498e-06, "loss": 0.4632, "step": 2609 }, { "epoch": 0.5807743658210948, "grad_norm": 1.0756495623807656, "learning_rate": 3.94669885803471e-06, "loss": 0.4648, "step": 2610 }, { "epoch": 0.5809968847352025, "grad_norm": 1.0687087156095394, "learning_rate": 3.943176428009826e-06, "loss": 0.4473, "step": 2611 }, { "epoch": 0.5812194036493102, "grad_norm": 1.0902881528062176, "learning_rate": 3.939654546929492e-06, "loss": 0.4468, "step": 2612 }, { "epoch": 0.5814419225634179, "grad_norm": 1.2135406077553215, "learning_rate": 3.93613321662308e-06, "loss": 0.4593, "step": 2613 }, { "epoch": 0.5816644414775256, "grad_norm": 1.1564281059265242, "learning_rate": 3.932612438919667e-06, "loss": 0.4772, "step": 2614 }, { "epoch": 0.5818869603916332, "grad_norm": 1.2022924688229115, "learning_rate": 3.929092215648046e-06, "loss": 0.4842, "step": 2615 }, { "epoch": 0.582109479305741, "grad_norm": 1.167088302243453, "learning_rate": 3.925572548636722e-06, "loss": 0.4731, "step": 2616 }, { "epoch": 0.5823319982198487, "grad_norm": 1.1790443804391575, "learning_rate": 3.922053439713914e-06, "loss": 0.4805, "step": 2617 }, { "epoch": 0.5825545171339563, "grad_norm": 1.2308820297720242, "learning_rate": 3.9185348907075435e-06, "loss": 0.4664, "step": 2618 }, { "epoch": 0.5827770360480641, "grad_norm": 1.1626261364027402, "learning_rate": 3.915016903445252e-06, "loss": 0.4483, "step": 2619 }, { "epoch": 0.5829995549621718, "grad_norm": 1.199581098051896, "learning_rate": 3.9114994797543795e-06, "loss": 0.4904, "step": 2620 }, { "epoch": 0.5832220738762794, "grad_norm": 1.2014376724538842, "learning_rate": 3.907982621461978e-06, "loss": 0.4813, "step": 2621 }, { "epoch": 0.5834445927903872, "grad_norm": 1.2101764232971441, "learning_rate": 3.904466330394804e-06, "loss": 0.4674, "step": 2622 }, { "epoch": 0.5836671117044949, "grad_norm": 1.2386137413832494, "learning_rate": 3.900950608379322e-06, "loss": 0.4798, "step": 2623 }, { "epoch": 0.5838896306186026, "grad_norm": 1.375339279050549, "learning_rate": 3.8974354572416936e-06, "loss": 0.4665, "step": 2624 }, { "epoch": 0.5841121495327103, "grad_norm": 1.2068328647896092, "learning_rate": 3.893920878807797e-06, "loss": 0.4896, "step": 2625 }, { "epoch": 0.584334668446818, "grad_norm": 1.1790528094082493, "learning_rate": 3.890406874903203e-06, "loss": 0.4527, "step": 2626 }, { "epoch": 0.5845571873609257, "grad_norm": 1.1902344794881135, "learning_rate": 3.8868934473531836e-06, "loss": 0.4616, "step": 2627 }, { "epoch": 0.5847797062750334, "grad_norm": 1.1717646276910598, "learning_rate": 3.883380597982716e-06, "loss": 0.4597, "step": 2628 }, { "epoch": 0.585002225189141, "grad_norm": 1.2047319768502913, "learning_rate": 3.879868328616476e-06, "loss": 0.442, "step": 2629 }, { "epoch": 0.5852247441032488, "grad_norm": 1.205513963840043, "learning_rate": 3.876356641078833e-06, "loss": 0.4797, "step": 2630 }, { "epoch": 0.5854472630173565, "grad_norm": 1.1941733076678902, "learning_rate": 3.8728455371938654e-06, "loss": 0.4651, "step": 2631 }, { "epoch": 0.5856697819314641, "grad_norm": 1.1173591839410542, "learning_rate": 3.869335018785335e-06, "loss": 0.4736, "step": 2632 }, { "epoch": 0.5858923008455719, "grad_norm": 1.1902556549753383, "learning_rate": 3.865825087676711e-06, "loss": 0.46, "step": 2633 }, { "epoch": 0.5861148197596796, "grad_norm": 1.1487823602717615, "learning_rate": 3.862315745691149e-06, "loss": 0.4551, "step": 2634 }, { "epoch": 0.5863373386737872, "grad_norm": 1.1087445272262315, "learning_rate": 3.8588069946515055e-06, "loss": 0.4878, "step": 2635 }, { "epoch": 0.586559857587895, "grad_norm": 1.167140616861334, "learning_rate": 3.855298836380323e-06, "loss": 0.4595, "step": 2636 }, { "epoch": 0.5867823765020027, "grad_norm": 1.2157353616293067, "learning_rate": 3.85179127269984e-06, "loss": 0.4666, "step": 2637 }, { "epoch": 0.5870048954161103, "grad_norm": 1.1216150182441915, "learning_rate": 3.8482843054319895e-06, "loss": 0.4698, "step": 2638 }, { "epoch": 0.5872274143302181, "grad_norm": 1.1345133931452467, "learning_rate": 3.84477793639839e-06, "loss": 0.4528, "step": 2639 }, { "epoch": 0.5874499332443258, "grad_norm": 1.2583831004984638, "learning_rate": 3.841272167420348e-06, "loss": 0.4799, "step": 2640 }, { "epoch": 0.5876724521584334, "grad_norm": 1.247376125050359, "learning_rate": 3.837767000318862e-06, "loss": 0.4765, "step": 2641 }, { "epoch": 0.5878949710725412, "grad_norm": 1.1627209359713666, "learning_rate": 3.834262436914618e-06, "loss": 0.47, "step": 2642 }, { "epoch": 0.5881174899866488, "grad_norm": 1.461447525369011, "learning_rate": 3.830758479027983e-06, "loss": 0.4691, "step": 2643 }, { "epoch": 0.5883400089007566, "grad_norm": 1.3336620107942079, "learning_rate": 3.82725512847902e-06, "loss": 0.4633, "step": 2644 }, { "epoch": 0.5885625278148643, "grad_norm": 1.1569503205481502, "learning_rate": 3.8237523870874645e-06, "loss": 0.4491, "step": 2645 }, { "epoch": 0.5887850467289719, "grad_norm": 1.2556648798862744, "learning_rate": 3.820250256672744e-06, "loss": 0.4776, "step": 2646 }, { "epoch": 0.5890075656430797, "grad_norm": 1.3334198357601377, "learning_rate": 3.816748739053963e-06, "loss": 0.4698, "step": 2647 }, { "epoch": 0.5892300845571874, "grad_norm": 1.1439605729236826, "learning_rate": 3.8132478360499128e-06, "loss": 0.4598, "step": 2648 }, { "epoch": 0.589452603471295, "grad_norm": 1.1450020932102614, "learning_rate": 3.809747549479061e-06, "loss": 0.4664, "step": 2649 }, { "epoch": 0.5896751223854028, "grad_norm": 1.1675267749461036, "learning_rate": 3.8062478811595603e-06, "loss": 0.4637, "step": 2650 }, { "epoch": 0.5898976412995105, "grad_norm": 1.1240672439786676, "learning_rate": 3.802748832909237e-06, "loss": 0.4576, "step": 2651 }, { "epoch": 0.5901201602136181, "grad_norm": 1.1402785136618512, "learning_rate": 3.799250406545597e-06, "loss": 0.4567, "step": 2652 }, { "epoch": 0.5903426791277259, "grad_norm": 1.163183319275532, "learning_rate": 3.795752603885825e-06, "loss": 0.4783, "step": 2653 }, { "epoch": 0.5905651980418336, "grad_norm": 1.2522997459030705, "learning_rate": 3.7922554267467794e-06, "loss": 0.4445, "step": 2654 }, { "epoch": 0.5907877169559412, "grad_norm": 1.2038182719957222, "learning_rate": 3.7887588769449923e-06, "loss": 0.469, "step": 2655 }, { "epoch": 0.591010235870049, "grad_norm": 1.2346822076175517, "learning_rate": 3.785262956296677e-06, "loss": 0.4609, "step": 2656 }, { "epoch": 0.5912327547841566, "grad_norm": 1.2135584471544227, "learning_rate": 3.781767666617713e-06, "loss": 0.4571, "step": 2657 }, { "epoch": 0.5914552736982643, "grad_norm": 1.2059861898959416, "learning_rate": 3.7782730097236526e-06, "loss": 0.4636, "step": 2658 }, { "epoch": 0.5916777926123721, "grad_norm": 1.2441758277420945, "learning_rate": 3.7747789874297235e-06, "loss": 0.4607, "step": 2659 }, { "epoch": 0.5919003115264797, "grad_norm": 1.2405245531045077, "learning_rate": 3.7712856015508204e-06, "loss": 0.4691, "step": 2660 }, { "epoch": 0.5921228304405874, "grad_norm": 1.2499294684314903, "learning_rate": 3.7677928539015064e-06, "loss": 0.4665, "step": 2661 }, { "epoch": 0.5923453493546952, "grad_norm": 1.1819567128559625, "learning_rate": 3.7643007462960186e-06, "loss": 0.4583, "step": 2662 }, { "epoch": 0.5925678682688028, "grad_norm": 1.1962428888598282, "learning_rate": 3.7608092805482555e-06, "loss": 0.4645, "step": 2663 }, { "epoch": 0.5927903871829105, "grad_norm": 1.1070672355589957, "learning_rate": 3.757318458471787e-06, "loss": 0.4614, "step": 2664 }, { "epoch": 0.5930129060970183, "grad_norm": 1.2400601764448507, "learning_rate": 3.7538282818798457e-06, "loss": 0.4714, "step": 2665 }, { "epoch": 0.5932354250111259, "grad_norm": 1.1299129994899386, "learning_rate": 3.7503387525853284e-06, "loss": 0.4599, "step": 2666 }, { "epoch": 0.5934579439252337, "grad_norm": 1.1960402174193268, "learning_rate": 3.7468498724007964e-06, "loss": 0.4706, "step": 2667 }, { "epoch": 0.5936804628393414, "grad_norm": 1.2400381384983377, "learning_rate": 3.7433616431384767e-06, "loss": 0.4733, "step": 2668 }, { "epoch": 0.593902981753449, "grad_norm": 1.2609252193418188, "learning_rate": 3.739874066610256e-06, "loss": 0.4803, "step": 2669 }, { "epoch": 0.5941255006675568, "grad_norm": 1.1709435380710027, "learning_rate": 3.7363871446276814e-06, "loss": 0.4718, "step": 2670 }, { "epoch": 0.5943480195816644, "grad_norm": 1.1723973775353544, "learning_rate": 3.7329008790019594e-06, "loss": 0.4766, "step": 2671 }, { "epoch": 0.5945705384957721, "grad_norm": 1.2996862441369286, "learning_rate": 3.729415271543956e-06, "loss": 0.4534, "step": 2672 }, { "epoch": 0.5947930574098799, "grad_norm": 1.1288751772914372, "learning_rate": 3.7259303240641976e-06, "loss": 0.469, "step": 2673 }, { "epoch": 0.5950155763239875, "grad_norm": 1.23256269624103, "learning_rate": 3.722446038372864e-06, "loss": 0.4583, "step": 2674 }, { "epoch": 0.5952380952380952, "grad_norm": 1.3391015977935905, "learning_rate": 3.7189624162797965e-06, "loss": 0.455, "step": 2675 }, { "epoch": 0.595460614152203, "grad_norm": 1.1972947010556623, "learning_rate": 3.7154794595944875e-06, "loss": 0.4503, "step": 2676 }, { "epoch": 0.5956831330663106, "grad_norm": 1.2448752212799807, "learning_rate": 3.711997170126085e-06, "loss": 0.468, "step": 2677 }, { "epoch": 0.5959056519804183, "grad_norm": 1.2451418427158742, "learning_rate": 3.708515549683388e-06, "loss": 0.4677, "step": 2678 }, { "epoch": 0.5961281708945261, "grad_norm": 1.1812964046532803, "learning_rate": 3.7050346000748526e-06, "loss": 0.4536, "step": 2679 }, { "epoch": 0.5963506898086337, "grad_norm": 1.1560425780492694, "learning_rate": 3.7015543231085827e-06, "loss": 0.4736, "step": 2680 }, { "epoch": 0.5965732087227414, "grad_norm": 1.1404101079197233, "learning_rate": 3.698074720592336e-06, "loss": 0.47, "step": 2681 }, { "epoch": 0.5967957276368492, "grad_norm": 1.278221126484078, "learning_rate": 3.6945957943335175e-06, "loss": 0.48, "step": 2682 }, { "epoch": 0.5970182465509568, "grad_norm": 1.1460483446880447, "learning_rate": 3.6911175461391794e-06, "loss": 0.4677, "step": 2683 }, { "epoch": 0.5972407654650645, "grad_norm": 1.1945214870085064, "learning_rate": 3.687639977816028e-06, "loss": 0.4767, "step": 2684 }, { "epoch": 0.5974632843791722, "grad_norm": 1.167327621104579, "learning_rate": 3.6841630911704085e-06, "loss": 0.4706, "step": 2685 }, { "epoch": 0.5976858032932799, "grad_norm": 1.123191594821384, "learning_rate": 3.680686888008316e-06, "loss": 0.4628, "step": 2686 }, { "epoch": 0.5979083222073877, "grad_norm": 1.1117723775237511, "learning_rate": 3.677211370135393e-06, "loss": 0.4515, "step": 2687 }, { "epoch": 0.5981308411214953, "grad_norm": 1.1673004993634168, "learning_rate": 3.6737365393569212e-06, "loss": 0.4778, "step": 2688 }, { "epoch": 0.598353360035603, "grad_norm": 1.1467357239609046, "learning_rate": 3.6702623974778296e-06, "loss": 0.458, "step": 2689 }, { "epoch": 0.5985758789497108, "grad_norm": 1.2201699605321348, "learning_rate": 3.666788946302686e-06, "loss": 0.4634, "step": 2690 }, { "epoch": 0.5987983978638184, "grad_norm": 1.17480299176576, "learning_rate": 3.6633161876357017e-06, "loss": 0.4561, "step": 2691 }, { "epoch": 0.5990209167779261, "grad_norm": 1.0982580111277187, "learning_rate": 3.6598441232807234e-06, "loss": 0.454, "step": 2692 }, { "epoch": 0.5992434356920339, "grad_norm": 1.2227486068698437, "learning_rate": 3.6563727550412478e-06, "loss": 0.4631, "step": 2693 }, { "epoch": 0.5994659546061415, "grad_norm": 1.2593442132891612, "learning_rate": 3.6529020847203983e-06, "loss": 0.4756, "step": 2694 }, { "epoch": 0.5996884735202492, "grad_norm": 1.3900192594396652, "learning_rate": 3.6494321141209436e-06, "loss": 0.491, "step": 2695 }, { "epoch": 0.599910992434357, "grad_norm": 1.2875550891984096, "learning_rate": 3.6459628450452855e-06, "loss": 0.4728, "step": 2696 }, { "epoch": 0.6001335113484646, "grad_norm": 1.300908910754698, "learning_rate": 3.642494279295462e-06, "loss": 0.4585, "step": 2697 }, { "epoch": 0.6003560302625723, "grad_norm": 1.1620080017466834, "learning_rate": 3.639026418673144e-06, "loss": 0.459, "step": 2698 }, { "epoch": 0.60057854917668, "grad_norm": 1.2503557465573842, "learning_rate": 3.6355592649796406e-06, "loss": 0.458, "step": 2699 }, { "epoch": 0.6008010680907877, "grad_norm": 1.1836790145754996, "learning_rate": 3.6320928200158913e-06, "loss": 0.4614, "step": 2700 }, { "epoch": 0.6010235870048954, "grad_norm": 1.2160932543267293, "learning_rate": 3.628627085582466e-06, "loss": 0.4788, "step": 2701 }, { "epoch": 0.6012461059190031, "grad_norm": 1.248640062962131, "learning_rate": 3.625162063479568e-06, "loss": 0.4594, "step": 2702 }, { "epoch": 0.6014686248331108, "grad_norm": 1.2261582906515924, "learning_rate": 3.6216977555070267e-06, "loss": 0.4408, "step": 2703 }, { "epoch": 0.6016911437472185, "grad_norm": 1.1280498836808386, "learning_rate": 3.618234163464306e-06, "loss": 0.4668, "step": 2704 }, { "epoch": 0.6019136626613262, "grad_norm": 1.118292491649036, "learning_rate": 3.6147712891504936e-06, "loss": 0.4513, "step": 2705 }, { "epoch": 0.6021361815754339, "grad_norm": 1.1760226698158485, "learning_rate": 3.611309134364308e-06, "loss": 0.4565, "step": 2706 }, { "epoch": 0.6023587004895417, "grad_norm": 1.1287583793890978, "learning_rate": 3.60784770090409e-06, "loss": 0.4632, "step": 2707 }, { "epoch": 0.6025812194036493, "grad_norm": 1.1821384853454702, "learning_rate": 3.6043869905678096e-06, "loss": 0.4595, "step": 2708 }, { "epoch": 0.602803738317757, "grad_norm": 1.1712543326539369, "learning_rate": 3.6009270051530565e-06, "loss": 0.45, "step": 2709 }, { "epoch": 0.6030262572318648, "grad_norm": 1.1967392486632862, "learning_rate": 3.5974677464570496e-06, "loss": 0.4494, "step": 2710 }, { "epoch": 0.6032487761459724, "grad_norm": 1.1622690176942543, "learning_rate": 3.5940092162766234e-06, "loss": 0.4562, "step": 2711 }, { "epoch": 0.6034712950600801, "grad_norm": 1.1548797037439424, "learning_rate": 3.5905514164082423e-06, "loss": 0.4734, "step": 2712 }, { "epoch": 0.6036938139741878, "grad_norm": 1.204709374614804, "learning_rate": 3.5870943486479855e-06, "loss": 0.4571, "step": 2713 }, { "epoch": 0.6039163328882955, "grad_norm": 1.1437872589625602, "learning_rate": 3.5836380147915518e-06, "loss": 0.4572, "step": 2714 }, { "epoch": 0.6041388518024032, "grad_norm": 1.2199287161371528, "learning_rate": 3.5801824166342623e-06, "loss": 0.4498, "step": 2715 }, { "epoch": 0.6043613707165109, "grad_norm": 1.2062736639585256, "learning_rate": 3.576727555971054e-06, "loss": 0.455, "step": 2716 }, { "epoch": 0.6045838896306186, "grad_norm": 1.154979033372084, "learning_rate": 3.573273434596478e-06, "loss": 0.4523, "step": 2717 }, { "epoch": 0.6048064085447263, "grad_norm": 1.1164556346859495, "learning_rate": 3.5698200543047085e-06, "loss": 0.4699, "step": 2718 }, { "epoch": 0.605028927458834, "grad_norm": 1.1901298239990914, "learning_rate": 3.5663674168895276e-06, "loss": 0.4619, "step": 2719 }, { "epoch": 0.6052514463729417, "grad_norm": 1.155252602543938, "learning_rate": 3.562915524144336e-06, "loss": 0.4748, "step": 2720 }, { "epoch": 0.6054739652870494, "grad_norm": 1.1748261769059263, "learning_rate": 3.5594643778621455e-06, "loss": 0.4624, "step": 2721 }, { "epoch": 0.6056964842011571, "grad_norm": 1.1306743482311898, "learning_rate": 3.5560139798355814e-06, "loss": 0.4568, "step": 2722 }, { "epoch": 0.6059190031152648, "grad_norm": 1.1109787929613324, "learning_rate": 3.552564331856876e-06, "loss": 0.461, "step": 2723 }, { "epoch": 0.6061415220293725, "grad_norm": 1.236331439963534, "learning_rate": 3.5491154357178814e-06, "loss": 0.4685, "step": 2724 }, { "epoch": 0.6063640409434802, "grad_norm": 1.1559199088170007, "learning_rate": 3.545667293210049e-06, "loss": 0.4699, "step": 2725 }, { "epoch": 0.6065865598575879, "grad_norm": 1.1660396014134367, "learning_rate": 3.5422199061244454e-06, "loss": 0.4647, "step": 2726 }, { "epoch": 0.6068090787716955, "grad_norm": 1.132468973615632, "learning_rate": 3.538773276251743e-06, "loss": 0.4585, "step": 2727 }, { "epoch": 0.6070315976858033, "grad_norm": 1.2395145121546776, "learning_rate": 3.535327405382219e-06, "loss": 0.4744, "step": 2728 }, { "epoch": 0.607254116599911, "grad_norm": 1.2376198588660297, "learning_rate": 3.5318822953057563e-06, "loss": 0.4633, "step": 2729 }, { "epoch": 0.6074766355140186, "grad_norm": 1.1601435142092618, "learning_rate": 3.528437947811847e-06, "loss": 0.4351, "step": 2730 }, { "epoch": 0.6076991544281264, "grad_norm": 1.1889505839175414, "learning_rate": 3.524994364689584e-06, "loss": 0.4534, "step": 2731 }, { "epoch": 0.6079216733422341, "grad_norm": 1.1328754085476993, "learning_rate": 3.521551547727663e-06, "loss": 0.4514, "step": 2732 }, { "epoch": 0.6081441922563418, "grad_norm": 1.2016866548295666, "learning_rate": 3.5181094987143814e-06, "loss": 0.459, "step": 2733 }, { "epoch": 0.6083667111704495, "grad_norm": 1.2920436214802582, "learning_rate": 3.514668219437639e-06, "loss": 0.4713, "step": 2734 }, { "epoch": 0.6085892300845572, "grad_norm": 1.1819339957269521, "learning_rate": 3.5112277116849343e-06, "loss": 0.4634, "step": 2735 }, { "epoch": 0.6088117489986649, "grad_norm": 1.221117497773678, "learning_rate": 3.5077879772433666e-06, "loss": 0.446, "step": 2736 }, { "epoch": 0.6090342679127726, "grad_norm": 1.7035372573941712, "learning_rate": 3.5043490178996344e-06, "loss": 0.4705, "step": 2737 }, { "epoch": 0.6092567868268803, "grad_norm": 1.106065492004103, "learning_rate": 3.50091083544003e-06, "loss": 0.4712, "step": 2738 }, { "epoch": 0.609479305740988, "grad_norm": 1.176423247926734, "learning_rate": 3.497473431650446e-06, "loss": 0.452, "step": 2739 }, { "epoch": 0.6097018246550957, "grad_norm": 1.163808020218605, "learning_rate": 3.494036808316366e-06, "loss": 0.4552, "step": 2740 }, { "epoch": 0.6099243435692033, "grad_norm": 1.203801363166025, "learning_rate": 3.490600967222872e-06, "loss": 0.4598, "step": 2741 }, { "epoch": 0.6101468624833111, "grad_norm": 1.1939431987220062, "learning_rate": 3.487165910154641e-06, "loss": 0.4604, "step": 2742 }, { "epoch": 0.6103693813974188, "grad_norm": 1.1610405559806118, "learning_rate": 3.4837316388959398e-06, "loss": 0.4563, "step": 2743 }, { "epoch": 0.6105919003115264, "grad_norm": 1.1761711344027426, "learning_rate": 3.480298155230626e-06, "loss": 0.4507, "step": 2744 }, { "epoch": 0.6108144192256342, "grad_norm": 1.263639842054124, "learning_rate": 3.4768654609421486e-06, "loss": 0.4674, "step": 2745 }, { "epoch": 0.6110369381397419, "grad_norm": 1.2699753943942245, "learning_rate": 3.4734335578135516e-06, "loss": 0.4652, "step": 2746 }, { "epoch": 0.6112594570538495, "grad_norm": 1.1858552406243206, "learning_rate": 3.470002447627461e-06, "loss": 0.4585, "step": 2747 }, { "epoch": 0.6114819759679573, "grad_norm": 1.2912441821174965, "learning_rate": 3.466572132166094e-06, "loss": 0.432, "step": 2748 }, { "epoch": 0.611704494882065, "grad_norm": 1.210196788000266, "learning_rate": 3.463142613211258e-06, "loss": 0.4785, "step": 2749 }, { "epoch": 0.6119270137961726, "grad_norm": 1.2166004948005236, "learning_rate": 3.4597138925443407e-06, "loss": 0.4582, "step": 2750 }, { "epoch": 0.6121495327102804, "grad_norm": 1.3342015277342465, "learning_rate": 3.4562859719463216e-06, "loss": 0.4588, "step": 2751 }, { "epoch": 0.6123720516243881, "grad_norm": 1.164164577941016, "learning_rate": 3.452858853197758e-06, "loss": 0.4505, "step": 2752 }, { "epoch": 0.6125945705384958, "grad_norm": 1.2468436487704413, "learning_rate": 3.4494325380787953e-06, "loss": 0.4643, "step": 2753 }, { "epoch": 0.6128170894526035, "grad_norm": 1.311745790879984, "learning_rate": 3.4460070283691583e-06, "loss": 0.4637, "step": 2754 }, { "epoch": 0.6130396083667111, "grad_norm": 1.1778019893969123, "learning_rate": 3.4425823258481583e-06, "loss": 0.4699, "step": 2755 }, { "epoch": 0.6132621272808189, "grad_norm": 1.2028191244822906, "learning_rate": 3.4391584322946824e-06, "loss": 0.4703, "step": 2756 }, { "epoch": 0.6134846461949266, "grad_norm": 1.163418028614092, "learning_rate": 3.4357353494872004e-06, "loss": 0.4524, "step": 2757 }, { "epoch": 0.6137071651090342, "grad_norm": 1.2103264054971496, "learning_rate": 3.4323130792037596e-06, "loss": 0.4393, "step": 2758 }, { "epoch": 0.613929684023142, "grad_norm": 1.1983140111263408, "learning_rate": 3.428891623221985e-06, "loss": 0.4544, "step": 2759 }, { "epoch": 0.6141522029372497, "grad_norm": 1.178279570215205, "learning_rate": 3.4254709833190773e-06, "loss": 0.4605, "step": 2760 }, { "epoch": 0.6143747218513573, "grad_norm": 1.198173070731715, "learning_rate": 3.422051161271819e-06, "loss": 0.4771, "step": 2761 }, { "epoch": 0.6145972407654651, "grad_norm": 1.1493053561173312, "learning_rate": 3.418632158856563e-06, "loss": 0.463, "step": 2762 }, { "epoch": 0.6148197596795728, "grad_norm": 1.1420182190206312, "learning_rate": 3.415213977849237e-06, "loss": 0.4411, "step": 2763 }, { "epoch": 0.6150422785936804, "grad_norm": 1.1849736304619052, "learning_rate": 3.411796620025343e-06, "loss": 0.4532, "step": 2764 }, { "epoch": 0.6152647975077882, "grad_norm": 1.1640673141191231, "learning_rate": 3.4083800871599526e-06, "loss": 0.4608, "step": 2765 }, { "epoch": 0.6154873164218959, "grad_norm": 1.1994858624699043, "learning_rate": 3.4049643810277135e-06, "loss": 0.4684, "step": 2766 }, { "epoch": 0.6157098353360035, "grad_norm": 1.222348307451249, "learning_rate": 3.4015495034028405e-06, "loss": 0.4644, "step": 2767 }, { "epoch": 0.6159323542501113, "grad_norm": 1.2139510795751254, "learning_rate": 3.3981354560591216e-06, "loss": 0.4618, "step": 2768 }, { "epoch": 0.6161548731642189, "grad_norm": 1.3599992493589848, "learning_rate": 3.3947222407699094e-06, "loss": 0.4788, "step": 2769 }, { "epoch": 0.6163773920783266, "grad_norm": 1.2104298469505805, "learning_rate": 3.3913098593081264e-06, "loss": 0.4636, "step": 2770 }, { "epoch": 0.6165999109924344, "grad_norm": 1.4466673623923347, "learning_rate": 3.3878983134462596e-06, "loss": 0.4607, "step": 2771 }, { "epoch": 0.616822429906542, "grad_norm": 1.281892350752231, "learning_rate": 3.3844876049563645e-06, "loss": 0.451, "step": 2772 }, { "epoch": 0.6170449488206498, "grad_norm": 1.1647636246792565, "learning_rate": 3.3810777356100657e-06, "loss": 0.4585, "step": 2773 }, { "epoch": 0.6172674677347575, "grad_norm": 1.2366301041246257, "learning_rate": 3.3776687071785435e-06, "loss": 0.4798, "step": 2774 }, { "epoch": 0.6174899866488651, "grad_norm": 1.1804162484209804, "learning_rate": 3.374260521432546e-06, "loss": 0.4614, "step": 2775 }, { "epoch": 0.6177125055629729, "grad_norm": 1.1460387670373502, "learning_rate": 3.3708531801423818e-06, "loss": 0.4488, "step": 2776 }, { "epoch": 0.6179350244770806, "grad_norm": 1.2637836937779885, "learning_rate": 3.367446685077924e-06, "loss": 0.4621, "step": 2777 }, { "epoch": 0.6181575433911882, "grad_norm": 1.172535993648606, "learning_rate": 3.3640410380086015e-06, "loss": 0.4662, "step": 2778 }, { "epoch": 0.618380062305296, "grad_norm": 1.1661991896428754, "learning_rate": 3.3606362407034086e-06, "loss": 0.4522, "step": 2779 }, { "epoch": 0.6186025812194037, "grad_norm": 1.2396638370927129, "learning_rate": 3.3572322949308933e-06, "loss": 0.455, "step": 2780 }, { "epoch": 0.6188251001335113, "grad_norm": 1.1946617745912615, "learning_rate": 3.353829202459162e-06, "loss": 0.4393, "step": 2781 }, { "epoch": 0.6190476190476191, "grad_norm": 1.0748646331868952, "learning_rate": 3.35042696505588e-06, "loss": 0.4621, "step": 2782 }, { "epoch": 0.6192701379617267, "grad_norm": 1.2368295621353096, "learning_rate": 3.347025584488267e-06, "loss": 0.4644, "step": 2783 }, { "epoch": 0.6194926568758344, "grad_norm": 1.2151750555172147, "learning_rate": 3.343625062523098e-06, "loss": 0.4576, "step": 2784 }, { "epoch": 0.6197151757899422, "grad_norm": 1.24291385770843, "learning_rate": 3.3402254009267e-06, "loss": 0.4543, "step": 2785 }, { "epoch": 0.6199376947040498, "grad_norm": 1.135346443033411, "learning_rate": 3.3368266014649586e-06, "loss": 0.4378, "step": 2786 }, { "epoch": 0.6201602136181575, "grad_norm": 1.259211544620751, "learning_rate": 3.3334286659033046e-06, "loss": 0.4433, "step": 2787 }, { "epoch": 0.6203827325322653, "grad_norm": 1.3094000845089782, "learning_rate": 3.330031596006726e-06, "loss": 0.4552, "step": 2788 }, { "epoch": 0.6206052514463729, "grad_norm": 1.1904936058566347, "learning_rate": 3.3266353935397578e-06, "loss": 0.4602, "step": 2789 }, { "epoch": 0.6208277703604806, "grad_norm": 1.2545513516233968, "learning_rate": 3.323240060266484e-06, "loss": 0.4626, "step": 2790 }, { "epoch": 0.6210502892745884, "grad_norm": 1.2505113568324238, "learning_rate": 3.319845597950538e-06, "loss": 0.4657, "step": 2791 }, { "epoch": 0.621272808188696, "grad_norm": 1.233965539063061, "learning_rate": 3.316452008355103e-06, "loss": 0.4659, "step": 2792 }, { "epoch": 0.6214953271028038, "grad_norm": 1.2169651113964977, "learning_rate": 3.313059293242907e-06, "loss": 0.4423, "step": 2793 }, { "epoch": 0.6217178460169115, "grad_norm": 1.206794083878574, "learning_rate": 3.309667454376224e-06, "loss": 0.4437, "step": 2794 }, { "epoch": 0.6219403649310191, "grad_norm": 1.1965297924315694, "learning_rate": 3.306276493516871e-06, "loss": 0.4577, "step": 2795 }, { "epoch": 0.6221628838451269, "grad_norm": 1.1883878501594454, "learning_rate": 3.3028864124262105e-06, "loss": 0.4492, "step": 2796 }, { "epoch": 0.6223854027592345, "grad_norm": 1.248819584336844, "learning_rate": 3.2994972128651493e-06, "loss": 0.4406, "step": 2797 }, { "epoch": 0.6226079216733422, "grad_norm": 1.2212042797092788, "learning_rate": 3.2961088965941345e-06, "loss": 0.4607, "step": 2798 }, { "epoch": 0.62283044058745, "grad_norm": 1.211003084750809, "learning_rate": 3.2927214653731566e-06, "loss": 0.4642, "step": 2799 }, { "epoch": 0.6230529595015576, "grad_norm": 1.2578472001650476, "learning_rate": 3.2893349209617433e-06, "loss": 0.4546, "step": 2800 }, { "epoch": 0.6232754784156653, "grad_norm": 1.1465135132482929, "learning_rate": 3.2859492651189633e-06, "loss": 0.4667, "step": 2801 }, { "epoch": 0.6234979973297731, "grad_norm": 1.2619881913715816, "learning_rate": 3.2825644996034227e-06, "loss": 0.4603, "step": 2802 }, { "epoch": 0.6237205162438807, "grad_norm": 1.1621695544015958, "learning_rate": 3.279180626173266e-06, "loss": 0.4672, "step": 2803 }, { "epoch": 0.6239430351579884, "grad_norm": 1.1009357613639177, "learning_rate": 3.2757976465861775e-06, "loss": 0.4514, "step": 2804 }, { "epoch": 0.6241655540720962, "grad_norm": 1.2246126571621931, "learning_rate": 3.2724155625993712e-06, "loss": 0.4929, "step": 2805 }, { "epoch": 0.6243880729862038, "grad_norm": 1.1634940785298997, "learning_rate": 3.2690343759696e-06, "loss": 0.4649, "step": 2806 }, { "epoch": 0.6246105919003115, "grad_norm": 1.1029971855769143, "learning_rate": 3.265654088453148e-06, "loss": 0.457, "step": 2807 }, { "epoch": 0.6248331108144193, "grad_norm": 1.2274135498237209, "learning_rate": 3.2622747018058355e-06, "loss": 0.471, "step": 2808 }, { "epoch": 0.6250556297285269, "grad_norm": 1.1599311607000518, "learning_rate": 3.2588962177830104e-06, "loss": 0.4528, "step": 2809 }, { "epoch": 0.6252781486426346, "grad_norm": 1.1467892158991149, "learning_rate": 3.255518638139558e-06, "loss": 0.4464, "step": 2810 }, { "epoch": 0.6255006675567423, "grad_norm": 1.17345715864102, "learning_rate": 3.252141964629889e-06, "loss": 0.4365, "step": 2811 }, { "epoch": 0.62572318647085, "grad_norm": 1.2007262935349998, "learning_rate": 3.2487661990079435e-06, "loss": 0.4646, "step": 2812 }, { "epoch": 0.6259457053849578, "grad_norm": 1.1904934677109125, "learning_rate": 3.2453913430271926e-06, "loss": 0.462, "step": 2813 }, { "epoch": 0.6261682242990654, "grad_norm": 1.243749412945224, "learning_rate": 3.242017398440633e-06, "loss": 0.4668, "step": 2814 }, { "epoch": 0.6263907432131731, "grad_norm": 1.28453356806928, "learning_rate": 3.2386443670007872e-06, "loss": 0.44, "step": 2815 }, { "epoch": 0.6266132621272809, "grad_norm": 1.1537814343314856, "learning_rate": 3.2352722504597078e-06, "loss": 0.4425, "step": 2816 }, { "epoch": 0.6268357810413885, "grad_norm": 1.2343500058488472, "learning_rate": 3.2319010505689674e-06, "loss": 0.46, "step": 2817 }, { "epoch": 0.6270582999554962, "grad_norm": 1.4187687478324216, "learning_rate": 3.228530769079663e-06, "loss": 0.4604, "step": 2818 }, { "epoch": 0.627280818869604, "grad_norm": 1.275971093861971, "learning_rate": 3.2251614077424175e-06, "loss": 0.4512, "step": 2819 }, { "epoch": 0.6275033377837116, "grad_norm": 1.3219584300428244, "learning_rate": 3.2217929683073744e-06, "loss": 0.4633, "step": 2820 }, { "epoch": 0.6277258566978193, "grad_norm": 1.23686703041832, "learning_rate": 3.2184254525241965e-06, "loss": 0.4454, "step": 2821 }, { "epoch": 0.6279483756119271, "grad_norm": 1.140479892897462, "learning_rate": 3.2150588621420665e-06, "loss": 0.4429, "step": 2822 }, { "epoch": 0.6281708945260347, "grad_norm": 1.2336875453537937, "learning_rate": 3.2116931989096924e-06, "loss": 0.474, "step": 2823 }, { "epoch": 0.6283934134401424, "grad_norm": 1.1484000609477665, "learning_rate": 3.208328464575295e-06, "loss": 0.46, "step": 2824 }, { "epoch": 0.6286159323542501, "grad_norm": 1.2670866506444223, "learning_rate": 3.2049646608866135e-06, "loss": 0.4475, "step": 2825 }, { "epoch": 0.6288384512683578, "grad_norm": 1.1976295606364726, "learning_rate": 3.201601789590906e-06, "loss": 0.4644, "step": 2826 }, { "epoch": 0.6290609701824655, "grad_norm": 1.2002593854615737, "learning_rate": 3.1982398524349407e-06, "loss": 0.4486, "step": 2827 }, { "epoch": 0.6292834890965732, "grad_norm": 1.2222991766428035, "learning_rate": 3.1948788511650077e-06, "loss": 0.453, "step": 2828 }, { "epoch": 0.6295060080106809, "grad_norm": 1.1043347194473898, "learning_rate": 3.191518787526906e-06, "loss": 0.4612, "step": 2829 }, { "epoch": 0.6297285269247886, "grad_norm": 1.2123148039320377, "learning_rate": 3.188159663265951e-06, "loss": 0.45, "step": 2830 }, { "epoch": 0.6299510458388963, "grad_norm": 1.147512570158384, "learning_rate": 3.1848014801269684e-06, "loss": 0.4636, "step": 2831 }, { "epoch": 0.630173564753004, "grad_norm": 1.2152973666935563, "learning_rate": 3.1814442398542927e-06, "loss": 0.4542, "step": 2832 }, { "epoch": 0.6303960836671118, "grad_norm": 1.251698255192224, "learning_rate": 3.1780879441917733e-06, "loss": 0.4662, "step": 2833 }, { "epoch": 0.6306186025812194, "grad_norm": 1.1748352263789044, "learning_rate": 3.1747325948827643e-06, "loss": 0.4653, "step": 2834 }, { "epoch": 0.6308411214953271, "grad_norm": 1.1879326157736327, "learning_rate": 3.171378193670135e-06, "loss": 0.4602, "step": 2835 }, { "epoch": 0.6310636404094349, "grad_norm": 1.1901374390642636, "learning_rate": 3.168024742296254e-06, "loss": 0.4512, "step": 2836 }, { "epoch": 0.6312861593235425, "grad_norm": 1.3402983552439334, "learning_rate": 3.164672242503002e-06, "loss": 0.4481, "step": 2837 }, { "epoch": 0.6315086782376502, "grad_norm": 1.1935380187489268, "learning_rate": 3.1613206960317614e-06, "loss": 0.4646, "step": 2838 }, { "epoch": 0.6317311971517579, "grad_norm": 1.1846621506482147, "learning_rate": 3.1579701046234256e-06, "loss": 0.4565, "step": 2839 }, { "epoch": 0.6319537160658656, "grad_norm": 1.1960510434899345, "learning_rate": 3.1546204700183838e-06, "loss": 0.4705, "step": 2840 }, { "epoch": 0.6321762349799733, "grad_norm": 1.2591237201332164, "learning_rate": 3.1512717939565372e-06, "loss": 0.4691, "step": 2841 }, { "epoch": 0.632398753894081, "grad_norm": 1.202392360761496, "learning_rate": 3.1479240781772826e-06, "loss": 0.4385, "step": 2842 }, { "epoch": 0.6326212728081887, "grad_norm": 1.2301787942783886, "learning_rate": 3.144577324419518e-06, "loss": 0.471, "step": 2843 }, { "epoch": 0.6328437917222964, "grad_norm": 1.24917954214663, "learning_rate": 3.1412315344216453e-06, "loss": 0.4627, "step": 2844 }, { "epoch": 0.6330663106364041, "grad_norm": 1.214900842793211, "learning_rate": 3.1378867099215642e-06, "loss": 0.4689, "step": 2845 }, { "epoch": 0.6332888295505118, "grad_norm": 1.2530245519243821, "learning_rate": 3.13454285265667e-06, "loss": 0.4552, "step": 2846 }, { "epoch": 0.6335113484646195, "grad_norm": 1.2546563070181864, "learning_rate": 3.1311999643638634e-06, "loss": 0.4727, "step": 2847 }, { "epoch": 0.6337338673787272, "grad_norm": 1.1842838672996765, "learning_rate": 3.1278580467795327e-06, "loss": 0.4578, "step": 2848 }, { "epoch": 0.6339563862928349, "grad_norm": 1.2631432982441464, "learning_rate": 3.124517101639567e-06, "loss": 0.4653, "step": 2849 }, { "epoch": 0.6341789052069426, "grad_norm": 1.2227504401707106, "learning_rate": 3.12117713067935e-06, "loss": 0.4392, "step": 2850 }, { "epoch": 0.6344014241210503, "grad_norm": 1.1921952652251593, "learning_rate": 3.1178381356337585e-06, "loss": 0.4555, "step": 2851 }, { "epoch": 0.634623943035158, "grad_norm": 1.2322712336918875, "learning_rate": 3.1145001182371593e-06, "loss": 0.4522, "step": 2852 }, { "epoch": 0.6348464619492656, "grad_norm": 1.1820439521038302, "learning_rate": 3.1111630802234205e-06, "loss": 0.4471, "step": 2853 }, { "epoch": 0.6350689808633734, "grad_norm": 1.2202494262944066, "learning_rate": 3.1078270233258913e-06, "loss": 0.4675, "step": 2854 }, { "epoch": 0.6352914997774811, "grad_norm": 1.2491982437921103, "learning_rate": 3.1044919492774173e-06, "loss": 0.465, "step": 2855 }, { "epoch": 0.6355140186915887, "grad_norm": 1.1889871465513593, "learning_rate": 3.1011578598103316e-06, "loss": 0.4475, "step": 2856 }, { "epoch": 0.6357365376056965, "grad_norm": 1.233719866515114, "learning_rate": 3.097824756656456e-06, "loss": 0.4669, "step": 2857 }, { "epoch": 0.6359590565198042, "grad_norm": 1.1808471701579586, "learning_rate": 3.0944926415470986e-06, "loss": 0.4506, "step": 2858 }, { "epoch": 0.6361815754339119, "grad_norm": 1.196511631484708, "learning_rate": 3.0911615162130583e-06, "loss": 0.4561, "step": 2859 }, { "epoch": 0.6364040943480196, "grad_norm": 1.248831918720143, "learning_rate": 3.0878313823846152e-06, "loss": 0.4482, "step": 2860 }, { "epoch": 0.6366266132621273, "grad_norm": 1.239496804446783, "learning_rate": 3.0845022417915394e-06, "loss": 0.4588, "step": 2861 }, { "epoch": 0.636849132176235, "grad_norm": 1.3465846936187607, "learning_rate": 3.0811740961630804e-06, "loss": 0.4372, "step": 2862 }, { "epoch": 0.6370716510903427, "grad_norm": 1.1623125027362002, "learning_rate": 3.0778469472279714e-06, "loss": 0.4771, "step": 2863 }, { "epoch": 0.6372941700044504, "grad_norm": 1.2016197630886771, "learning_rate": 3.074520796714433e-06, "loss": 0.4575, "step": 2864 }, { "epoch": 0.6375166889185581, "grad_norm": 1.2041321621516783, "learning_rate": 3.071195646350158e-06, "loss": 0.4701, "step": 2865 }, { "epoch": 0.6377392078326658, "grad_norm": 1.336527205281219, "learning_rate": 3.067871497862331e-06, "loss": 0.4671, "step": 2866 }, { "epoch": 0.6379617267467734, "grad_norm": 1.2343885633006582, "learning_rate": 3.064548352977608e-06, "loss": 0.4423, "step": 2867 }, { "epoch": 0.6381842456608812, "grad_norm": 1.2143213384873481, "learning_rate": 3.0612262134221245e-06, "loss": 0.4479, "step": 2868 }, { "epoch": 0.6384067645749889, "grad_norm": 1.378190128846271, "learning_rate": 3.0579050809214954e-06, "loss": 0.4446, "step": 2869 }, { "epoch": 0.6386292834890965, "grad_norm": 1.1400758451362727, "learning_rate": 3.0545849572008153e-06, "loss": 0.4599, "step": 2870 }, { "epoch": 0.6388518024032043, "grad_norm": 1.4046411170465627, "learning_rate": 3.0512658439846476e-06, "loss": 0.4596, "step": 2871 }, { "epoch": 0.639074321317312, "grad_norm": 1.2217953453933947, "learning_rate": 3.0479477429970393e-06, "loss": 0.4473, "step": 2872 }, { "epoch": 0.6392968402314196, "grad_norm": 1.2193601701736991, "learning_rate": 3.044630655961507e-06, "loss": 0.4424, "step": 2873 }, { "epoch": 0.6395193591455274, "grad_norm": 1.238385243668993, "learning_rate": 3.0413145846010376e-06, "loss": 0.4544, "step": 2874 }, { "epoch": 0.6397418780596351, "grad_norm": 1.2969103510790807, "learning_rate": 3.0379995306380985e-06, "loss": 0.4572, "step": 2875 }, { "epoch": 0.6399643969737427, "grad_norm": 1.2880426369220315, "learning_rate": 3.03468549579462e-06, "loss": 0.4491, "step": 2876 }, { "epoch": 0.6401869158878505, "grad_norm": 1.3098884679076586, "learning_rate": 3.0313724817920088e-06, "loss": 0.462, "step": 2877 }, { "epoch": 0.6404094348019582, "grad_norm": 1.2646691046557874, "learning_rate": 3.02806049035114e-06, "loss": 0.451, "step": 2878 }, { "epoch": 0.6406319537160658, "grad_norm": 1.3609103667906384, "learning_rate": 3.024749523192358e-06, "loss": 0.4612, "step": 2879 }, { "epoch": 0.6408544726301736, "grad_norm": 1.250344684769425, "learning_rate": 3.021439582035472e-06, "loss": 0.4451, "step": 2880 }, { "epoch": 0.6410769915442812, "grad_norm": 1.1930719720467637, "learning_rate": 3.0181306685997622e-06, "loss": 0.4416, "step": 2881 }, { "epoch": 0.641299510458389, "grad_norm": 1.2062909431676734, "learning_rate": 3.014822784603974e-06, "loss": 0.4517, "step": 2882 }, { "epoch": 0.6415220293724967, "grad_norm": 1.2056293136282377, "learning_rate": 3.0115159317663147e-06, "loss": 0.4572, "step": 2883 }, { "epoch": 0.6417445482866043, "grad_norm": 1.24096278618492, "learning_rate": 3.0082101118044617e-06, "loss": 0.4627, "step": 2884 }, { "epoch": 0.6419670672007121, "grad_norm": 1.2215408509137637, "learning_rate": 3.004905326435551e-06, "loss": 0.4458, "step": 2885 }, { "epoch": 0.6421895861148198, "grad_norm": 1.2109629180717454, "learning_rate": 3.0016015773761854e-06, "loss": 0.4585, "step": 2886 }, { "epoch": 0.6424121050289274, "grad_norm": 1.1860741894621913, "learning_rate": 2.998298866342425e-06, "loss": 0.4615, "step": 2887 }, { "epoch": 0.6426346239430352, "grad_norm": 1.2427564223815732, "learning_rate": 2.9949971950497943e-06, "loss": 0.4417, "step": 2888 }, { "epoch": 0.6428571428571429, "grad_norm": 1.2741238445433691, "learning_rate": 2.9916965652132734e-06, "loss": 0.453, "step": 2889 }, { "epoch": 0.6430796617712505, "grad_norm": 1.2769875092663787, "learning_rate": 2.988396978547308e-06, "loss": 0.4512, "step": 2890 }, { "epoch": 0.6433021806853583, "grad_norm": 1.2544144066775458, "learning_rate": 2.9850984367657964e-06, "loss": 0.4459, "step": 2891 }, { "epoch": 0.6435246995994659, "grad_norm": 1.3095929532345636, "learning_rate": 2.9818009415820983e-06, "loss": 0.4534, "step": 2892 }, { "epoch": 0.6437472185135736, "grad_norm": 1.2213090053345543, "learning_rate": 2.9785044947090245e-06, "loss": 0.459, "step": 2893 }, { "epoch": 0.6439697374276814, "grad_norm": 1.2559643457428245, "learning_rate": 2.9752090978588454e-06, "loss": 0.4586, "step": 2894 }, { "epoch": 0.644192256341789, "grad_norm": 1.2208175242067365, "learning_rate": 2.971914752743286e-06, "loss": 0.4591, "step": 2895 }, { "epoch": 0.6444147752558967, "grad_norm": 1.2706696609963573, "learning_rate": 2.96862146107352e-06, "loss": 0.4282, "step": 2896 }, { "epoch": 0.6446372941700045, "grad_norm": 1.1708495996388715, "learning_rate": 2.9653292245601835e-06, "loss": 0.4427, "step": 2897 }, { "epoch": 0.6448598130841121, "grad_norm": 1.186215421445486, "learning_rate": 2.9620380449133558e-06, "loss": 0.443, "step": 2898 }, { "epoch": 0.6450823319982198, "grad_norm": 1.2073453614394716, "learning_rate": 2.9587479238425704e-06, "loss": 0.4483, "step": 2899 }, { "epoch": 0.6453048509123276, "grad_norm": 1.1891651294930266, "learning_rate": 2.95545886305681e-06, "loss": 0.4504, "step": 2900 }, { "epoch": 0.6455273698264352, "grad_norm": 1.122022485337828, "learning_rate": 2.9521708642645085e-06, "loss": 0.4468, "step": 2901 }, { "epoch": 0.645749888740543, "grad_norm": 1.1626797435008898, "learning_rate": 2.948883929173546e-06, "loss": 0.4619, "step": 2902 }, { "epoch": 0.6459724076546507, "grad_norm": 1.2188506865811386, "learning_rate": 2.945598059491253e-06, "loss": 0.4528, "step": 2903 }, { "epoch": 0.6461949265687583, "grad_norm": 1.1595154360019504, "learning_rate": 2.942313256924404e-06, "loss": 0.446, "step": 2904 }, { "epoch": 0.6464174454828661, "grad_norm": 1.2403070751821428, "learning_rate": 2.9390295231792184e-06, "loss": 0.4541, "step": 2905 }, { "epoch": 0.6466399643969737, "grad_norm": 1.2978222136607132, "learning_rate": 2.9357468599613637e-06, "loss": 0.4567, "step": 2906 }, { "epoch": 0.6468624833110814, "grad_norm": 1.2215406652381304, "learning_rate": 2.9324652689759493e-06, "loss": 0.4466, "step": 2907 }, { "epoch": 0.6470850022251892, "grad_norm": 1.3788416431971462, "learning_rate": 2.9291847519275245e-06, "loss": 0.4746, "step": 2908 }, { "epoch": 0.6473075211392968, "grad_norm": 1.2214953905291976, "learning_rate": 2.9259053105200895e-06, "loss": 0.4486, "step": 2909 }, { "epoch": 0.6475300400534045, "grad_norm": 1.2950478604248195, "learning_rate": 2.9226269464570788e-06, "loss": 0.4464, "step": 2910 }, { "epoch": 0.6477525589675123, "grad_norm": 1.2446062659981423, "learning_rate": 2.919349661441367e-06, "loss": 0.4727, "step": 2911 }, { "epoch": 0.6479750778816199, "grad_norm": 1.2564195839798353, "learning_rate": 2.9160734571752736e-06, "loss": 0.4446, "step": 2912 }, { "epoch": 0.6481975967957276, "grad_norm": 1.1942345498149682, "learning_rate": 2.9127983353605488e-06, "loss": 0.4419, "step": 2913 }, { "epoch": 0.6484201157098354, "grad_norm": 1.2442358976217953, "learning_rate": 2.9095242976983856e-06, "loss": 0.4536, "step": 2914 }, { "epoch": 0.648642634623943, "grad_norm": 1.171561271631086, "learning_rate": 2.9062513458894194e-06, "loss": 0.439, "step": 2915 }, { "epoch": 0.6488651535380507, "grad_norm": 1.2365361523566292, "learning_rate": 2.9029794816337085e-06, "loss": 0.4565, "step": 2916 }, { "epoch": 0.6490876724521585, "grad_norm": 1.2612300644775842, "learning_rate": 2.8997087066307587e-06, "loss": 0.4417, "step": 2917 }, { "epoch": 0.6493101913662661, "grad_norm": 1.2124182879792218, "learning_rate": 2.8964390225794995e-06, "loss": 0.452, "step": 2918 }, { "epoch": 0.6495327102803738, "grad_norm": 1.2506749879687344, "learning_rate": 2.8931704311783014e-06, "loss": 0.4501, "step": 2919 }, { "epoch": 0.6497552291944815, "grad_norm": 1.145596975112666, "learning_rate": 2.8899029341249652e-06, "loss": 0.4584, "step": 2920 }, { "epoch": 0.6499777481085892, "grad_norm": 1.1581573826312999, "learning_rate": 2.886636533116721e-06, "loss": 0.4329, "step": 2921 }, { "epoch": 0.650200267022697, "grad_norm": 1.204827487990206, "learning_rate": 2.883371229850236e-06, "loss": 0.4579, "step": 2922 }, { "epoch": 0.6504227859368046, "grad_norm": 1.1682509504307461, "learning_rate": 2.8801070260215956e-06, "loss": 0.4607, "step": 2923 }, { "epoch": 0.6506453048509123, "grad_norm": 1.1234257997353188, "learning_rate": 2.8768439233263257e-06, "loss": 0.4439, "step": 2924 }, { "epoch": 0.6508678237650201, "grad_norm": 1.1853970284478308, "learning_rate": 2.873581923459377e-06, "loss": 0.4611, "step": 2925 }, { "epoch": 0.6510903426791277, "grad_norm": 1.2181647712232477, "learning_rate": 2.8703210281151193e-06, "loss": 0.4497, "step": 2926 }, { "epoch": 0.6513128615932354, "grad_norm": 1.1799726125025416, "learning_rate": 2.867061238987364e-06, "loss": 0.4474, "step": 2927 }, { "epoch": 0.6515353805073432, "grad_norm": 1.2376523239990251, "learning_rate": 2.8638025577693328e-06, "loss": 0.4591, "step": 2928 }, { "epoch": 0.6517578994214508, "grad_norm": 1.219730507839563, "learning_rate": 2.8605449861536817e-06, "loss": 0.4463, "step": 2929 }, { "epoch": 0.6519804183355585, "grad_norm": 1.177835804921288, "learning_rate": 2.8572885258324876e-06, "loss": 0.4431, "step": 2930 }, { "epoch": 0.6522029372496663, "grad_norm": 1.1965397149223174, "learning_rate": 2.8540331784972464e-06, "loss": 0.4335, "step": 2931 }, { "epoch": 0.6524254561637739, "grad_norm": 1.3844729308270685, "learning_rate": 2.8507789458388824e-06, "loss": 0.448, "step": 2932 }, { "epoch": 0.6526479750778816, "grad_norm": 1.2412981700253913, "learning_rate": 2.8475258295477324e-06, "loss": 0.4617, "step": 2933 }, { "epoch": 0.6528704939919893, "grad_norm": 1.4300047229499444, "learning_rate": 2.844273831313565e-06, "loss": 0.4475, "step": 2934 }, { "epoch": 0.653093012906097, "grad_norm": 1.3698124630242028, "learning_rate": 2.8410229528255605e-06, "loss": 0.4264, "step": 2935 }, { "epoch": 0.6533155318202047, "grad_norm": 1.4168379248750738, "learning_rate": 2.837773195772315e-06, "loss": 0.438, "step": 2936 }, { "epoch": 0.6535380507343124, "grad_norm": 1.3697457806862658, "learning_rate": 2.8345245618418493e-06, "loss": 0.452, "step": 2937 }, { "epoch": 0.6537605696484201, "grad_norm": 1.3340185266839548, "learning_rate": 2.8312770527215942e-06, "loss": 0.436, "step": 2938 }, { "epoch": 0.6539830885625278, "grad_norm": 1.21599078252105, "learning_rate": 2.828030670098401e-06, "loss": 0.4418, "step": 2939 }, { "epoch": 0.6542056074766355, "grad_norm": 1.4129555150820292, "learning_rate": 2.824785415658534e-06, "loss": 0.455, "step": 2940 }, { "epoch": 0.6544281263907432, "grad_norm": 1.2398216494036733, "learning_rate": 2.8215412910876725e-06, "loss": 0.4534, "step": 2941 }, { "epoch": 0.654650645304851, "grad_norm": 1.1668179124387754, "learning_rate": 2.8182982980709082e-06, "loss": 0.4502, "step": 2942 }, { "epoch": 0.6548731642189586, "grad_norm": 1.1560345868904374, "learning_rate": 2.815056438292743e-06, "loss": 0.4456, "step": 2943 }, { "epoch": 0.6550956831330663, "grad_norm": 1.2976761653552427, "learning_rate": 2.8118157134370928e-06, "loss": 0.4289, "step": 2944 }, { "epoch": 0.6553182020471741, "grad_norm": 1.1830974607313658, "learning_rate": 2.8085761251872847e-06, "loss": 0.4548, "step": 2945 }, { "epoch": 0.6555407209612817, "grad_norm": 1.2173797476265331, "learning_rate": 2.8053376752260515e-06, "loss": 0.4411, "step": 2946 }, { "epoch": 0.6557632398753894, "grad_norm": 1.2411914753107627, "learning_rate": 2.802100365235542e-06, "loss": 0.4549, "step": 2947 }, { "epoch": 0.655985758789497, "grad_norm": 1.2501660062037563, "learning_rate": 2.798864196897301e-06, "loss": 0.453, "step": 2948 }, { "epoch": 0.6562082777036048, "grad_norm": 1.3372162059718637, "learning_rate": 2.7956291718922925e-06, "loss": 0.464, "step": 2949 }, { "epoch": 0.6564307966177125, "grad_norm": 1.3339209965667849, "learning_rate": 2.7923952919008823e-06, "loss": 0.4463, "step": 2950 }, { "epoch": 0.6566533155318202, "grad_norm": 1.3228412728157366, "learning_rate": 2.7891625586028336e-06, "loss": 0.4504, "step": 2951 }, { "epoch": 0.6568758344459279, "grad_norm": 1.2385402810930741, "learning_rate": 2.785930973677331e-06, "loss": 0.4598, "step": 2952 }, { "epoch": 0.6570983533600356, "grad_norm": 1.2615173243728333, "learning_rate": 2.7827005388029454e-06, "loss": 0.4412, "step": 2953 }, { "epoch": 0.6573208722741433, "grad_norm": 1.2348354820545193, "learning_rate": 2.77947125565766e-06, "loss": 0.462, "step": 2954 }, { "epoch": 0.657543391188251, "grad_norm": 1.2149268619685145, "learning_rate": 2.776243125918858e-06, "loss": 0.4569, "step": 2955 }, { "epoch": 0.6577659101023587, "grad_norm": 1.2378833519239143, "learning_rate": 2.773016151263321e-06, "loss": 0.4651, "step": 2956 }, { "epoch": 0.6579884290164664, "grad_norm": 1.2540348552681835, "learning_rate": 2.769790333367234e-06, "loss": 0.4654, "step": 2957 }, { "epoch": 0.6582109479305741, "grad_norm": 1.3017689094124931, "learning_rate": 2.7665656739061777e-06, "loss": 0.4558, "step": 2958 }, { "epoch": 0.6584334668446818, "grad_norm": 1.250698942550638, "learning_rate": 2.763342174555137e-06, "loss": 0.4653, "step": 2959 }, { "epoch": 0.6586559857587895, "grad_norm": 1.2719107191535381, "learning_rate": 2.7601198369884845e-06, "loss": 0.4506, "step": 2960 }, { "epoch": 0.6588785046728972, "grad_norm": 1.2137074031507613, "learning_rate": 2.756898662879999e-06, "loss": 0.4477, "step": 2961 }, { "epoch": 0.6591010235870048, "grad_norm": 1.2361224659156878, "learning_rate": 2.7536786539028503e-06, "loss": 0.4523, "step": 2962 }, { "epoch": 0.6593235425011126, "grad_norm": 1.2598199292045158, "learning_rate": 2.7504598117296e-06, "loss": 0.4438, "step": 2963 }, { "epoch": 0.6595460614152203, "grad_norm": 1.3532190980084884, "learning_rate": 2.7472421380322147e-06, "loss": 0.443, "step": 2964 }, { "epoch": 0.659768580329328, "grad_norm": 1.2071641014203807, "learning_rate": 2.7440256344820404e-06, "loss": 0.4506, "step": 2965 }, { "epoch": 0.6599910992434357, "grad_norm": 1.1584558124479705, "learning_rate": 2.7408103027498236e-06, "loss": 0.4282, "step": 2966 }, { "epoch": 0.6602136181575434, "grad_norm": 1.2818512471161054, "learning_rate": 2.7375961445057014e-06, "loss": 0.4761, "step": 2967 }, { "epoch": 0.660436137071651, "grad_norm": 1.4035738277677414, "learning_rate": 2.734383161419197e-06, "loss": 0.4217, "step": 2968 }, { "epoch": 0.6606586559857588, "grad_norm": 1.2564993564868892, "learning_rate": 2.731171355159228e-06, "loss": 0.463, "step": 2969 }, { "epoch": 0.6608811748998665, "grad_norm": 1.4213675781382185, "learning_rate": 2.7279607273940977e-06, "loss": 0.4619, "step": 2970 }, { "epoch": 0.6611036938139742, "grad_norm": 1.9215305544190924, "learning_rate": 2.724751279791501e-06, "loss": 0.4555, "step": 2971 }, { "epoch": 0.6613262127280819, "grad_norm": 1.3435338217208457, "learning_rate": 2.7215430140185166e-06, "loss": 0.4359, "step": 2972 }, { "epoch": 0.6615487316421896, "grad_norm": 1.2727594916761942, "learning_rate": 2.718335931741608e-06, "loss": 0.4454, "step": 2973 }, { "epoch": 0.6617712505562973, "grad_norm": 1.2993977788382942, "learning_rate": 2.7151300346266286e-06, "loss": 0.4456, "step": 2974 }, { "epoch": 0.661993769470405, "grad_norm": 1.3151228303256353, "learning_rate": 2.7119253243388113e-06, "loss": 0.468, "step": 2975 }, { "epoch": 0.6622162883845126, "grad_norm": 1.218176879540063, "learning_rate": 2.708721802542772e-06, "loss": 0.4701, "step": 2976 }, { "epoch": 0.6624388072986204, "grad_norm": 1.212234462698301, "learning_rate": 2.7055194709025203e-06, "loss": 0.4355, "step": 2977 }, { "epoch": 0.6626613262127281, "grad_norm": 1.3927774523053795, "learning_rate": 2.7023183310814325e-06, "loss": 0.4518, "step": 2978 }, { "epoch": 0.6628838451268357, "grad_norm": 1.3044534625556792, "learning_rate": 2.6991183847422773e-06, "loss": 0.4318, "step": 2979 }, { "epoch": 0.6631063640409435, "grad_norm": 1.2568439723525735, "learning_rate": 2.6959196335471937e-06, "loss": 0.4398, "step": 2980 }, { "epoch": 0.6633288829550512, "grad_norm": 1.2439690710708817, "learning_rate": 2.6927220791577084e-06, "loss": 0.4642, "step": 2981 }, { "epoch": 0.6635514018691588, "grad_norm": 1.4120902397167392, "learning_rate": 2.6895257232347226e-06, "loss": 0.4341, "step": 2982 }, { "epoch": 0.6637739207832666, "grad_norm": 1.3248862237851275, "learning_rate": 2.686330567438515e-06, "loss": 0.4353, "step": 2983 }, { "epoch": 0.6639964396973743, "grad_norm": 1.3554886160333566, "learning_rate": 2.6831366134287434e-06, "loss": 0.4491, "step": 2984 }, { "epoch": 0.664218958611482, "grad_norm": 1.230522656833347, "learning_rate": 2.6799438628644357e-06, "loss": 0.4358, "step": 2985 }, { "epoch": 0.6644414775255897, "grad_norm": 1.3982083300239363, "learning_rate": 2.676752317404001e-06, "loss": 0.4447, "step": 2986 }, { "epoch": 0.6646639964396974, "grad_norm": 1.245377596975314, "learning_rate": 2.67356197870522e-06, "loss": 0.4536, "step": 2987 }, { "epoch": 0.664886515353805, "grad_norm": 1.4138994729790137, "learning_rate": 2.6703728484252424e-06, "loss": 0.4552, "step": 2988 }, { "epoch": 0.6651090342679128, "grad_norm": 1.3011532943580382, "learning_rate": 2.667184928220601e-06, "loss": 0.4581, "step": 2989 }, { "epoch": 0.6653315531820204, "grad_norm": 1.3539182841295396, "learning_rate": 2.6639982197471875e-06, "loss": 0.433, "step": 2990 }, { "epoch": 0.6655540720961282, "grad_norm": 1.2286718446812646, "learning_rate": 2.6608127246602715e-06, "loss": 0.4358, "step": 2991 }, { "epoch": 0.6657765910102359, "grad_norm": 1.2728651460768716, "learning_rate": 2.657628444614494e-06, "loss": 0.4423, "step": 2992 }, { "epoch": 0.6659991099243435, "grad_norm": 1.1755048487923674, "learning_rate": 2.654445381263856e-06, "loss": 0.4458, "step": 2993 }, { "epoch": 0.6662216288384513, "grad_norm": 1.1654593976370538, "learning_rate": 2.651263536261735e-06, "loss": 0.4404, "step": 2994 }, { "epoch": 0.666444147752559, "grad_norm": 1.1537424627379744, "learning_rate": 2.648082911260873e-06, "loss": 0.4389, "step": 2995 }, { "epoch": 0.6666666666666666, "grad_norm": 1.2313375104576687, "learning_rate": 2.6449035079133772e-06, "loss": 0.4574, "step": 2996 }, { "epoch": 0.6668891855807744, "grad_norm": 1.2537186793611221, "learning_rate": 2.6417253278707246e-06, "loss": 0.4412, "step": 2997 }, { "epoch": 0.6671117044948821, "grad_norm": 1.2253921794469296, "learning_rate": 2.6385483727837472e-06, "loss": 0.4375, "step": 2998 }, { "epoch": 0.6673342234089897, "grad_norm": 1.1645845560902803, "learning_rate": 2.635372644302653e-06, "loss": 0.4395, "step": 2999 }, { "epoch": 0.6675567423230975, "grad_norm": 1.2246818379164357, "learning_rate": 2.6321981440769995e-06, "loss": 0.4411, "step": 3000 }, { "epoch": 0.6677792612372052, "grad_norm": 1.2310490752084782, "learning_rate": 2.629024873755721e-06, "loss": 0.4528, "step": 3001 }, { "epoch": 0.6680017801513128, "grad_norm": 1.2656326899472647, "learning_rate": 2.6258528349871004e-06, "loss": 0.4623, "step": 3002 }, { "epoch": 0.6682242990654206, "grad_norm": 1.234485556726595, "learning_rate": 2.622682029418788e-06, "loss": 0.4468, "step": 3003 }, { "epoch": 0.6684468179795282, "grad_norm": 1.2641886729694989, "learning_rate": 2.6195124586977923e-06, "loss": 0.4468, "step": 3004 }, { "epoch": 0.668669336893636, "grad_norm": 1.2896009730521376, "learning_rate": 2.6163441244704758e-06, "loss": 0.4521, "step": 3005 }, { "epoch": 0.6688918558077437, "grad_norm": 1.1752357887268678, "learning_rate": 2.6131770283825653e-06, "loss": 0.4419, "step": 3006 }, { "epoch": 0.6691143747218513, "grad_norm": 1.231996594780031, "learning_rate": 2.61001117207914e-06, "loss": 0.4598, "step": 3007 }, { "epoch": 0.669336893635959, "grad_norm": 1.2842144632053816, "learning_rate": 2.606846557204638e-06, "loss": 0.4529, "step": 3008 }, { "epoch": 0.6695594125500668, "grad_norm": 1.2617029185115771, "learning_rate": 2.603683185402853e-06, "loss": 0.4491, "step": 3009 }, { "epoch": 0.6697819314641744, "grad_norm": 1.1998782995362625, "learning_rate": 2.600521058316927e-06, "loss": 0.428, "step": 3010 }, { "epoch": 0.6700044503782822, "grad_norm": 1.533050775034664, "learning_rate": 2.597360177589362e-06, "loss": 0.4521, "step": 3011 }, { "epoch": 0.6702269692923899, "grad_norm": 1.231543400500169, "learning_rate": 2.594200544862012e-06, "loss": 0.4319, "step": 3012 }, { "epoch": 0.6704494882064975, "grad_norm": 1.2113844785127976, "learning_rate": 2.591042161776075e-06, "loss": 0.4421, "step": 3013 }, { "epoch": 0.6706720071206053, "grad_norm": 1.3655916396501966, "learning_rate": 2.5878850299721148e-06, "loss": 0.448, "step": 3014 }, { "epoch": 0.670894526034713, "grad_norm": 1.5795678540111555, "learning_rate": 2.58472915109003e-06, "loss": 0.4568, "step": 3015 }, { "epoch": 0.6711170449488206, "grad_norm": 1.2246405539570018, "learning_rate": 2.581574526769076e-06, "loss": 0.4425, "step": 3016 }, { "epoch": 0.6713395638629284, "grad_norm": 1.3831602234014484, "learning_rate": 2.578421158647859e-06, "loss": 0.4467, "step": 3017 }, { "epoch": 0.671562082777036, "grad_norm": 1.2537399843574648, "learning_rate": 2.575269048364324e-06, "loss": 0.4293, "step": 3018 }, { "epoch": 0.6717846016911437, "grad_norm": 1.2779774881659152, "learning_rate": 2.57211819755577e-06, "loss": 0.4458, "step": 3019 }, { "epoch": 0.6720071206052515, "grad_norm": 1.2561603686882785, "learning_rate": 2.5689686078588394e-06, "loss": 0.4549, "step": 3020 }, { "epoch": 0.6722296395193591, "grad_norm": 1.38961477035113, "learning_rate": 2.565820280909521e-06, "loss": 0.4468, "step": 3021 }, { "epoch": 0.6724521584334668, "grad_norm": 1.4765813213996584, "learning_rate": 2.5626732183431446e-06, "loss": 0.4347, "step": 3022 }, { "epoch": 0.6726746773475746, "grad_norm": 1.2870271061159066, "learning_rate": 2.5595274217943844e-06, "loss": 0.4438, "step": 3023 }, { "epoch": 0.6728971962616822, "grad_norm": 1.2611684553866391, "learning_rate": 2.556382892897261e-06, "loss": 0.4515, "step": 3024 }, { "epoch": 0.67311971517579, "grad_norm": 1.4487014653972596, "learning_rate": 2.5532396332851266e-06, "loss": 0.4477, "step": 3025 }, { "epoch": 0.6733422340898977, "grad_norm": 1.4132422049200049, "learning_rate": 2.550097644590689e-06, "loss": 0.4642, "step": 3026 }, { "epoch": 0.6735647530040053, "grad_norm": 1.3031246118775055, "learning_rate": 2.5469569284459814e-06, "loss": 0.4438, "step": 3027 }, { "epoch": 0.673787271918113, "grad_norm": 1.4298421187901877, "learning_rate": 2.543817486482384e-06, "loss": 0.4619, "step": 3028 }, { "epoch": 0.6740097908322208, "grad_norm": 1.2305868148921364, "learning_rate": 2.540679320330615e-06, "loss": 0.4475, "step": 3029 }, { "epoch": 0.6742323097463284, "grad_norm": 1.2334789180224621, "learning_rate": 2.537542431620724e-06, "loss": 0.4562, "step": 3030 }, { "epoch": 0.6744548286604362, "grad_norm": 1.189921723183456, "learning_rate": 2.534406821982105e-06, "loss": 0.4403, "step": 3031 }, { "epoch": 0.6746773475745438, "grad_norm": 1.2232564945450088, "learning_rate": 2.5312724930434806e-06, "loss": 0.4595, "step": 3032 }, { "epoch": 0.6748998664886515, "grad_norm": 1.2234747582508982, "learning_rate": 2.5281394464329144e-06, "loss": 0.4462, "step": 3033 }, { "epoch": 0.6751223854027593, "grad_norm": 1.6183028117779468, "learning_rate": 2.5250076837778015e-06, "loss": 0.4455, "step": 3034 }, { "epoch": 0.6753449043168669, "grad_norm": 1.1829841784025417, "learning_rate": 2.521877206704865e-06, "loss": 0.4488, "step": 3035 }, { "epoch": 0.6755674232309746, "grad_norm": 1.197094586004116, "learning_rate": 2.5187480168401697e-06, "loss": 0.4503, "step": 3036 }, { "epoch": 0.6757899421450824, "grad_norm": 1.3479462224565908, "learning_rate": 2.5156201158091e-06, "loss": 0.4471, "step": 3037 }, { "epoch": 0.67601246105919, "grad_norm": 1.2124463046535745, "learning_rate": 2.5124935052363854e-06, "loss": 0.4286, "step": 3038 }, { "epoch": 0.6762349799732977, "grad_norm": 1.2716082629402938, "learning_rate": 2.5093681867460743e-06, "loss": 0.455, "step": 3039 }, { "epoch": 0.6764574988874055, "grad_norm": 1.232775670057993, "learning_rate": 2.5062441619615457e-06, "loss": 0.4629, "step": 3040 }, { "epoch": 0.6766800178015131, "grad_norm": 1.214983095120478, "learning_rate": 2.503121432505511e-06, "loss": 0.4482, "step": 3041 }, { "epoch": 0.6769025367156208, "grad_norm": 1.3333321296456726, "learning_rate": 2.5000000000000015e-06, "loss": 0.4472, "step": 3042 }, { "epoch": 0.6771250556297286, "grad_norm": 1.243728976325135, "learning_rate": 2.4968798660663805e-06, "loss": 0.4468, "step": 3043 }, { "epoch": 0.6773475745438362, "grad_norm": 1.2881268272688615, "learning_rate": 2.4937610323253363e-06, "loss": 0.4453, "step": 3044 }, { "epoch": 0.677570093457944, "grad_norm": 1.2881650015846096, "learning_rate": 2.4906435003968804e-06, "loss": 0.4448, "step": 3045 }, { "epoch": 0.6777926123720516, "grad_norm": 1.2473210232692005, "learning_rate": 2.4875272719003505e-06, "loss": 0.4325, "step": 3046 }, { "epoch": 0.6780151312861593, "grad_norm": 1.312364426804757, "learning_rate": 2.484412348454402e-06, "loss": 0.4495, "step": 3047 }, { "epoch": 0.678237650200267, "grad_norm": 1.1966296208937246, "learning_rate": 2.481298731677016e-06, "loss": 0.4361, "step": 3048 }, { "epoch": 0.6784601691143747, "grad_norm": 1.3036048949092982, "learning_rate": 2.4781864231854983e-06, "loss": 0.4466, "step": 3049 }, { "epoch": 0.6786826880284824, "grad_norm": 1.2773143721593314, "learning_rate": 2.475075424596465e-06, "loss": 0.4496, "step": 3050 }, { "epoch": 0.6789052069425902, "grad_norm": 1.2442135635165397, "learning_rate": 2.471965737525865e-06, "loss": 0.4509, "step": 3051 }, { "epoch": 0.6791277258566978, "grad_norm": 1.1934463154997748, "learning_rate": 2.4688573635889545e-06, "loss": 0.4362, "step": 3052 }, { "epoch": 0.6793502447708055, "grad_norm": 1.2077131608001868, "learning_rate": 2.4657503044003144e-06, "loss": 0.4594, "step": 3053 }, { "epoch": 0.6795727636849133, "grad_norm": 1.3209207041982716, "learning_rate": 2.462644561573842e-06, "loss": 0.4495, "step": 3054 }, { "epoch": 0.6797952825990209, "grad_norm": 1.2913219835089627, "learning_rate": 2.4595401367227455e-06, "loss": 0.4478, "step": 3055 }, { "epoch": 0.6800178015131286, "grad_norm": 1.196723088752008, "learning_rate": 2.456437031459555e-06, "loss": 0.4461, "step": 3056 }, { "epoch": 0.6802403204272364, "grad_norm": 1.2404819814611148, "learning_rate": 2.4533352473961115e-06, "loss": 0.4574, "step": 3057 }, { "epoch": 0.680462839341344, "grad_norm": 1.4471008233485938, "learning_rate": 2.4502347861435717e-06, "loss": 0.4563, "step": 3058 }, { "epoch": 0.6806853582554517, "grad_norm": 1.2563548225909766, "learning_rate": 2.447135649312406e-06, "loss": 0.4383, "step": 3059 }, { "epoch": 0.6809078771695594, "grad_norm": 1.2929445473542291, "learning_rate": 2.4440378385123915e-06, "loss": 0.4394, "step": 3060 }, { "epoch": 0.6811303960836671, "grad_norm": 1.3171060734559503, "learning_rate": 2.4409413553526236e-06, "loss": 0.433, "step": 3061 }, { "epoch": 0.6813529149977748, "grad_norm": 1.4069325606590584, "learning_rate": 2.4378462014414994e-06, "loss": 0.4443, "step": 3062 }, { "epoch": 0.6815754339118825, "grad_norm": 1.265951827429294, "learning_rate": 2.434752378386739e-06, "loss": 0.4557, "step": 3063 }, { "epoch": 0.6817979528259902, "grad_norm": 1.2418538780965478, "learning_rate": 2.4316598877953557e-06, "loss": 0.4573, "step": 3064 }, { "epoch": 0.6820204717400979, "grad_norm": 1.3715855212790762, "learning_rate": 2.4285687312736815e-06, "loss": 0.4366, "step": 3065 }, { "epoch": 0.6822429906542056, "grad_norm": 1.3675383538503858, "learning_rate": 2.425478910427353e-06, "loss": 0.4432, "step": 3066 }, { "epoch": 0.6824655095683133, "grad_norm": 1.2294488182190495, "learning_rate": 2.422390426861309e-06, "loss": 0.4408, "step": 3067 }, { "epoch": 0.682688028482421, "grad_norm": 1.355479450917398, "learning_rate": 2.419303282179798e-06, "loss": 0.4393, "step": 3068 }, { "epoch": 0.6829105473965287, "grad_norm": 1.3511136001935444, "learning_rate": 2.4162174779863716e-06, "loss": 0.4361, "step": 3069 }, { "epoch": 0.6831330663106364, "grad_norm": 1.276653381968228, "learning_rate": 2.4131330158838855e-06, "loss": 0.4386, "step": 3070 }, { "epoch": 0.6833555852247442, "grad_norm": 1.307732864259587, "learning_rate": 2.4100498974744997e-06, "loss": 0.436, "step": 3071 }, { "epoch": 0.6835781041388518, "grad_norm": 1.2311814276414645, "learning_rate": 2.406968124359671e-06, "loss": 0.421, "step": 3072 }, { "epoch": 0.6838006230529595, "grad_norm": 1.3431506578523302, "learning_rate": 2.4038876981401632e-06, "loss": 0.4446, "step": 3073 }, { "epoch": 0.6840231419670671, "grad_norm": 1.2769818302832348, "learning_rate": 2.4008086204160375e-06, "loss": 0.4302, "step": 3074 }, { "epoch": 0.6842456608811749, "grad_norm": 1.1716472594682847, "learning_rate": 2.3977308927866554e-06, "loss": 0.4466, "step": 3075 }, { "epoch": 0.6844681797952826, "grad_norm": 1.2813133731173831, "learning_rate": 2.39465451685068e-06, "loss": 0.4569, "step": 3076 }, { "epoch": 0.6846906987093903, "grad_norm": 1.2656469010386235, "learning_rate": 2.391579494206065e-06, "loss": 0.4521, "step": 3077 }, { "epoch": 0.684913217623498, "grad_norm": 1.214140335339525, "learning_rate": 2.388505826450067e-06, "loss": 0.4575, "step": 3078 }, { "epoch": 0.6851357365376057, "grad_norm": 1.2572222571118077, "learning_rate": 2.385433515179241e-06, "loss": 0.4614, "step": 3079 }, { "epoch": 0.6853582554517134, "grad_norm": 1.2977856248794741, "learning_rate": 2.3823625619894275e-06, "loss": 0.4457, "step": 3080 }, { "epoch": 0.6855807743658211, "grad_norm": 1.263844762661797, "learning_rate": 2.3792929684757714e-06, "loss": 0.4426, "step": 3081 }, { "epoch": 0.6858032932799288, "grad_norm": 1.2182554658118288, "learning_rate": 2.376224736232708e-06, "loss": 0.4459, "step": 3082 }, { "epoch": 0.6860258121940365, "grad_norm": 1.3064643787808454, "learning_rate": 2.3731578668539666e-06, "loss": 0.4429, "step": 3083 }, { "epoch": 0.6862483311081442, "grad_norm": 1.2286476099038586, "learning_rate": 2.370092361932564e-06, "loss": 0.426, "step": 3084 }, { "epoch": 0.6864708500222519, "grad_norm": 1.4708465731425053, "learning_rate": 2.367028223060813e-06, "loss": 0.448, "step": 3085 }, { "epoch": 0.6866933689363596, "grad_norm": 1.1919159541969215, "learning_rate": 2.363965451830318e-06, "loss": 0.4361, "step": 3086 }, { "epoch": 0.6869158878504673, "grad_norm": 1.264295458566297, "learning_rate": 2.360904049831964e-06, "loss": 0.4307, "step": 3087 }, { "epoch": 0.6871384067645749, "grad_norm": 1.5530352969486039, "learning_rate": 2.3578440186559393e-06, "loss": 0.4422, "step": 3088 }, { "epoch": 0.6873609256786827, "grad_norm": 1.270678972138661, "learning_rate": 2.3547853598917066e-06, "loss": 0.4424, "step": 3089 }, { "epoch": 0.6875834445927904, "grad_norm": 1.2906578950259517, "learning_rate": 2.3517280751280224e-06, "loss": 0.4283, "step": 3090 }, { "epoch": 0.687805963506898, "grad_norm": 1.4022213703339528, "learning_rate": 2.348672165952931e-06, "loss": 0.4374, "step": 3091 }, { "epoch": 0.6880284824210058, "grad_norm": 1.2994901006262358, "learning_rate": 2.3456176339537552e-06, "loss": 0.434, "step": 3092 }, { "epoch": 0.6882510013351135, "grad_norm": 1.2620395862920168, "learning_rate": 2.3425644807171084e-06, "loss": 0.4415, "step": 3093 }, { "epoch": 0.6884735202492211, "grad_norm": 1.2859741169950674, "learning_rate": 2.3395127078288872e-06, "loss": 0.4365, "step": 3094 }, { "epoch": 0.6886960391633289, "grad_norm": 1.2576567857424852, "learning_rate": 2.336462316874269e-06, "loss": 0.4538, "step": 3095 }, { "epoch": 0.6889185580774366, "grad_norm": 1.3328810040878167, "learning_rate": 2.333413309437717e-06, "loss": 0.4494, "step": 3096 }, { "epoch": 0.6891410769915443, "grad_norm": 1.3621166134386289, "learning_rate": 2.33036568710297e-06, "loss": 0.4367, "step": 3097 }, { "epoch": 0.689363595905652, "grad_norm": 1.2623027333850851, "learning_rate": 2.327319451453052e-06, "loss": 0.453, "step": 3098 }, { "epoch": 0.6895861148197597, "grad_norm": 1.2933540579733118, "learning_rate": 2.3242746040702657e-06, "loss": 0.4735, "step": 3099 }, { "epoch": 0.6898086337338674, "grad_norm": 1.204678332657293, "learning_rate": 2.3212311465361918e-06, "loss": 0.4369, "step": 3100 }, { "epoch": 0.6900311526479751, "grad_norm": 1.2908078645341865, "learning_rate": 2.3181890804316928e-06, "loss": 0.4489, "step": 3101 }, { "epoch": 0.6902536715620827, "grad_norm": 1.2519668625580582, "learning_rate": 2.3151484073369e-06, "loss": 0.445, "step": 3102 }, { "epoch": 0.6904761904761905, "grad_norm": 1.2267073569552907, "learning_rate": 2.3121091288312315e-06, "loss": 0.4471, "step": 3103 }, { "epoch": 0.6906987093902982, "grad_norm": 1.2062971506895661, "learning_rate": 2.3090712464933714e-06, "loss": 0.4323, "step": 3104 }, { "epoch": 0.6909212283044058, "grad_norm": 1.2851926705744094, "learning_rate": 2.306034761901285e-06, "loss": 0.4405, "step": 3105 }, { "epoch": 0.6911437472185136, "grad_norm": 1.4795506648971746, "learning_rate": 2.30299967663221e-06, "loss": 0.4596, "step": 3106 }, { "epoch": 0.6913662661326213, "grad_norm": 1.2324247186424149, "learning_rate": 2.2999659922626568e-06, "loss": 0.4168, "step": 3107 }, { "epoch": 0.6915887850467289, "grad_norm": 1.4363258968771222, "learning_rate": 2.2969337103684107e-06, "loss": 0.4464, "step": 3108 }, { "epoch": 0.6918113039608367, "grad_norm": 1.3039311706025407, "learning_rate": 2.2939028325245216e-06, "loss": 0.4389, "step": 3109 }, { "epoch": 0.6920338228749444, "grad_norm": 1.2414789832570474, "learning_rate": 2.2908733603053167e-06, "loss": 0.4442, "step": 3110 }, { "epoch": 0.692256341789052, "grad_norm": 1.192902702810834, "learning_rate": 2.2878452952843918e-06, "loss": 0.4416, "step": 3111 }, { "epoch": 0.6924788607031598, "grad_norm": 1.341374796467815, "learning_rate": 2.28481863903461e-06, "loss": 0.4309, "step": 3112 }, { "epoch": 0.6927013796172675, "grad_norm": 1.2957606490913476, "learning_rate": 2.2817933931281057e-06, "loss": 0.4546, "step": 3113 }, { "epoch": 0.6929238985313751, "grad_norm": 1.345479927830949, "learning_rate": 2.278769559136275e-06, "loss": 0.4394, "step": 3114 }, { "epoch": 0.6931464174454829, "grad_norm": 1.2981879114265678, "learning_rate": 2.2757471386297857e-06, "loss": 0.4289, "step": 3115 }, { "epoch": 0.6933689363595905, "grad_norm": 1.2974772316647214, "learning_rate": 2.272726133178572e-06, "loss": 0.4393, "step": 3116 }, { "epoch": 0.6935914552736983, "grad_norm": 1.2872029610766562, "learning_rate": 2.269706544351828e-06, "loss": 0.4368, "step": 3117 }, { "epoch": 0.693813974187806, "grad_norm": 1.364944718571448, "learning_rate": 2.2666883737180158e-06, "loss": 0.4427, "step": 3118 }, { "epoch": 0.6940364931019136, "grad_norm": 1.282618613615363, "learning_rate": 2.26367162284486e-06, "loss": 0.4471, "step": 3119 }, { "epoch": 0.6942590120160214, "grad_norm": 1.2378190640158895, "learning_rate": 2.2606562932993486e-06, "loss": 0.4433, "step": 3120 }, { "epoch": 0.6944815309301291, "grad_norm": 1.2763700046012805, "learning_rate": 2.257642386647732e-06, "loss": 0.437, "step": 3121 }, { "epoch": 0.6947040498442367, "grad_norm": 1.1693682454649001, "learning_rate": 2.2546299044555166e-06, "loss": 0.4311, "step": 3122 }, { "epoch": 0.6949265687583445, "grad_norm": 1.3409212261087897, "learning_rate": 2.2516188482874745e-06, "loss": 0.4441, "step": 3123 }, { "epoch": 0.6951490876724522, "grad_norm": 1.283456172644633, "learning_rate": 2.248609219707631e-06, "loss": 0.4421, "step": 3124 }, { "epoch": 0.6953716065865598, "grad_norm": 1.322494913424991, "learning_rate": 2.2456010202792804e-06, "loss": 0.4336, "step": 3125 }, { "epoch": 0.6955941255006676, "grad_norm": 1.304203000285091, "learning_rate": 2.2425942515649624e-06, "loss": 0.4475, "step": 3126 }, { "epoch": 0.6958166444147753, "grad_norm": 1.2278812473836267, "learning_rate": 2.239588915126481e-06, "loss": 0.4283, "step": 3127 }, { "epoch": 0.6960391633288829, "grad_norm": 1.3384845698960028, "learning_rate": 2.2365850125248967e-06, "loss": 0.4423, "step": 3128 }, { "epoch": 0.6962616822429907, "grad_norm": 1.242961986149517, "learning_rate": 2.2335825453205183e-06, "loss": 0.4245, "step": 3129 }, { "epoch": 0.6964842011570983, "grad_norm": 1.306753236732545, "learning_rate": 2.2305815150729167e-06, "loss": 0.4466, "step": 3130 }, { "epoch": 0.696706720071206, "grad_norm": 1.3503274801619907, "learning_rate": 2.227581923340912e-06, "loss": 0.4582, "step": 3131 }, { "epoch": 0.6969292389853138, "grad_norm": 1.325725973565238, "learning_rate": 2.224583771682579e-06, "loss": 0.4452, "step": 3132 }, { "epoch": 0.6971517578994214, "grad_norm": 1.2173858551025474, "learning_rate": 2.221587061655246e-06, "loss": 0.4421, "step": 3133 }, { "epoch": 0.6973742768135291, "grad_norm": 1.2685482629547828, "learning_rate": 2.218591794815486e-06, "loss": 0.4328, "step": 3134 }, { "epoch": 0.6975967957276369, "grad_norm": 1.209316171724182, "learning_rate": 2.215597972719129e-06, "loss": 0.4155, "step": 3135 }, { "epoch": 0.6978193146417445, "grad_norm": 1.2234782178198123, "learning_rate": 2.2126055969212523e-06, "loss": 0.4432, "step": 3136 }, { "epoch": 0.6980418335558523, "grad_norm": 1.2756316104730463, "learning_rate": 2.209614668976182e-06, "loss": 0.44, "step": 3137 }, { "epoch": 0.69826435246996, "grad_norm": 1.2208457775105637, "learning_rate": 2.2066251904374937e-06, "loss": 0.4419, "step": 3138 }, { "epoch": 0.6984868713840676, "grad_norm": 1.2252459357731564, "learning_rate": 2.203637162858005e-06, "loss": 0.4382, "step": 3139 }, { "epoch": 0.6987093902981754, "grad_norm": 1.2855041258629027, "learning_rate": 2.200650587789785e-06, "loss": 0.4521, "step": 3140 }, { "epoch": 0.6989319092122831, "grad_norm": 1.430631045379642, "learning_rate": 2.1976654667841484e-06, "loss": 0.432, "step": 3141 }, { "epoch": 0.6991544281263907, "grad_norm": 1.6116074567031013, "learning_rate": 2.1946818013916484e-06, "loss": 0.4347, "step": 3142 }, { "epoch": 0.6993769470404985, "grad_norm": 1.3595784878718529, "learning_rate": 2.1916995931620923e-06, "loss": 0.436, "step": 3143 }, { "epoch": 0.6995994659546061, "grad_norm": 1.501842242378379, "learning_rate": 2.1887188436445207e-06, "loss": 0.4424, "step": 3144 }, { "epoch": 0.6998219848687138, "grad_norm": 1.2117636776875873, "learning_rate": 2.1857395543872234e-06, "loss": 0.4529, "step": 3145 }, { "epoch": 0.7000445037828216, "grad_norm": 1.322734394164856, "learning_rate": 2.1827617269377256e-06, "loss": 0.432, "step": 3146 }, { "epoch": 0.7002670226969292, "grad_norm": 1.4533998957756264, "learning_rate": 2.1797853628427985e-06, "loss": 0.4416, "step": 3147 }, { "epoch": 0.7004895416110369, "grad_norm": 1.3504935309097974, "learning_rate": 2.1768104636484503e-06, "loss": 0.4463, "step": 3148 }, { "epoch": 0.7007120605251447, "grad_norm": 1.2406030384926052, "learning_rate": 2.1738370308999307e-06, "loss": 0.4474, "step": 3149 }, { "epoch": 0.7009345794392523, "grad_norm": 1.3399948419093801, "learning_rate": 2.1708650661417268e-06, "loss": 0.4383, "step": 3150 }, { "epoch": 0.70115709835336, "grad_norm": 1.3413006604554971, "learning_rate": 2.16789457091756e-06, "loss": 0.4323, "step": 3151 }, { "epoch": 0.7013796172674678, "grad_norm": 1.4258392826419095, "learning_rate": 2.1649255467703912e-06, "loss": 0.4553, "step": 3152 }, { "epoch": 0.7016021361815754, "grad_norm": 1.2745055239811187, "learning_rate": 2.1619579952424204e-06, "loss": 0.4307, "step": 3153 }, { "epoch": 0.7018246550956831, "grad_norm": 1.2996250207742273, "learning_rate": 2.158991917875074e-06, "loss": 0.4407, "step": 3154 }, { "epoch": 0.7020471740097909, "grad_norm": 1.3840236262961867, "learning_rate": 2.156027316209021e-06, "loss": 0.4473, "step": 3155 }, { "epoch": 0.7022696929238985, "grad_norm": 1.239001818064171, "learning_rate": 2.15306419178416e-06, "loss": 0.4327, "step": 3156 }, { "epoch": 0.7024922118380063, "grad_norm": 1.2308055509428295, "learning_rate": 2.150102546139623e-06, "loss": 0.4535, "step": 3157 }, { "epoch": 0.7027147307521139, "grad_norm": 1.2951047270257274, "learning_rate": 2.1471423808137765e-06, "loss": 0.4535, "step": 3158 }, { "epoch": 0.7029372496662216, "grad_norm": 1.3138906735272684, "learning_rate": 2.144183697344211e-06, "loss": 0.4337, "step": 3159 }, { "epoch": 0.7031597685803294, "grad_norm": 1.2849576088250787, "learning_rate": 2.1412264972677537e-06, "loss": 0.4274, "step": 3160 }, { "epoch": 0.703382287494437, "grad_norm": 1.1661766584096211, "learning_rate": 2.13827078212046e-06, "loss": 0.4304, "step": 3161 }, { "epoch": 0.7036048064085447, "grad_norm": 1.2753447588447668, "learning_rate": 2.135316553437613e-06, "loss": 0.4409, "step": 3162 }, { "epoch": 0.7038273253226525, "grad_norm": 1.3566745880467725, "learning_rate": 2.1323638127537256e-06, "loss": 0.43, "step": 3163 }, { "epoch": 0.7040498442367601, "grad_norm": 1.2135722244584106, "learning_rate": 2.1294125616025323e-06, "loss": 0.4346, "step": 3164 }, { "epoch": 0.7042723631508678, "grad_norm": 1.2547980254968472, "learning_rate": 2.1264628015170026e-06, "loss": 0.4446, "step": 3165 }, { "epoch": 0.7044948820649756, "grad_norm": 1.3470531978507627, "learning_rate": 2.123514534029322e-06, "loss": 0.4633, "step": 3166 }, { "epoch": 0.7047174009790832, "grad_norm": 1.218759240698855, "learning_rate": 2.1205677606709056e-06, "loss": 0.4276, "step": 3167 }, { "epoch": 0.7049399198931909, "grad_norm": 1.334119410092192, "learning_rate": 2.117622482972398e-06, "loss": 0.4385, "step": 3168 }, { "epoch": 0.7051624388072987, "grad_norm": 1.26338533201369, "learning_rate": 2.1146787024636555e-06, "loss": 0.4388, "step": 3169 }, { "epoch": 0.7053849577214063, "grad_norm": 1.3149930812704917, "learning_rate": 2.111736420673767e-06, "loss": 0.4373, "step": 3170 }, { "epoch": 0.705607476635514, "grad_norm": 1.2818515735468927, "learning_rate": 2.108795639131032e-06, "loss": 0.424, "step": 3171 }, { "epoch": 0.7058299955496217, "grad_norm": 1.252495346108561, "learning_rate": 2.105856359362982e-06, "loss": 0.4415, "step": 3172 }, { "epoch": 0.7060525144637294, "grad_norm": 1.3263141753819345, "learning_rate": 2.1029185828963604e-06, "loss": 0.4486, "step": 3173 }, { "epoch": 0.7062750333778371, "grad_norm": 1.2467196478998022, "learning_rate": 2.0999823112571356e-06, "loss": 0.4398, "step": 3174 }, { "epoch": 0.7064975522919448, "grad_norm": 1.2804810020708286, "learning_rate": 2.097047545970491e-06, "loss": 0.4258, "step": 3175 }, { "epoch": 0.7067200712060525, "grad_norm": 1.2971361072157725, "learning_rate": 2.094114288560826e-06, "loss": 0.4354, "step": 3176 }, { "epoch": 0.7069425901201603, "grad_norm": 1.3837563579206027, "learning_rate": 2.09118254055176e-06, "loss": 0.4461, "step": 3177 }, { "epoch": 0.7071651090342679, "grad_norm": 1.2089680537981615, "learning_rate": 2.0882523034661297e-06, "loss": 0.4478, "step": 3178 }, { "epoch": 0.7073876279483756, "grad_norm": 1.1938008626923644, "learning_rate": 2.0853235788259773e-06, "loss": 0.4511, "step": 3179 }, { "epoch": 0.7076101468624834, "grad_norm": 1.2186427834472775, "learning_rate": 2.082396368152576e-06, "loss": 0.436, "step": 3180 }, { "epoch": 0.707832665776591, "grad_norm": 1.2856360383242011, "learning_rate": 2.0794706729663965e-06, "loss": 0.4425, "step": 3181 }, { "epoch": 0.7080551846906987, "grad_norm": 1.2351090966915648, "learning_rate": 2.076546494787131e-06, "loss": 0.4469, "step": 3182 }, { "epoch": 0.7082777036048065, "grad_norm": 1.272347628201188, "learning_rate": 2.073623835133684e-06, "loss": 0.4546, "step": 3183 }, { "epoch": 0.7085002225189141, "grad_norm": 1.238757877256215, "learning_rate": 2.0707026955241657e-06, "loss": 0.4297, "step": 3184 }, { "epoch": 0.7087227414330218, "grad_norm": 1.2189454969924198, "learning_rate": 2.0677830774759002e-06, "loss": 0.424, "step": 3185 }, { "epoch": 0.7089452603471295, "grad_norm": 1.261460671200296, "learning_rate": 2.064864982505422e-06, "loss": 0.4359, "step": 3186 }, { "epoch": 0.7091677792612372, "grad_norm": 1.3636122175184369, "learning_rate": 2.0619484121284754e-06, "loss": 0.4349, "step": 3187 }, { "epoch": 0.7093902981753449, "grad_norm": 1.2591396104678547, "learning_rate": 2.059033367860008e-06, "loss": 0.4299, "step": 3188 }, { "epoch": 0.7096128170894526, "grad_norm": 1.2814556999872497, "learning_rate": 2.0561198512141784e-06, "loss": 0.4523, "step": 3189 }, { "epoch": 0.7098353360035603, "grad_norm": 1.2930663641429152, "learning_rate": 2.0532078637043523e-06, "loss": 0.4248, "step": 3190 }, { "epoch": 0.710057854917668, "grad_norm": 1.1899824657135207, "learning_rate": 2.0502974068430957e-06, "loss": 0.4415, "step": 3191 }, { "epoch": 0.7102803738317757, "grad_norm": 1.3507114415457135, "learning_rate": 2.04738848214219e-06, "loss": 0.4409, "step": 3192 }, { "epoch": 0.7105028927458834, "grad_norm": 1.2095253508791715, "learning_rate": 2.0444810911126094e-06, "loss": 0.4332, "step": 3193 }, { "epoch": 0.7107254116599911, "grad_norm": 1.2399607305729092, "learning_rate": 2.041575235264538e-06, "loss": 0.4544, "step": 3194 }, { "epoch": 0.7109479305740988, "grad_norm": 1.2519173134793922, "learning_rate": 2.038670916107362e-06, "loss": 0.4137, "step": 3195 }, { "epoch": 0.7111704494882065, "grad_norm": 1.3693376376881794, "learning_rate": 2.0357681351496663e-06, "loss": 0.4556, "step": 3196 }, { "epoch": 0.7113929684023143, "grad_norm": 1.5052772928817324, "learning_rate": 2.0328668938992387e-06, "loss": 0.4525, "step": 3197 }, { "epoch": 0.7116154873164219, "grad_norm": 1.256136982035162, "learning_rate": 2.0299671938630685e-06, "loss": 0.432, "step": 3198 }, { "epoch": 0.7118380062305296, "grad_norm": 1.3640268123531614, "learning_rate": 2.027069036547343e-06, "loss": 0.4241, "step": 3199 }, { "epoch": 0.7120605251446372, "grad_norm": 1.2697535216459381, "learning_rate": 2.02417242345745e-06, "loss": 0.4236, "step": 3200 }, { "epoch": 0.712283044058745, "grad_norm": 1.2291367408237297, "learning_rate": 2.0212773560979704e-06, "loss": 0.4248, "step": 3201 }, { "epoch": 0.7125055629728527, "grad_norm": 1.6231605228883885, "learning_rate": 2.0183838359726873e-06, "loss": 0.434, "step": 3202 }, { "epoch": 0.7127280818869604, "grad_norm": 1.3065780883514495, "learning_rate": 2.0154918645845795e-06, "loss": 0.4516, "step": 3203 }, { "epoch": 0.7129506008010681, "grad_norm": 1.259385548412972, "learning_rate": 2.0126014434358152e-06, "loss": 0.422, "step": 3204 }, { "epoch": 0.7131731197151758, "grad_norm": 1.3655028847038782, "learning_rate": 2.0097125740277696e-06, "loss": 0.4489, "step": 3205 }, { "epoch": 0.7133956386292835, "grad_norm": 1.2070016158367598, "learning_rate": 2.006825257860999e-06, "loss": 0.4403, "step": 3206 }, { "epoch": 0.7136181575433912, "grad_norm": 1.3172034043536538, "learning_rate": 2.0039394964352615e-06, "loss": 0.4487, "step": 3207 }, { "epoch": 0.7138406764574989, "grad_norm": 1.217027867075359, "learning_rate": 2.001055291249501e-06, "loss": 0.4347, "step": 3208 }, { "epoch": 0.7140631953716066, "grad_norm": 1.3719041006385084, "learning_rate": 1.9981726438018596e-06, "loss": 0.4411, "step": 3209 }, { "epoch": 0.7142857142857143, "grad_norm": 1.2160983554109195, "learning_rate": 1.9952915555896664e-06, "loss": 0.448, "step": 3210 }, { "epoch": 0.7145082331998219, "grad_norm": 1.2439109761372045, "learning_rate": 1.992412028109441e-06, "loss": 0.4365, "step": 3211 }, { "epoch": 0.7147307521139297, "grad_norm": 1.236421715003102, "learning_rate": 1.9895340628568952e-06, "loss": 0.4392, "step": 3212 }, { "epoch": 0.7149532710280374, "grad_norm": 1.2972813296448855, "learning_rate": 1.986657661326924e-06, "loss": 0.4433, "step": 3213 }, { "epoch": 0.715175789942145, "grad_norm": 1.2579100400368854, "learning_rate": 1.9837828250136137e-06, "loss": 0.4275, "step": 3214 }, { "epoch": 0.7153983088562528, "grad_norm": 1.349577949612255, "learning_rate": 1.980909555410239e-06, "loss": 0.4145, "step": 3215 }, { "epoch": 0.7156208277703605, "grad_norm": 1.2232379633898198, "learning_rate": 1.9780378540092532e-06, "loss": 0.4386, "step": 3216 }, { "epoch": 0.7158433466844681, "grad_norm": 1.3113712144430953, "learning_rate": 1.9751677223023088e-06, "loss": 0.4515, "step": 3217 }, { "epoch": 0.7160658655985759, "grad_norm": 1.2427262105140078, "learning_rate": 1.972299161780228e-06, "loss": 0.4376, "step": 3218 }, { "epoch": 0.7162883845126836, "grad_norm": 1.193773016734356, "learning_rate": 1.969432173933026e-06, "loss": 0.4382, "step": 3219 }, { "epoch": 0.7165109034267912, "grad_norm": 1.2526129450379255, "learning_rate": 1.966566760249901e-06, "loss": 0.4364, "step": 3220 }, { "epoch": 0.716733422340899, "grad_norm": 1.2096218823571054, "learning_rate": 1.963702922219227e-06, "loss": 0.4264, "step": 3221 }, { "epoch": 0.7169559412550067, "grad_norm": 1.2571848408739708, "learning_rate": 1.9608406613285657e-06, "loss": 0.442, "step": 3222 }, { "epoch": 0.7171784601691144, "grad_norm": 1.3176251565309687, "learning_rate": 1.9579799790646587e-06, "loss": 0.4387, "step": 3223 }, { "epoch": 0.7174009790832221, "grad_norm": 1.2703659015697968, "learning_rate": 1.955120876913425e-06, "loss": 0.4428, "step": 3224 }, { "epoch": 0.7176234979973297, "grad_norm": 1.1703656347469802, "learning_rate": 1.952263356359967e-06, "loss": 0.4342, "step": 3225 }, { "epoch": 0.7178460169114375, "grad_norm": 1.3616674211293542, "learning_rate": 1.94940741888856e-06, "loss": 0.4288, "step": 3226 }, { "epoch": 0.7180685358255452, "grad_norm": 1.2305897741882583, "learning_rate": 1.9465530659826633e-06, "loss": 0.4293, "step": 3227 }, { "epoch": 0.7182910547396528, "grad_norm": 1.207039512773572, "learning_rate": 1.943700299124904e-06, "loss": 0.432, "step": 3228 }, { "epoch": 0.7185135736537606, "grad_norm": 2.3661244035751214, "learning_rate": 1.940849119797097e-06, "loss": 0.4471, "step": 3229 }, { "epoch": 0.7187360925678683, "grad_norm": 1.259036444797034, "learning_rate": 1.9379995294802264e-06, "loss": 0.4383, "step": 3230 }, { "epoch": 0.7189586114819759, "grad_norm": 1.262679980479535, "learning_rate": 1.935151529654449e-06, "loss": 0.4469, "step": 3231 }, { "epoch": 0.7191811303960837, "grad_norm": 1.277418327897686, "learning_rate": 1.9323051217990997e-06, "loss": 0.4305, "step": 3232 }, { "epoch": 0.7194036493101914, "grad_norm": 1.322838213816652, "learning_rate": 1.929460307392683e-06, "loss": 0.4511, "step": 3233 }, { "epoch": 0.719626168224299, "grad_norm": 1.24198145705717, "learning_rate": 1.9266170879128764e-06, "loss": 0.4677, "step": 3234 }, { "epoch": 0.7198486871384068, "grad_norm": 1.2650828792860742, "learning_rate": 1.9237754648365328e-06, "loss": 0.4287, "step": 3235 }, { "epoch": 0.7200712060525145, "grad_norm": 1.2406092480948536, "learning_rate": 1.9209354396396713e-06, "loss": 0.4381, "step": 3236 }, { "epoch": 0.7202937249666221, "grad_norm": 1.2742757289843976, "learning_rate": 1.918097013797484e-06, "loss": 0.4431, "step": 3237 }, { "epoch": 0.7205162438807299, "grad_norm": 1.3091699162938164, "learning_rate": 1.9152601887843285e-06, "loss": 0.4291, "step": 3238 }, { "epoch": 0.7207387627948375, "grad_norm": 1.2971144964033907, "learning_rate": 1.912424966073735e-06, "loss": 0.4452, "step": 3239 }, { "epoch": 0.7209612817089452, "grad_norm": 1.2809978206858068, "learning_rate": 1.9095913471384005e-06, "loss": 0.438, "step": 3240 }, { "epoch": 0.721183800623053, "grad_norm": 1.2592120668644842, "learning_rate": 1.906759333450184e-06, "loss": 0.4229, "step": 3241 }, { "epoch": 0.7214063195371606, "grad_norm": 1.2545541282164532, "learning_rate": 1.903928926480121e-06, "loss": 0.4312, "step": 3242 }, { "epoch": 0.7216288384512684, "grad_norm": 1.2191838005423443, "learning_rate": 1.901100127698401e-06, "loss": 0.4326, "step": 3243 }, { "epoch": 0.7218513573653761, "grad_norm": 1.4468762561873354, "learning_rate": 1.898272938574386e-06, "loss": 0.4363, "step": 3244 }, { "epoch": 0.7220738762794837, "grad_norm": 1.239675930613084, "learning_rate": 1.8954473605766005e-06, "loss": 0.4321, "step": 3245 }, { "epoch": 0.7222963951935915, "grad_norm": 1.2836337321140072, "learning_rate": 1.8926233951727273e-06, "loss": 0.424, "step": 3246 }, { "epoch": 0.7225189141076992, "grad_norm": 1.2026156605732392, "learning_rate": 1.8898010438296177e-06, "loss": 0.4319, "step": 3247 }, { "epoch": 0.7227414330218068, "grad_norm": 1.33180563226504, "learning_rate": 1.8869803080132815e-06, "loss": 0.4537, "step": 3248 }, { "epoch": 0.7229639519359146, "grad_norm": 1.2101559780717555, "learning_rate": 1.8841611891888918e-06, "loss": 0.4291, "step": 3249 }, { "epoch": 0.7231864708500223, "grad_norm": 1.2569659439206782, "learning_rate": 1.881343688820777e-06, "loss": 0.4389, "step": 3250 }, { "epoch": 0.7234089897641299, "grad_norm": 1.3021453611201204, "learning_rate": 1.8785278083724285e-06, "loss": 0.4106, "step": 3251 }, { "epoch": 0.7236315086782377, "grad_norm": 1.3029241963952023, "learning_rate": 1.8757135493064987e-06, "loss": 0.4387, "step": 3252 }, { "epoch": 0.7238540275923453, "grad_norm": 1.3534605072847032, "learning_rate": 1.8729009130847886e-06, "loss": 0.4385, "step": 3253 }, { "epoch": 0.724076546506453, "grad_norm": 1.2583742526148678, "learning_rate": 1.8700899011682688e-06, "loss": 0.4236, "step": 3254 }, { "epoch": 0.7242990654205608, "grad_norm": 1.2776942833490357, "learning_rate": 1.867280515017056e-06, "loss": 0.4321, "step": 3255 }, { "epoch": 0.7245215843346684, "grad_norm": 1.2565320417246093, "learning_rate": 1.8644727560904269e-06, "loss": 0.4326, "step": 3256 }, { "epoch": 0.7247441032487761, "grad_norm": 1.3418957259103612, "learning_rate": 1.8616666258468136e-06, "loss": 0.4542, "step": 3257 }, { "epoch": 0.7249666221628839, "grad_norm": 1.2956551962511103, "learning_rate": 1.8588621257437983e-06, "loss": 0.417, "step": 3258 }, { "epoch": 0.7251891410769915, "grad_norm": 1.280431795590769, "learning_rate": 1.8560592572381202e-06, "loss": 0.4242, "step": 3259 }, { "epoch": 0.7254116599910992, "grad_norm": 1.3609640763771715, "learning_rate": 1.8532580217856698e-06, "loss": 0.4236, "step": 3260 }, { "epoch": 0.725634178905207, "grad_norm": 1.3653719104090472, "learning_rate": 1.85045842084149e-06, "loss": 0.4303, "step": 3261 }, { "epoch": 0.7258566978193146, "grad_norm": 1.3230536879572559, "learning_rate": 1.847660455859775e-06, "loss": 0.4398, "step": 3262 }, { "epoch": 0.7260792167334224, "grad_norm": 1.335433909674095, "learning_rate": 1.8448641282938645e-06, "loss": 0.4458, "step": 3263 }, { "epoch": 0.7263017356475301, "grad_norm": 1.3231690880747928, "learning_rate": 1.8420694395962546e-06, "loss": 0.4272, "step": 3264 }, { "epoch": 0.7265242545616377, "grad_norm": 1.4154672913059798, "learning_rate": 1.8392763912185852e-06, "loss": 0.4351, "step": 3265 }, { "epoch": 0.7267467734757455, "grad_norm": 1.342541220013213, "learning_rate": 1.8364849846116472e-06, "loss": 0.4357, "step": 3266 }, { "epoch": 0.7269692923898531, "grad_norm": 1.3994307144766984, "learning_rate": 1.833695221225379e-06, "loss": 0.4482, "step": 3267 }, { "epoch": 0.7271918113039608, "grad_norm": 1.279269161092443, "learning_rate": 1.8309071025088604e-06, "loss": 0.4322, "step": 3268 }, { "epoch": 0.7274143302180686, "grad_norm": 1.2757225111056787, "learning_rate": 1.8281206299103231e-06, "loss": 0.4363, "step": 3269 }, { "epoch": 0.7276368491321762, "grad_norm": 1.2287660141312982, "learning_rate": 1.8253358048771386e-06, "loss": 0.4279, "step": 3270 }, { "epoch": 0.7278593680462839, "grad_norm": 1.2525664627017863, "learning_rate": 1.822552628855827e-06, "loss": 0.4479, "step": 3271 }, { "epoch": 0.7280818869603917, "grad_norm": 1.287071566177518, "learning_rate": 1.819771103292049e-06, "loss": 0.4263, "step": 3272 }, { "epoch": 0.7283044058744993, "grad_norm": 1.2752481096181107, "learning_rate": 1.8169912296306097e-06, "loss": 0.4285, "step": 3273 }, { "epoch": 0.728526924788607, "grad_norm": 1.2321577180678838, "learning_rate": 1.8142130093154569e-06, "loss": 0.4254, "step": 3274 }, { "epoch": 0.7287494437027148, "grad_norm": 1.3132359626863235, "learning_rate": 1.8114364437896747e-06, "loss": 0.4212, "step": 3275 }, { "epoch": 0.7289719626168224, "grad_norm": 1.3023495161542904, "learning_rate": 1.8086615344954928e-06, "loss": 0.4216, "step": 3276 }, { "epoch": 0.7291944815309301, "grad_norm": 1.2508216309179103, "learning_rate": 1.8058882828742803e-06, "loss": 0.4315, "step": 3277 }, { "epoch": 0.7294170004450379, "grad_norm": 1.3468096455259435, "learning_rate": 1.8031166903665392e-06, "loss": 0.4412, "step": 3278 }, { "epoch": 0.7296395193591455, "grad_norm": 1.2839979551424945, "learning_rate": 1.8003467584119205e-06, "loss": 0.4369, "step": 3279 }, { "epoch": 0.7298620382732532, "grad_norm": 1.3113800676855178, "learning_rate": 1.7975784884492026e-06, "loss": 0.437, "step": 3280 }, { "epoch": 0.7300845571873609, "grad_norm": 1.2638710997624516, "learning_rate": 1.7948118819163052e-06, "loss": 0.4317, "step": 3281 }, { "epoch": 0.7303070761014686, "grad_norm": 1.344345873738151, "learning_rate": 1.7920469402502849e-06, "loss": 0.4496, "step": 3282 }, { "epoch": 0.7305295950155763, "grad_norm": 1.3424227997824865, "learning_rate": 1.789283664887329e-06, "loss": 0.4248, "step": 3283 }, { "epoch": 0.730752113929684, "grad_norm": 1.3782235413791069, "learning_rate": 1.786522057262764e-06, "loss": 0.4367, "step": 3284 }, { "epoch": 0.7309746328437917, "grad_norm": 1.2540782485724378, "learning_rate": 1.7837621188110482e-06, "loss": 0.4172, "step": 3285 }, { "epoch": 0.7311971517578995, "grad_norm": 1.2406322285559253, "learning_rate": 1.781003850965773e-06, "loss": 0.4484, "step": 3286 }, { "epoch": 0.7314196706720071, "grad_norm": 1.1931366506200838, "learning_rate": 1.778247255159663e-06, "loss": 0.4161, "step": 3287 }, { "epoch": 0.7316421895861148, "grad_norm": 1.2499375173386926, "learning_rate": 1.7754923328245704e-06, "loss": 0.4263, "step": 3288 }, { "epoch": 0.7318647085002226, "grad_norm": 1.2539744249472324, "learning_rate": 1.7727390853914855e-06, "loss": 0.417, "step": 3289 }, { "epoch": 0.7320872274143302, "grad_norm": 1.2843703094575605, "learning_rate": 1.7699875142905177e-06, "loss": 0.4396, "step": 3290 }, { "epoch": 0.7323097463284379, "grad_norm": 1.290544996553914, "learning_rate": 1.7672376209509174e-06, "loss": 0.4453, "step": 3291 }, { "epoch": 0.7325322652425457, "grad_norm": 1.2638382602723592, "learning_rate": 1.7644894068010593e-06, "loss": 0.4317, "step": 3292 }, { "epoch": 0.7327547841566533, "grad_norm": 1.3018430404723276, "learning_rate": 1.7617428732684406e-06, "loss": 0.4533, "step": 3293 }, { "epoch": 0.732977303070761, "grad_norm": 1.266622757194652, "learning_rate": 1.7589980217796931e-06, "loss": 0.4389, "step": 3294 }, { "epoch": 0.7331998219848687, "grad_norm": 1.3134036674848446, "learning_rate": 1.7562548537605683e-06, "loss": 0.4423, "step": 3295 }, { "epoch": 0.7334223408989764, "grad_norm": 1.328440643759414, "learning_rate": 1.7535133706359486e-06, "loss": 0.4344, "step": 3296 }, { "epoch": 0.7336448598130841, "grad_norm": 1.2725186327309175, "learning_rate": 1.7507735738298392e-06, "loss": 0.4245, "step": 3297 }, { "epoch": 0.7338673787271918, "grad_norm": 1.2437853982741727, "learning_rate": 1.7480354647653692e-06, "loss": 0.4246, "step": 3298 }, { "epoch": 0.7340898976412995, "grad_norm": 1.3108112162455625, "learning_rate": 1.7452990448647927e-06, "loss": 0.4301, "step": 3299 }, { "epoch": 0.7343124165554072, "grad_norm": 1.4101757851562622, "learning_rate": 1.7425643155494814e-06, "loss": 0.4324, "step": 3300 }, { "epoch": 0.7345349354695149, "grad_norm": 1.3951873391166751, "learning_rate": 1.7398312782399346e-06, "loss": 0.4295, "step": 3301 }, { "epoch": 0.7347574543836226, "grad_norm": 1.3868248116114106, "learning_rate": 1.7370999343557698e-06, "loss": 0.4233, "step": 3302 }, { "epoch": 0.7349799732977303, "grad_norm": 1.3530049610505301, "learning_rate": 1.7343702853157262e-06, "loss": 0.4175, "step": 3303 }, { "epoch": 0.735202492211838, "grad_norm": 1.3877823962320042, "learning_rate": 1.731642332537663e-06, "loss": 0.4246, "step": 3304 }, { "epoch": 0.7354250111259457, "grad_norm": 1.3148843881271726, "learning_rate": 1.7289160774385543e-06, "loss": 0.4269, "step": 3305 }, { "epoch": 0.7356475300400535, "grad_norm": 1.3123764184755415, "learning_rate": 1.7261915214344976e-06, "loss": 0.4295, "step": 3306 }, { "epoch": 0.7358700489541611, "grad_norm": 1.3244642118256016, "learning_rate": 1.7234686659407064e-06, "loss": 0.437, "step": 3307 }, { "epoch": 0.7360925678682688, "grad_norm": 1.4343405999462278, "learning_rate": 1.7207475123715072e-06, "loss": 0.4566, "step": 3308 }, { "epoch": 0.7363150867823764, "grad_norm": 1.2561857390350075, "learning_rate": 1.7180280621403473e-06, "loss": 0.4286, "step": 3309 }, { "epoch": 0.7365376056964842, "grad_norm": 1.283223272297473, "learning_rate": 1.7153103166597868e-06, "loss": 0.4252, "step": 3310 }, { "epoch": 0.7367601246105919, "grad_norm": 1.4068010195141258, "learning_rate": 1.7125942773415017e-06, "loss": 0.4253, "step": 3311 }, { "epoch": 0.7369826435246996, "grad_norm": 1.2567937054484066, "learning_rate": 1.7098799455962817e-06, "loss": 0.4353, "step": 3312 }, { "epoch": 0.7372051624388073, "grad_norm": 1.3101710446662214, "learning_rate": 1.7071673228340257e-06, "loss": 0.4517, "step": 3313 }, { "epoch": 0.737427681352915, "grad_norm": 1.2330552996068067, "learning_rate": 1.7044564104637512e-06, "loss": 0.4363, "step": 3314 }, { "epoch": 0.7376502002670227, "grad_norm": 1.2780247844160373, "learning_rate": 1.7017472098935795e-06, "loss": 0.4416, "step": 3315 }, { "epoch": 0.7378727191811304, "grad_norm": 1.2505272581635112, "learning_rate": 1.6990397225307536e-06, "loss": 0.4179, "step": 3316 }, { "epoch": 0.7380952380952381, "grad_norm": 1.2994753927443743, "learning_rate": 1.6963339497816155e-06, "loss": 0.4412, "step": 3317 }, { "epoch": 0.7383177570093458, "grad_norm": 1.3248958415264214, "learning_rate": 1.6936298930516232e-06, "loss": 0.4257, "step": 3318 }, { "epoch": 0.7385402759234535, "grad_norm": 1.2971933986424764, "learning_rate": 1.6909275537453423e-06, "loss": 0.4369, "step": 3319 }, { "epoch": 0.7387627948375612, "grad_norm": 1.361746036694719, "learning_rate": 1.6882269332664437e-06, "loss": 0.4325, "step": 3320 }, { "epoch": 0.7389853137516689, "grad_norm": 1.2596828742790123, "learning_rate": 1.6855280330177072e-06, "loss": 0.4267, "step": 3321 }, { "epoch": 0.7392078326657766, "grad_norm": 1.3696366483586109, "learning_rate": 1.6828308544010202e-06, "loss": 0.4448, "step": 3322 }, { "epoch": 0.7394303515798842, "grad_norm": 1.3285970952038686, "learning_rate": 1.6801353988173752e-06, "loss": 0.4385, "step": 3323 }, { "epoch": 0.739652870493992, "grad_norm": 1.3346639264878966, "learning_rate": 1.6774416676668704e-06, "loss": 0.436, "step": 3324 }, { "epoch": 0.7398753894080997, "grad_norm": 1.4445628943482682, "learning_rate": 1.6747496623487042e-06, "loss": 0.4349, "step": 3325 }, { "epoch": 0.7400979083222073, "grad_norm": 1.336500351456551, "learning_rate": 1.6720593842611827e-06, "loss": 0.4358, "step": 3326 }, { "epoch": 0.7403204272363151, "grad_norm": 1.2495044055043425, "learning_rate": 1.669370834801714e-06, "loss": 0.433, "step": 3327 }, { "epoch": 0.7405429461504228, "grad_norm": 1.3588841952825237, "learning_rate": 1.6666840153668085e-06, "loss": 0.445, "step": 3328 }, { "epoch": 0.7407654650645304, "grad_norm": 1.3080514590932595, "learning_rate": 1.6639989273520785e-06, "loss": 0.4258, "step": 3329 }, { "epoch": 0.7409879839786382, "grad_norm": 1.4285289599262576, "learning_rate": 1.6613155721522328e-06, "loss": 0.4363, "step": 3330 }, { "epoch": 0.7412105028927459, "grad_norm": 1.299128948870658, "learning_rate": 1.6586339511610865e-06, "loss": 0.4133, "step": 3331 }, { "epoch": 0.7414330218068536, "grad_norm": 1.2892049697642414, "learning_rate": 1.6559540657715473e-06, "loss": 0.441, "step": 3332 }, { "epoch": 0.7416555407209613, "grad_norm": 1.2711295057471577, "learning_rate": 1.6532759173756251e-06, "loss": 0.4295, "step": 3333 }, { "epoch": 0.741878059635069, "grad_norm": 1.321360726193718, "learning_rate": 1.650599507364432e-06, "loss": 0.4183, "step": 3334 }, { "epoch": 0.7421005785491767, "grad_norm": 1.3993409618942483, "learning_rate": 1.647924837128167e-06, "loss": 0.4276, "step": 3335 }, { "epoch": 0.7423230974632844, "grad_norm": 1.2328212353305072, "learning_rate": 1.6452519080561348e-06, "loss": 0.439, "step": 3336 }, { "epoch": 0.742545616377392, "grad_norm": 1.3190713860588033, "learning_rate": 1.6425807215367273e-06, "loss": 0.4257, "step": 3337 }, { "epoch": 0.7427681352914998, "grad_norm": 1.295665024887501, "learning_rate": 1.6399112789574378e-06, "loss": 0.428, "step": 3338 }, { "epoch": 0.7429906542056075, "grad_norm": 1.3289049505516453, "learning_rate": 1.637243581704852e-06, "loss": 0.4514, "step": 3339 }, { "epoch": 0.7432131731197151, "grad_norm": 1.325615028014592, "learning_rate": 1.6345776311646478e-06, "loss": 0.4468, "step": 3340 }, { "epoch": 0.7434356920338229, "grad_norm": 1.3987250299090388, "learning_rate": 1.6319134287215982e-06, "loss": 0.4475, "step": 3341 }, { "epoch": 0.7436582109479306, "grad_norm": 1.3392348142486785, "learning_rate": 1.6292509757595642e-06, "loss": 0.4426, "step": 3342 }, { "epoch": 0.7438807298620382, "grad_norm": 1.3057899892785696, "learning_rate": 1.6265902736615007e-06, "loss": 0.4291, "step": 3343 }, { "epoch": 0.744103248776146, "grad_norm": 1.4083999942216123, "learning_rate": 1.6239313238094556e-06, "loss": 0.4267, "step": 3344 }, { "epoch": 0.7443257676902537, "grad_norm": 1.3109706059126045, "learning_rate": 1.6212741275845606e-06, "loss": 0.4356, "step": 3345 }, { "epoch": 0.7445482866043613, "grad_norm": 1.2783959516414019, "learning_rate": 1.6186186863670406e-06, "loss": 0.4378, "step": 3346 }, { "epoch": 0.7447708055184691, "grad_norm": 1.2652494289629934, "learning_rate": 1.6159650015362088e-06, "loss": 0.4354, "step": 3347 }, { "epoch": 0.7449933244325768, "grad_norm": 1.2706140841866647, "learning_rate": 1.6133130744704657e-06, "loss": 0.4247, "step": 3348 }, { "epoch": 0.7452158433466844, "grad_norm": 1.2556868094192166, "learning_rate": 1.6106629065472995e-06, "loss": 0.4401, "step": 3349 }, { "epoch": 0.7454383622607922, "grad_norm": 1.2862486647057592, "learning_rate": 1.6080144991432806e-06, "loss": 0.4268, "step": 3350 }, { "epoch": 0.7456608811748998, "grad_norm": 1.209563565235168, "learning_rate": 1.6053678536340705e-06, "loss": 0.427, "step": 3351 }, { "epoch": 0.7458834000890076, "grad_norm": 1.3594049300912385, "learning_rate": 1.6027229713944098e-06, "loss": 0.4474, "step": 3352 }, { "epoch": 0.7461059190031153, "grad_norm": 1.3029261443061255, "learning_rate": 1.60007985379813e-06, "loss": 0.4283, "step": 3353 }, { "epoch": 0.7463284379172229, "grad_norm": 1.3239403617923902, "learning_rate": 1.5974385022181422e-06, "loss": 0.4118, "step": 3354 }, { "epoch": 0.7465509568313307, "grad_norm": 1.3541644421651844, "learning_rate": 1.5947989180264379e-06, "loss": 0.4361, "step": 3355 }, { "epoch": 0.7467734757454384, "grad_norm": 1.302074845270356, "learning_rate": 1.592161102594096e-06, "loss": 0.4413, "step": 3356 }, { "epoch": 0.746995994659546, "grad_norm": 1.459501793055856, "learning_rate": 1.5895250572912696e-06, "loss": 0.4427, "step": 3357 }, { "epoch": 0.7472185135736538, "grad_norm": 1.2181389626294323, "learning_rate": 1.5868907834872e-06, "loss": 0.4159, "step": 3358 }, { "epoch": 0.7474410324877615, "grad_norm": 1.2280774239996863, "learning_rate": 1.5842582825502028e-06, "loss": 0.4238, "step": 3359 }, { "epoch": 0.7476635514018691, "grad_norm": 1.1959605342710882, "learning_rate": 1.5816275558476758e-06, "loss": 0.427, "step": 3360 }, { "epoch": 0.7478860703159769, "grad_norm": 1.2266600797195257, "learning_rate": 1.5789986047460953e-06, "loss": 0.4281, "step": 3361 }, { "epoch": 0.7481085892300846, "grad_norm": 1.2952641223906318, "learning_rate": 1.5763714306110106e-06, "loss": 0.4503, "step": 3362 }, { "epoch": 0.7483311081441922, "grad_norm": 1.300666472443915, "learning_rate": 1.5737460348070538e-06, "loss": 0.4363, "step": 3363 }, { "epoch": 0.7485536270583, "grad_norm": 1.3105823538441872, "learning_rate": 1.5711224186979307e-06, "loss": 0.4281, "step": 3364 }, { "epoch": 0.7487761459724076, "grad_norm": 1.285575502790552, "learning_rate": 1.568500583646423e-06, "loss": 0.4364, "step": 3365 }, { "epoch": 0.7489986648865153, "grad_norm": 1.4021034856071297, "learning_rate": 1.5658805310143887e-06, "loss": 0.4462, "step": 3366 }, { "epoch": 0.7492211838006231, "grad_norm": 1.2717617732095787, "learning_rate": 1.5632622621627553e-06, "loss": 0.4195, "step": 3367 }, { "epoch": 0.7494437027147307, "grad_norm": 1.3655554168885449, "learning_rate": 1.5606457784515282e-06, "loss": 0.4341, "step": 3368 }, { "epoch": 0.7496662216288384, "grad_norm": 1.2594678695533261, "learning_rate": 1.5580310812397865e-06, "loss": 0.433, "step": 3369 }, { "epoch": 0.7498887405429462, "grad_norm": 1.6431805618146207, "learning_rate": 1.5554181718856737e-06, "loss": 0.4257, "step": 3370 }, { "epoch": 0.7501112594570538, "grad_norm": 1.362220725969794, "learning_rate": 1.5528070517464171e-06, "loss": 0.4327, "step": 3371 }, { "epoch": 0.7503337783711616, "grad_norm": 1.3503905417227062, "learning_rate": 1.5501977221783021e-06, "loss": 0.4284, "step": 3372 }, { "epoch": 0.7505562972852693, "grad_norm": 1.3023814636848183, "learning_rate": 1.547590184536692e-06, "loss": 0.4354, "step": 3373 }, { "epoch": 0.7507788161993769, "grad_norm": 1.3233547701635617, "learning_rate": 1.5449844401760178e-06, "loss": 0.4231, "step": 3374 }, { "epoch": 0.7510013351134847, "grad_norm": 1.2948561024283687, "learning_rate": 1.5423804904497747e-06, "loss": 0.4305, "step": 3375 }, { "epoch": 0.7512238540275924, "grad_norm": 1.2809281415398377, "learning_rate": 1.5397783367105307e-06, "loss": 0.4212, "step": 3376 }, { "epoch": 0.7514463729417, "grad_norm": 1.3449989988984221, "learning_rate": 1.53717798030992e-06, "loss": 0.4195, "step": 3377 }, { "epoch": 0.7516688918558078, "grad_norm": 1.3148823989892864, "learning_rate": 1.5345794225986433e-06, "loss": 0.4173, "step": 3378 }, { "epoch": 0.7518914107699154, "grad_norm": 1.3399927581987687, "learning_rate": 1.5319826649264636e-06, "loss": 0.4068, "step": 3379 }, { "epoch": 0.7521139296840231, "grad_norm": 1.4565112906642865, "learning_rate": 1.5293877086422126e-06, "loss": 0.4186, "step": 3380 }, { "epoch": 0.7523364485981309, "grad_norm": 1.388958753576119, "learning_rate": 1.5267945550937869e-06, "loss": 0.4291, "step": 3381 }, { "epoch": 0.7525589675122385, "grad_norm": 1.3318167139817023, "learning_rate": 1.5242032056281419e-06, "loss": 0.4293, "step": 3382 }, { "epoch": 0.7527814864263462, "grad_norm": 1.3314799126516363, "learning_rate": 1.5216136615913006e-06, "loss": 0.429, "step": 3383 }, { "epoch": 0.753004005340454, "grad_norm": 1.2332789089272946, "learning_rate": 1.5190259243283468e-06, "loss": 0.4207, "step": 3384 }, { "epoch": 0.7532265242545616, "grad_norm": 1.341432574790588, "learning_rate": 1.5164399951834258e-06, "loss": 0.4235, "step": 3385 }, { "epoch": 0.7534490431686693, "grad_norm": 1.565181165160548, "learning_rate": 1.5138558754997445e-06, "loss": 0.4335, "step": 3386 }, { "epoch": 0.7536715620827771, "grad_norm": 1.2801962732457777, "learning_rate": 1.5112735666195666e-06, "loss": 0.4165, "step": 3387 }, { "epoch": 0.7538940809968847, "grad_norm": 1.2459719198673613, "learning_rate": 1.5086930698842183e-06, "loss": 0.4248, "step": 3388 }, { "epoch": 0.7541165999109924, "grad_norm": 1.3275406656829714, "learning_rate": 1.5061143866340844e-06, "loss": 0.4335, "step": 3389 }, { "epoch": 0.7543391188251002, "grad_norm": 1.2991761475577297, "learning_rate": 1.503537518208607e-06, "loss": 0.4223, "step": 3390 }, { "epoch": 0.7545616377392078, "grad_norm": 1.3794919094294464, "learning_rate": 1.5009624659462874e-06, "loss": 0.4315, "step": 3391 }, { "epoch": 0.7547841566533156, "grad_norm": 1.308630088685779, "learning_rate": 1.498389231184678e-06, "loss": 0.4252, "step": 3392 }, { "epoch": 0.7550066755674232, "grad_norm": 1.2893682991234947, "learning_rate": 1.4958178152603954e-06, "loss": 0.4131, "step": 3393 }, { "epoch": 0.7552291944815309, "grad_norm": 1.3926579220506843, "learning_rate": 1.4932482195091024e-06, "loss": 0.4442, "step": 3394 }, { "epoch": 0.7554517133956387, "grad_norm": 1.2966448659053553, "learning_rate": 1.4906804452655216e-06, "loss": 0.4324, "step": 3395 }, { "epoch": 0.7556742323097463, "grad_norm": 1.3094323064993036, "learning_rate": 1.4881144938634334e-06, "loss": 0.4317, "step": 3396 }, { "epoch": 0.755896751223854, "grad_norm": 1.3463450120345084, "learning_rate": 1.485550366635662e-06, "loss": 0.4183, "step": 3397 }, { "epoch": 0.7561192701379618, "grad_norm": 1.39726110102709, "learning_rate": 1.482988064914092e-06, "loss": 0.4374, "step": 3398 }, { "epoch": 0.7563417890520694, "grad_norm": 1.4082802716815959, "learning_rate": 1.4804275900296533e-06, "loss": 0.4548, "step": 3399 }, { "epoch": 0.7565643079661771, "grad_norm": 1.425491511629156, "learning_rate": 1.4778689433123321e-06, "loss": 0.4283, "step": 3400 }, { "epoch": 0.7567868268802849, "grad_norm": 1.2427958887300623, "learning_rate": 1.475312126091163e-06, "loss": 0.4279, "step": 3401 }, { "epoch": 0.7570093457943925, "grad_norm": 1.3240681824035636, "learning_rate": 1.4727571396942303e-06, "loss": 0.4289, "step": 3402 }, { "epoch": 0.7572318647085002, "grad_norm": 1.3772981971635812, "learning_rate": 1.4702039854486683e-06, "loss": 0.4325, "step": 3403 }, { "epoch": 0.757454383622608, "grad_norm": 1.3760606142984342, "learning_rate": 1.4676526646806566e-06, "loss": 0.4413, "step": 3404 }, { "epoch": 0.7576769025367156, "grad_norm": 1.2627031472041375, "learning_rate": 1.4651031787154263e-06, "loss": 0.4409, "step": 3405 }, { "epoch": 0.7578994214508233, "grad_norm": 1.3745102842718786, "learning_rate": 1.4625555288772543e-06, "loss": 0.4283, "step": 3406 }, { "epoch": 0.758121940364931, "grad_norm": 1.3786230076636854, "learning_rate": 1.460009716489459e-06, "loss": 0.442, "step": 3407 }, { "epoch": 0.7583444592790387, "grad_norm": 1.3201915435255362, "learning_rate": 1.4574657428744144e-06, "loss": 0.4353, "step": 3408 }, { "epoch": 0.7585669781931464, "grad_norm": 1.2561912632315295, "learning_rate": 1.4549236093535296e-06, "loss": 0.4437, "step": 3409 }, { "epoch": 0.7587894971072541, "grad_norm": 1.339748169878727, "learning_rate": 1.452383317247263e-06, "loss": 0.4192, "step": 3410 }, { "epoch": 0.7590120160213618, "grad_norm": 1.3015496684507926, "learning_rate": 1.4498448678751164e-06, "loss": 0.424, "step": 3411 }, { "epoch": 0.7592345349354696, "grad_norm": 1.3529396150474036, "learning_rate": 1.4473082625556318e-06, "loss": 0.4366, "step": 3412 }, { "epoch": 0.7594570538495772, "grad_norm": 1.197991262563366, "learning_rate": 1.4447735026063946e-06, "loss": 0.437, "step": 3413 }, { "epoch": 0.7596795727636849, "grad_norm": 1.2968406608994285, "learning_rate": 1.442240589344034e-06, "loss": 0.4357, "step": 3414 }, { "epoch": 0.7599020916777927, "grad_norm": 1.3441316667594863, "learning_rate": 1.4397095240842162e-06, "loss": 0.4372, "step": 3415 }, { "epoch": 0.7601246105919003, "grad_norm": 1.344844914904635, "learning_rate": 1.437180308141652e-06, "loss": 0.4255, "step": 3416 }, { "epoch": 0.760347129506008, "grad_norm": 1.275562303043665, "learning_rate": 1.4346529428300849e-06, "loss": 0.4228, "step": 3417 }, { "epoch": 0.7605696484201158, "grad_norm": 1.3482815879192085, "learning_rate": 1.432127429462305e-06, "loss": 0.4345, "step": 3418 }, { "epoch": 0.7607921673342234, "grad_norm": 1.2635779518423214, "learning_rate": 1.429603769350133e-06, "loss": 0.4328, "step": 3419 }, { "epoch": 0.7610146862483311, "grad_norm": 1.347839379219385, "learning_rate": 1.4270819638044324e-06, "loss": 0.4274, "step": 3420 }, { "epoch": 0.7612372051624388, "grad_norm": 1.2611877338202269, "learning_rate": 1.4245620141351013e-06, "loss": 0.421, "step": 3421 }, { "epoch": 0.7614597240765465, "grad_norm": 1.2096579935948202, "learning_rate": 1.4220439216510739e-06, "loss": 0.4307, "step": 3422 }, { "epoch": 0.7616822429906542, "grad_norm": 1.344051578471167, "learning_rate": 1.4195276876603213e-06, "loss": 0.4361, "step": 3423 }, { "epoch": 0.7619047619047619, "grad_norm": 1.3496808765238666, "learning_rate": 1.4170133134698454e-06, "loss": 0.4505, "step": 3424 }, { "epoch": 0.7621272808188696, "grad_norm": 1.439355882179149, "learning_rate": 1.4145008003856858e-06, "loss": 0.4255, "step": 3425 }, { "epoch": 0.7623497997329773, "grad_norm": 1.3045091629958037, "learning_rate": 1.4119901497129135e-06, "loss": 0.4368, "step": 3426 }, { "epoch": 0.762572318647085, "grad_norm": 1.2164155108008694, "learning_rate": 1.4094813627556325e-06, "loss": 0.4118, "step": 3427 }, { "epoch": 0.7627948375611927, "grad_norm": 1.264212963559343, "learning_rate": 1.4069744408169816e-06, "loss": 0.4209, "step": 3428 }, { "epoch": 0.7630173564753004, "grad_norm": 1.4147021456691358, "learning_rate": 1.4044693851991238e-06, "loss": 0.4392, "step": 3429 }, { "epoch": 0.7632398753894081, "grad_norm": 1.2721260641675718, "learning_rate": 1.4019661972032595e-06, "loss": 0.4366, "step": 3430 }, { "epoch": 0.7634623943035158, "grad_norm": 1.4386678586684978, "learning_rate": 1.3994648781296178e-06, "loss": 0.4359, "step": 3431 }, { "epoch": 0.7636849132176236, "grad_norm": 2.8064990946446318, "learning_rate": 1.3969654292774503e-06, "loss": 0.4099, "step": 3432 }, { "epoch": 0.7639074321317312, "grad_norm": 1.32367620300394, "learning_rate": 1.39446785194505e-06, "loss": 0.4144, "step": 3433 }, { "epoch": 0.7641299510458389, "grad_norm": 1.3876991515222938, "learning_rate": 1.3919721474297249e-06, "loss": 0.4303, "step": 3434 }, { "epoch": 0.7643524699599465, "grad_norm": 1.378977027059023, "learning_rate": 1.389478317027818e-06, "loss": 0.429, "step": 3435 }, { "epoch": 0.7645749888740543, "grad_norm": 1.2934209178686011, "learning_rate": 1.3869863620346973e-06, "loss": 0.4151, "step": 3436 }, { "epoch": 0.764797507788162, "grad_norm": 1.2560183620451535, "learning_rate": 1.3844962837447535e-06, "loss": 0.419, "step": 3437 }, { "epoch": 0.7650200267022697, "grad_norm": 1.2714858079298483, "learning_rate": 1.382008083451406e-06, "loss": 0.4361, "step": 3438 }, { "epoch": 0.7652425456163774, "grad_norm": 1.3959547212466368, "learning_rate": 1.3795217624470975e-06, "loss": 0.4386, "step": 3439 }, { "epoch": 0.7654650645304851, "grad_norm": 1.2852521805453385, "learning_rate": 1.3770373220232957e-06, "loss": 0.4286, "step": 3440 }, { "epoch": 0.7656875834445928, "grad_norm": 1.3830073431097922, "learning_rate": 1.3745547634704887e-06, "loss": 0.4185, "step": 3441 }, { "epoch": 0.7659101023587005, "grad_norm": 1.3811661861281674, "learning_rate": 1.3720740880781892e-06, "loss": 0.4421, "step": 3442 }, { "epoch": 0.7661326212728082, "grad_norm": 1.3516619040470539, "learning_rate": 1.3695952971349336e-06, "loss": 0.4359, "step": 3443 }, { "epoch": 0.7663551401869159, "grad_norm": 1.2200843969821769, "learning_rate": 1.3671183919282716e-06, "loss": 0.4248, "step": 3444 }, { "epoch": 0.7665776591010236, "grad_norm": 1.3214282540186864, "learning_rate": 1.3646433737447863e-06, "loss": 0.4322, "step": 3445 }, { "epoch": 0.7668001780151313, "grad_norm": 1.284713214675395, "learning_rate": 1.3621702438700678e-06, "loss": 0.4371, "step": 3446 }, { "epoch": 0.767022696929239, "grad_norm": 1.3163447542092586, "learning_rate": 1.3596990035887332e-06, "loss": 0.4434, "step": 3447 }, { "epoch": 0.7672452158433467, "grad_norm": 1.444521219257108, "learning_rate": 1.357229654184416e-06, "loss": 0.4334, "step": 3448 }, { "epoch": 0.7674677347574543, "grad_norm": 1.2227412870549075, "learning_rate": 1.3547621969397657e-06, "loss": 0.4312, "step": 3449 }, { "epoch": 0.7676902536715621, "grad_norm": 1.2565732921883601, "learning_rate": 1.3522966331364512e-06, "loss": 0.4238, "step": 3450 }, { "epoch": 0.7679127725856698, "grad_norm": 1.3190656258223652, "learning_rate": 1.3498329640551576e-06, "loss": 0.4392, "step": 3451 }, { "epoch": 0.7681352914997774, "grad_norm": 1.2492539892999899, "learning_rate": 1.3473711909755853e-06, "loss": 0.4321, "step": 3452 }, { "epoch": 0.7683578104138852, "grad_norm": 1.2439354774613895, "learning_rate": 1.344911315176452e-06, "loss": 0.4362, "step": 3453 }, { "epoch": 0.7685803293279929, "grad_norm": 1.3158670742764218, "learning_rate": 1.3424533379354842e-06, "loss": 0.4437, "step": 3454 }, { "epoch": 0.7688028482421005, "grad_norm": 1.3031008755374902, "learning_rate": 1.3399972605294277e-06, "loss": 0.4286, "step": 3455 }, { "epoch": 0.7690253671562083, "grad_norm": 1.3652236358371497, "learning_rate": 1.3375430842340415e-06, "loss": 0.4278, "step": 3456 }, { "epoch": 0.769247886070316, "grad_norm": 1.2715211989329123, "learning_rate": 1.3350908103240905e-06, "loss": 0.4306, "step": 3457 }, { "epoch": 0.7694704049844237, "grad_norm": 1.2297060250279952, "learning_rate": 1.3326404400733623e-06, "loss": 0.4356, "step": 3458 }, { "epoch": 0.7696929238985314, "grad_norm": 1.301822531605812, "learning_rate": 1.3301919747546455e-06, "loss": 0.4142, "step": 3459 }, { "epoch": 0.7699154428126391, "grad_norm": 1.3642123858180077, "learning_rate": 1.3277454156397457e-06, "loss": 0.413, "step": 3460 }, { "epoch": 0.7701379617267468, "grad_norm": 1.2625959411291998, "learning_rate": 1.3253007639994743e-06, "loss": 0.4209, "step": 3461 }, { "epoch": 0.7703604806408545, "grad_norm": 1.268111517021224, "learning_rate": 1.3228580211036541e-06, "loss": 0.4337, "step": 3462 }, { "epoch": 0.7705829995549621, "grad_norm": 1.3683817667970408, "learning_rate": 1.3204171882211158e-06, "loss": 0.4189, "step": 3463 }, { "epoch": 0.7708055184690699, "grad_norm": 1.2717555838941879, "learning_rate": 1.3179782666196993e-06, "loss": 0.438, "step": 3464 }, { "epoch": 0.7710280373831776, "grad_norm": 1.3191145227729177, "learning_rate": 1.3155412575662513e-06, "loss": 0.4089, "step": 3465 }, { "epoch": 0.7712505562972852, "grad_norm": 1.4245436376824434, "learning_rate": 1.3131061623266217e-06, "loss": 0.426, "step": 3466 }, { "epoch": 0.771473075211393, "grad_norm": 1.3182897881302398, "learning_rate": 1.3106729821656706e-06, "loss": 0.4206, "step": 3467 }, { "epoch": 0.7716955941255007, "grad_norm": 1.3594551186417991, "learning_rate": 1.3082417183472623e-06, "loss": 0.4249, "step": 3468 }, { "epoch": 0.7719181130396083, "grad_norm": 1.2678409939181428, "learning_rate": 1.305812372134262e-06, "loss": 0.432, "step": 3469 }, { "epoch": 0.7721406319537161, "grad_norm": 1.4188524078027356, "learning_rate": 1.3033849447885471e-06, "loss": 0.4339, "step": 3470 }, { "epoch": 0.7723631508678238, "grad_norm": 1.2897261402969153, "learning_rate": 1.3009594375709888e-06, "loss": 0.4492, "step": 3471 }, { "epoch": 0.7725856697819314, "grad_norm": 1.3611587333125137, "learning_rate": 1.298535851741467e-06, "loss": 0.4376, "step": 3472 }, { "epoch": 0.7728081886960392, "grad_norm": 1.3453962549233625, "learning_rate": 1.2961141885588634e-06, "loss": 0.4293, "step": 3473 }, { "epoch": 0.7730307076101469, "grad_norm": 1.3739028644691471, "learning_rate": 1.293694449281056e-06, "loss": 0.4156, "step": 3474 }, { "epoch": 0.7732532265242545, "grad_norm": 1.2859371306790874, "learning_rate": 1.2912766351649293e-06, "loss": 0.4329, "step": 3475 }, { "epoch": 0.7734757454383623, "grad_norm": 1.4235795096668478, "learning_rate": 1.288860747466365e-06, "loss": 0.4459, "step": 3476 }, { "epoch": 0.7736982643524699, "grad_norm": 1.317420675016825, "learning_rate": 1.2864467874402442e-06, "loss": 0.4254, "step": 3477 }, { "epoch": 0.7739207832665776, "grad_norm": 1.2836796177145777, "learning_rate": 1.2840347563404492e-06, "loss": 0.4222, "step": 3478 }, { "epoch": 0.7741433021806854, "grad_norm": 1.2752943127811607, "learning_rate": 1.2816246554198557e-06, "loss": 0.4382, "step": 3479 }, { "epoch": 0.774365821094793, "grad_norm": 1.2616430600851332, "learning_rate": 1.2792164859303413e-06, "loss": 0.4272, "step": 3480 }, { "epoch": 0.7745883400089008, "grad_norm": 1.343968293191336, "learning_rate": 1.2768102491227751e-06, "loss": 0.4163, "step": 3481 }, { "epoch": 0.7748108589230085, "grad_norm": 1.230222085487637, "learning_rate": 1.274405946247031e-06, "loss": 0.4166, "step": 3482 }, { "epoch": 0.7750333778371161, "grad_norm": 1.2651447797989155, "learning_rate": 1.2720035785519685e-06, "loss": 0.4157, "step": 3483 }, { "epoch": 0.7752558967512239, "grad_norm": 1.2448625721266553, "learning_rate": 1.2696031472854486e-06, "loss": 0.4049, "step": 3484 }, { "epoch": 0.7754784156653316, "grad_norm": 1.2614437395941291, "learning_rate": 1.2672046536943256e-06, "loss": 0.4202, "step": 3485 }, { "epoch": 0.7757009345794392, "grad_norm": 1.245561170781161, "learning_rate": 1.2648080990244426e-06, "loss": 0.4334, "step": 3486 }, { "epoch": 0.775923453493547, "grad_norm": 1.2844541666549494, "learning_rate": 1.2624134845206408e-06, "loss": 0.4289, "step": 3487 }, { "epoch": 0.7761459724076547, "grad_norm": 1.303865631258977, "learning_rate": 1.260020811426752e-06, "loss": 0.421, "step": 3488 }, { "epoch": 0.7763684913217623, "grad_norm": 1.4272804003364976, "learning_rate": 1.257630080985599e-06, "loss": 0.4184, "step": 3489 }, { "epoch": 0.7765910102358701, "grad_norm": 1.3296105283957864, "learning_rate": 1.2552412944389974e-06, "loss": 0.4291, "step": 3490 }, { "epoch": 0.7768135291499777, "grad_norm": 1.2932810785924884, "learning_rate": 1.2528544530277492e-06, "loss": 0.422, "step": 3491 }, { "epoch": 0.7770360480640854, "grad_norm": 1.3944388776561543, "learning_rate": 1.2504695579916498e-06, "loss": 0.4439, "step": 3492 }, { "epoch": 0.7772585669781932, "grad_norm": 1.300330950657772, "learning_rate": 1.2480866105694838e-06, "loss": 0.4209, "step": 3493 }, { "epoch": 0.7774810858923008, "grad_norm": 1.3909902561400427, "learning_rate": 1.2457056119990175e-06, "loss": 0.4042, "step": 3494 }, { "epoch": 0.7777036048064085, "grad_norm": 1.4196463011864415, "learning_rate": 1.2433265635170166e-06, "loss": 0.4465, "step": 3495 }, { "epoch": 0.7779261237205163, "grad_norm": 1.3903544895073625, "learning_rate": 1.240949466359223e-06, "loss": 0.4368, "step": 3496 }, { "epoch": 0.7781486426346239, "grad_norm": 1.3153429225046283, "learning_rate": 1.2385743217603703e-06, "loss": 0.4508, "step": 3497 }, { "epoch": 0.7783711615487316, "grad_norm": 1.4929909013377638, "learning_rate": 1.2362011309541784e-06, "loss": 0.4413, "step": 3498 }, { "epoch": 0.7785936804628394, "grad_norm": 1.3550698389621574, "learning_rate": 1.233829895173348e-06, "loss": 0.4346, "step": 3499 }, { "epoch": 0.778816199376947, "grad_norm": 1.2307092152852397, "learning_rate": 1.2314606156495683e-06, "loss": 0.4035, "step": 3500 }, { "epoch": 0.7790387182910548, "grad_norm": 1.274454382082097, "learning_rate": 1.2290932936135125e-06, "loss": 0.4187, "step": 3501 }, { "epoch": 0.7792612372051625, "grad_norm": 1.2950251206818824, "learning_rate": 1.226727930294836e-06, "loss": 0.4115, "step": 3502 }, { "epoch": 0.7794837561192701, "grad_norm": 1.4003549852709345, "learning_rate": 1.2243645269221732e-06, "loss": 0.4127, "step": 3503 }, { "epoch": 0.7797062750333779, "grad_norm": 1.4587085141324676, "learning_rate": 1.2220030847231468e-06, "loss": 0.4413, "step": 3504 }, { "epoch": 0.7799287939474855, "grad_norm": 1.3154264713487658, "learning_rate": 1.219643604924358e-06, "loss": 0.4426, "step": 3505 }, { "epoch": 0.7801513128615932, "grad_norm": 1.2680394635579435, "learning_rate": 1.2172860887513844e-06, "loss": 0.4201, "step": 3506 }, { "epoch": 0.780373831775701, "grad_norm": 1.2913754667700312, "learning_rate": 1.2149305374287934e-06, "loss": 0.425, "step": 3507 }, { "epoch": 0.7805963506898086, "grad_norm": 1.4484010040266582, "learning_rate": 1.2125769521801223e-06, "loss": 0.4362, "step": 3508 }, { "epoch": 0.7808188696039163, "grad_norm": 1.4199840999290418, "learning_rate": 1.210225334227892e-06, "loss": 0.4344, "step": 3509 }, { "epoch": 0.7810413885180241, "grad_norm": 1.3452627941917317, "learning_rate": 1.207875684793602e-06, "loss": 0.4238, "step": 3510 }, { "epoch": 0.7812639074321317, "grad_norm": 1.265747825451701, "learning_rate": 1.205528005097724e-06, "loss": 0.4271, "step": 3511 }, { "epoch": 0.7814864263462394, "grad_norm": 1.43881357654444, "learning_rate": 1.2031822963597134e-06, "loss": 0.4386, "step": 3512 }, { "epoch": 0.7817089452603472, "grad_norm": 1.3224872163234271, "learning_rate": 1.2008385597979982e-06, "loss": 0.4146, "step": 3513 }, { "epoch": 0.7819314641744548, "grad_norm": 1.382846917480653, "learning_rate": 1.198496796629982e-06, "loss": 0.4336, "step": 3514 }, { "epoch": 0.7821539830885625, "grad_norm": 1.3255298331427223, "learning_rate": 1.1961570080720459e-06, "loss": 0.4383, "step": 3515 }, { "epoch": 0.7823765020026703, "grad_norm": 1.2951894574328442, "learning_rate": 1.1938191953395401e-06, "loss": 0.4328, "step": 3516 }, { "epoch": 0.7825990209167779, "grad_norm": 1.4152265144035778, "learning_rate": 1.191483359646793e-06, "loss": 0.4098, "step": 3517 }, { "epoch": 0.7828215398308856, "grad_norm": 1.2848717582369062, "learning_rate": 1.1891495022071059e-06, "loss": 0.4289, "step": 3518 }, { "epoch": 0.7830440587449933, "grad_norm": 1.3691668873627572, "learning_rate": 1.1868176242327507e-06, "loss": 0.4126, "step": 3519 }, { "epoch": 0.783266577659101, "grad_norm": 1.371681184484915, "learning_rate": 1.184487726934973e-06, "loss": 0.4238, "step": 3520 }, { "epoch": 0.7834890965732088, "grad_norm": 1.336493540497576, "learning_rate": 1.1821598115239863e-06, "loss": 0.403, "step": 3521 }, { "epoch": 0.7837116154873164, "grad_norm": 1.399783887209758, "learning_rate": 1.1798338792089792e-06, "loss": 0.4347, "step": 3522 }, { "epoch": 0.7839341344014241, "grad_norm": 1.4285202192846032, "learning_rate": 1.1775099311981052e-06, "loss": 0.411, "step": 3523 }, { "epoch": 0.7841566533155319, "grad_norm": 1.283221782831577, "learning_rate": 1.1751879686984896e-06, "loss": 0.4366, "step": 3524 }, { "epoch": 0.7843791722296395, "grad_norm": 1.3083685427320995, "learning_rate": 1.1728679929162313e-06, "loss": 0.4206, "step": 3525 }, { "epoch": 0.7846016911437472, "grad_norm": 1.3025120725461865, "learning_rate": 1.1705500050563873e-06, "loss": 0.4309, "step": 3526 }, { "epoch": 0.784824210057855, "grad_norm": 1.3438207347594986, "learning_rate": 1.1682340063229902e-06, "loss": 0.4105, "step": 3527 }, { "epoch": 0.7850467289719626, "grad_norm": 1.334212454671534, "learning_rate": 1.1659199979190339e-06, "loss": 0.4217, "step": 3528 }, { "epoch": 0.7852692478860703, "grad_norm": 1.2894984590827059, "learning_rate": 1.1636079810464818e-06, "loss": 0.4432, "step": 3529 }, { "epoch": 0.7854917668001781, "grad_norm": 1.315106631215817, "learning_rate": 1.1612979569062638e-06, "loss": 0.4181, "step": 3530 }, { "epoch": 0.7857142857142857, "grad_norm": 1.2902865739039662, "learning_rate": 1.1589899266982691e-06, "loss": 0.4393, "step": 3531 }, { "epoch": 0.7859368046283934, "grad_norm": 1.455197366874215, "learning_rate": 1.156683891621359e-06, "loss": 0.4215, "step": 3532 }, { "epoch": 0.7861593235425011, "grad_norm": 1.292328061548586, "learning_rate": 1.1543798528733518e-06, "loss": 0.4135, "step": 3533 }, { "epoch": 0.7863818424566088, "grad_norm": 1.3252761919910503, "learning_rate": 1.1520778116510323e-06, "loss": 0.4359, "step": 3534 }, { "epoch": 0.7866043613707165, "grad_norm": 1.2586410102796066, "learning_rate": 1.1497777691501484e-06, "loss": 0.4195, "step": 3535 }, { "epoch": 0.7868268802848242, "grad_norm": 1.4058911259233047, "learning_rate": 1.1474797265654048e-06, "loss": 0.421, "step": 3536 }, { "epoch": 0.7870493991989319, "grad_norm": 1.249694190542169, "learning_rate": 1.1451836850904736e-06, "loss": 0.4412, "step": 3537 }, { "epoch": 0.7872719181130396, "grad_norm": 1.4205671216967792, "learning_rate": 1.1428896459179833e-06, "loss": 0.4263, "step": 3538 }, { "epoch": 0.7874944370271473, "grad_norm": 1.3554350232231944, "learning_rate": 1.140597610239525e-06, "loss": 0.4299, "step": 3539 }, { "epoch": 0.787716955941255, "grad_norm": 1.3467151690708303, "learning_rate": 1.1383075792456493e-06, "loss": 0.4231, "step": 3540 }, { "epoch": 0.7879394748553628, "grad_norm": 1.2367351806751772, "learning_rate": 1.1360195541258606e-06, "loss": 0.4219, "step": 3541 }, { "epoch": 0.7881619937694704, "grad_norm": 1.3641575928673235, "learning_rate": 1.133733536068628e-06, "loss": 0.431, "step": 3542 }, { "epoch": 0.7883845126835781, "grad_norm": 1.3509647789973491, "learning_rate": 1.1314495262613712e-06, "loss": 0.4166, "step": 3543 }, { "epoch": 0.7886070315976857, "grad_norm": 1.2155703932914659, "learning_rate": 1.1291675258904755e-06, "loss": 0.4229, "step": 3544 }, { "epoch": 0.7888295505117935, "grad_norm": 1.3259892723564726, "learning_rate": 1.126887536141274e-06, "loss": 0.4129, "step": 3545 }, { "epoch": 0.7890520694259012, "grad_norm": 1.2899030252506372, "learning_rate": 1.1246095581980604e-06, "loss": 0.4198, "step": 3546 }, { "epoch": 0.7892745883400089, "grad_norm": 1.2634282186556212, "learning_rate": 1.1223335932440827e-06, "loss": 0.4297, "step": 3547 }, { "epoch": 0.7894971072541166, "grad_norm": 1.2266350774360193, "learning_rate": 1.1200596424615396e-06, "loss": 0.4431, "step": 3548 }, { "epoch": 0.7897196261682243, "grad_norm": 1.39877202471509, "learning_rate": 1.117787707031589e-06, "loss": 0.4239, "step": 3549 }, { "epoch": 0.789942145082332, "grad_norm": 1.3017517758124382, "learning_rate": 1.1155177881343383e-06, "loss": 0.4466, "step": 3550 }, { "epoch": 0.7901646639964397, "grad_norm": 1.3366435940688584, "learning_rate": 1.1132498869488496e-06, "loss": 0.4165, "step": 3551 }, { "epoch": 0.7903871829105474, "grad_norm": 1.2666405802764458, "learning_rate": 1.1109840046531368e-06, "loss": 0.4336, "step": 3552 }, { "epoch": 0.7906097018246551, "grad_norm": 1.2727615051958714, "learning_rate": 1.1087201424241622e-06, "loss": 0.4282, "step": 3553 }, { "epoch": 0.7908322207387628, "grad_norm": 1.4698081681895188, "learning_rate": 1.1064583014378417e-06, "loss": 0.423, "step": 3554 }, { "epoch": 0.7910547396528705, "grad_norm": 1.3299302173004812, "learning_rate": 1.1041984828690399e-06, "loss": 0.4176, "step": 3555 }, { "epoch": 0.7912772585669782, "grad_norm": 1.280227663733173, "learning_rate": 1.1019406878915734e-06, "loss": 0.4127, "step": 3556 }, { "epoch": 0.7914997774810859, "grad_norm": 1.3100422100438769, "learning_rate": 1.0996849176782054e-06, "loss": 0.4246, "step": 3557 }, { "epoch": 0.7917222963951935, "grad_norm": 1.4300788866247256, "learning_rate": 1.0974311734006466e-06, "loss": 0.4065, "step": 3558 }, { "epoch": 0.7919448153093013, "grad_norm": 1.3160427430952624, "learning_rate": 1.0951794562295564e-06, "loss": 0.4373, "step": 3559 }, { "epoch": 0.792167334223409, "grad_norm": 1.3545387966913303, "learning_rate": 1.092929767334544e-06, "loss": 0.4249, "step": 3560 }, { "epoch": 0.7923898531375166, "grad_norm": 1.3619289343376146, "learning_rate": 1.0906821078841584e-06, "loss": 0.415, "step": 3561 }, { "epoch": 0.7926123720516244, "grad_norm": 1.3670481760687565, "learning_rate": 1.088436479045903e-06, "loss": 0.4297, "step": 3562 }, { "epoch": 0.7928348909657321, "grad_norm": 1.3522054085581838, "learning_rate": 1.0861928819862189e-06, "loss": 0.4255, "step": 3563 }, { "epoch": 0.7930574098798397, "grad_norm": 1.2532125555717122, "learning_rate": 1.0839513178704968e-06, "loss": 0.4108, "step": 3564 }, { "epoch": 0.7932799287939475, "grad_norm": 1.411648974745317, "learning_rate": 1.081711787863068e-06, "loss": 0.4349, "step": 3565 }, { "epoch": 0.7935024477080552, "grad_norm": 1.3235949485738796, "learning_rate": 1.079474293127209e-06, "loss": 0.4331, "step": 3566 }, { "epoch": 0.7937249666221629, "grad_norm": 1.3101215453262747, "learning_rate": 1.077238834825141e-06, "loss": 0.404, "step": 3567 }, { "epoch": 0.7939474855362706, "grad_norm": 1.2415486332665306, "learning_rate": 1.0750054141180212e-06, "loss": 0.4322, "step": 3568 }, { "epoch": 0.7941700044503783, "grad_norm": 1.3174596177215403, "learning_rate": 1.0727740321659568e-06, "loss": 0.4264, "step": 3569 }, { "epoch": 0.794392523364486, "grad_norm": 1.3463133704514563, "learning_rate": 1.0705446901279897e-06, "loss": 0.4174, "step": 3570 }, { "epoch": 0.7946150422785937, "grad_norm": 1.267043639064423, "learning_rate": 1.068317389162104e-06, "loss": 0.4134, "step": 3571 }, { "epoch": 0.7948375611927013, "grad_norm": 1.2537310394185093, "learning_rate": 1.0660921304252259e-06, "loss": 0.4243, "step": 3572 }, { "epoch": 0.7950600801068091, "grad_norm": 1.2952439360211363, "learning_rate": 1.0638689150732157e-06, "loss": 0.4216, "step": 3573 }, { "epoch": 0.7952825990209168, "grad_norm": 1.400012823800029, "learning_rate": 1.0616477442608774e-06, "loss": 0.424, "step": 3574 }, { "epoch": 0.7955051179350244, "grad_norm": 1.3662861346699795, "learning_rate": 1.0594286191419501e-06, "loss": 0.4189, "step": 3575 }, { "epoch": 0.7957276368491322, "grad_norm": 1.2461949983897753, "learning_rate": 1.0572115408691119e-06, "loss": 0.41, "step": 3576 }, { "epoch": 0.7959501557632399, "grad_norm": 1.20207331741288, "learning_rate": 1.054996510593978e-06, "loss": 0.4156, "step": 3577 }, { "epoch": 0.7961726746773475, "grad_norm": 1.2897264715735939, "learning_rate": 1.052783529467097e-06, "loss": 0.4222, "step": 3578 }, { "epoch": 0.7963951935914553, "grad_norm": 1.3438322714626565, "learning_rate": 1.0505725986379544e-06, "loss": 0.4145, "step": 3579 }, { "epoch": 0.796617712505563, "grad_norm": 1.3336582554096943, "learning_rate": 1.0483637192549728e-06, "loss": 0.4339, "step": 3580 }, { "epoch": 0.7968402314196706, "grad_norm": 1.369614452200341, "learning_rate": 1.0461568924655074e-06, "loss": 0.4266, "step": 3581 }, { "epoch": 0.7970627503337784, "grad_norm": 1.2695833665786929, "learning_rate": 1.0439521194158486e-06, "loss": 0.4256, "step": 3582 }, { "epoch": 0.7972852692478861, "grad_norm": 1.3109903624533261, "learning_rate": 1.0417494012512163e-06, "loss": 0.4421, "step": 3583 }, { "epoch": 0.7975077881619937, "grad_norm": 1.3888974849195987, "learning_rate": 1.0395487391157683e-06, "loss": 0.4314, "step": 3584 }, { "epoch": 0.7977303070761015, "grad_norm": 1.2477878174612071, "learning_rate": 1.0373501341525894e-06, "loss": 0.4227, "step": 3585 }, { "epoch": 0.7979528259902091, "grad_norm": 1.297996096134191, "learning_rate": 1.0351535875036978e-06, "loss": 0.4262, "step": 3586 }, { "epoch": 0.7981753449043169, "grad_norm": 1.2544899634677493, "learning_rate": 1.0329591003100475e-06, "loss": 0.4463, "step": 3587 }, { "epoch": 0.7983978638184246, "grad_norm": 1.3498328054662636, "learning_rate": 1.0307666737115135e-06, "loss": 0.4344, "step": 3588 }, { "epoch": 0.7986203827325322, "grad_norm": 1.3361705638509758, "learning_rate": 1.0285763088469087e-06, "loss": 0.4381, "step": 3589 }, { "epoch": 0.79884290164664, "grad_norm": 1.340241162069322, "learning_rate": 1.0263880068539684e-06, "loss": 0.4267, "step": 3590 }, { "epoch": 0.7990654205607477, "grad_norm": 1.2933322416544544, "learning_rate": 1.024201768869361e-06, "loss": 0.4307, "step": 3591 }, { "epoch": 0.7992879394748553, "grad_norm": 1.4312559763290327, "learning_rate": 1.022017596028682e-06, "loss": 0.4237, "step": 3592 }, { "epoch": 0.7995104583889631, "grad_norm": 1.362468313904917, "learning_rate": 1.0198354894664524e-06, "loss": 0.4309, "step": 3593 }, { "epoch": 0.7997329773030708, "grad_norm": 1.3133821182299912, "learning_rate": 1.0176554503161235e-06, "loss": 0.4082, "step": 3594 }, { "epoch": 0.7999554962171784, "grad_norm": 1.2631333511565368, "learning_rate": 1.0154774797100669e-06, "loss": 0.4111, "step": 3595 }, { "epoch": 0.8001780151312862, "grad_norm": 1.308568578396162, "learning_rate": 1.0133015787795853e-06, "loss": 0.4048, "step": 3596 }, { "epoch": 0.8004005340453939, "grad_norm": 1.2881728331505016, "learning_rate": 1.011127748654905e-06, "loss": 0.4084, "step": 3597 }, { "epoch": 0.8006230529595015, "grad_norm": 1.4231494566249412, "learning_rate": 1.0089559904651712e-06, "loss": 0.4294, "step": 3598 }, { "epoch": 0.8008455718736093, "grad_norm": 1.3276939184697543, "learning_rate": 1.0067863053384646e-06, "loss": 0.422, "step": 3599 }, { "epoch": 0.8010680907877169, "grad_norm": 1.2680248580673785, "learning_rate": 1.0046186944017767e-06, "loss": 0.4347, "step": 3600 }, { "epoch": 0.8012906097018246, "grad_norm": 1.3170987803690577, "learning_rate": 1.0024531587810282e-06, "loss": 0.4192, "step": 3601 }, { "epoch": 0.8015131286159324, "grad_norm": 1.3184784325573007, "learning_rate": 1.000289699601063e-06, "loss": 0.4105, "step": 3602 }, { "epoch": 0.80173564753004, "grad_norm": 1.2334973259049078, "learning_rate": 9.981283179856405e-07, "loss": 0.4158, "step": 3603 }, { "epoch": 0.8019581664441477, "grad_norm": 1.3671317317044052, "learning_rate": 9.959690150574475e-07, "loss": 0.4282, "step": 3604 }, { "epoch": 0.8021806853582555, "grad_norm": 1.230743354010639, "learning_rate": 9.938117919380835e-07, "loss": 0.4121, "step": 3605 }, { "epoch": 0.8024032042723631, "grad_norm": 1.3165515755528245, "learning_rate": 9.916566497480785e-07, "loss": 0.4102, "step": 3606 }, { "epoch": 0.8026257231864709, "grad_norm": 1.481512235897917, "learning_rate": 9.895035896068705e-07, "loss": 0.4394, "step": 3607 }, { "epoch": 0.8028482421005786, "grad_norm": 1.3112574327639177, "learning_rate": 9.873526126328227e-07, "loss": 0.4467, "step": 3608 }, { "epoch": 0.8030707610146862, "grad_norm": 1.3145739886799537, "learning_rate": 9.852037199432145e-07, "loss": 0.4242, "step": 3609 }, { "epoch": 0.803293279928794, "grad_norm": 1.483642825952603, "learning_rate": 9.830569126542416e-07, "loss": 0.4293, "step": 3610 }, { "epoch": 0.8035157988429017, "grad_norm": 1.2590687373031193, "learning_rate": 9.809121918810183e-07, "loss": 0.4175, "step": 3611 }, { "epoch": 0.8037383177570093, "grad_norm": 1.2966512993694612, "learning_rate": 9.787695587375734e-07, "loss": 0.4196, "step": 3612 }, { "epoch": 0.8039608366711171, "grad_norm": 1.283672688669555, "learning_rate": 9.766290143368535e-07, "loss": 0.4191, "step": 3613 }, { "epoch": 0.8041833555852247, "grad_norm": 1.3455142317749365, "learning_rate": 9.74490559790719e-07, "loss": 0.4193, "step": 3614 }, { "epoch": 0.8044058744993324, "grad_norm": 1.3640789720104267, "learning_rate": 9.72354196209942e-07, "loss": 0.4271, "step": 3615 }, { "epoch": 0.8046283934134402, "grad_norm": 1.286669395842695, "learning_rate": 9.702199247042138e-07, "loss": 0.4315, "step": 3616 }, { "epoch": 0.8048509123275478, "grad_norm": 1.2850435649781287, "learning_rate": 9.680877463821352e-07, "loss": 0.4227, "step": 3617 }, { "epoch": 0.8050734312416555, "grad_norm": 1.2880779428088667, "learning_rate": 9.659576623512219e-07, "loss": 0.4289, "step": 3618 }, { "epoch": 0.8052959501557633, "grad_norm": 1.4179478387553663, "learning_rate": 9.63829673717901e-07, "loss": 0.4189, "step": 3619 }, { "epoch": 0.8055184690698709, "grad_norm": 1.363490383379034, "learning_rate": 9.617037815875085e-07, "loss": 0.4175, "step": 3620 }, { "epoch": 0.8057409879839786, "grad_norm": 1.2600583476074134, "learning_rate": 9.595799870642964e-07, "loss": 0.437, "step": 3621 }, { "epoch": 0.8059635068980864, "grad_norm": 1.3013863453377763, "learning_rate": 9.574582912514252e-07, "loss": 0.424, "step": 3622 }, { "epoch": 0.806186025812194, "grad_norm": 1.2561835123411715, "learning_rate": 9.553386952509603e-07, "loss": 0.4143, "step": 3623 }, { "epoch": 0.8064085447263017, "grad_norm": 1.2826246150772576, "learning_rate": 9.532212001638869e-07, "loss": 0.4096, "step": 3624 }, { "epoch": 0.8066310636404095, "grad_norm": 1.3591766325907642, "learning_rate": 9.511058070900886e-07, "loss": 0.4347, "step": 3625 }, { "epoch": 0.8068535825545171, "grad_norm": 1.3836505954488538, "learning_rate": 9.489925171283637e-07, "loss": 0.439, "step": 3626 }, { "epoch": 0.8070761014686249, "grad_norm": 1.3656897810979725, "learning_rate": 9.46881331376413e-07, "loss": 0.4284, "step": 3627 }, { "epoch": 0.8072986203827325, "grad_norm": 1.3355999645554975, "learning_rate": 9.447722509308494e-07, "loss": 0.4337, "step": 3628 }, { "epoch": 0.8075211392968402, "grad_norm": 1.3299990058702056, "learning_rate": 9.426652768871891e-07, "loss": 0.4173, "step": 3629 }, { "epoch": 0.807743658210948, "grad_norm": 1.3714458182219542, "learning_rate": 9.405604103398552e-07, "loss": 0.4265, "step": 3630 }, { "epoch": 0.8079661771250556, "grad_norm": 1.4228493892485459, "learning_rate": 9.384576523821776e-07, "loss": 0.4342, "step": 3631 }, { "epoch": 0.8081886960391633, "grad_norm": 1.2829202176722883, "learning_rate": 9.363570041063863e-07, "loss": 0.4161, "step": 3632 }, { "epoch": 0.8084112149532711, "grad_norm": 1.4799601632636263, "learning_rate": 9.342584666036192e-07, "loss": 0.4395, "step": 3633 }, { "epoch": 0.8086337338673787, "grad_norm": 1.2437200773485846, "learning_rate": 9.321620409639193e-07, "loss": 0.411, "step": 3634 }, { "epoch": 0.8088562527814864, "grad_norm": 1.400551724749109, "learning_rate": 9.300677282762261e-07, "loss": 0.4162, "step": 3635 }, { "epoch": 0.8090787716955942, "grad_norm": 1.2930612024568053, "learning_rate": 9.279755296283905e-07, "loss": 0.4326, "step": 3636 }, { "epoch": 0.8093012906097018, "grad_norm": 1.3293053535279893, "learning_rate": 9.25885446107157e-07, "loss": 0.4369, "step": 3637 }, { "epoch": 0.8095238095238095, "grad_norm": 1.2652084596063715, "learning_rate": 9.237974787981774e-07, "loss": 0.4286, "step": 3638 }, { "epoch": 0.8097463284379173, "grad_norm": 1.3260463475769486, "learning_rate": 9.217116287860017e-07, "loss": 0.4196, "step": 3639 }, { "epoch": 0.8099688473520249, "grad_norm": 1.347296249745744, "learning_rate": 9.196278971540789e-07, "loss": 0.4268, "step": 3640 }, { "epoch": 0.8101913662661326, "grad_norm": 1.325982924956144, "learning_rate": 9.175462849847594e-07, "loss": 0.415, "step": 3641 }, { "epoch": 0.8104138851802403, "grad_norm": 1.3904133449467093, "learning_rate": 9.154667933592937e-07, "loss": 0.432, "step": 3642 }, { "epoch": 0.810636404094348, "grad_norm": 1.266881579537777, "learning_rate": 9.133894233578288e-07, "loss": 0.4039, "step": 3643 }, { "epoch": 0.8108589230084557, "grad_norm": 1.3629836365322427, "learning_rate": 9.113141760594119e-07, "loss": 0.423, "step": 3644 }, { "epoch": 0.8110814419225634, "grad_norm": 1.2990799709782372, "learning_rate": 9.092410525419831e-07, "loss": 0.4122, "step": 3645 }, { "epoch": 0.8113039608366711, "grad_norm": 1.2651758595210743, "learning_rate": 9.071700538823852e-07, "loss": 0.4203, "step": 3646 }, { "epoch": 0.8115264797507789, "grad_norm": 1.2393633461684572, "learning_rate": 9.051011811563521e-07, "loss": 0.4168, "step": 3647 }, { "epoch": 0.8117489986648865, "grad_norm": 1.3149328327304215, "learning_rate": 9.030344354385157e-07, "loss": 0.4185, "step": 3648 }, { "epoch": 0.8119715175789942, "grad_norm": 1.3449749553594628, "learning_rate": 9.009698178024074e-07, "loss": 0.431, "step": 3649 }, { "epoch": 0.812194036493102, "grad_norm": 1.3556201984610596, "learning_rate": 8.989073293204442e-07, "loss": 0.4081, "step": 3650 }, { "epoch": 0.8124165554072096, "grad_norm": 1.3546708111609445, "learning_rate": 8.968469710639449e-07, "loss": 0.4423, "step": 3651 }, { "epoch": 0.8126390743213173, "grad_norm": 1.3678856894945792, "learning_rate": 8.947887441031167e-07, "loss": 0.4382, "step": 3652 }, { "epoch": 0.8128615932354251, "grad_norm": 1.3412117125989667, "learning_rate": 8.927326495070626e-07, "loss": 0.443, "step": 3653 }, { "epoch": 0.8130841121495327, "grad_norm": 1.2781010613560326, "learning_rate": 8.906786883437773e-07, "loss": 0.4037, "step": 3654 }, { "epoch": 0.8133066310636404, "grad_norm": 1.337543544113158, "learning_rate": 8.886268616801474e-07, "loss": 0.4076, "step": 3655 }, { "epoch": 0.8135291499777481, "grad_norm": 1.2756837469323985, "learning_rate": 8.865771705819521e-07, "loss": 0.4242, "step": 3656 }, { "epoch": 0.8137516688918558, "grad_norm": 1.3688389383775839, "learning_rate": 8.845296161138572e-07, "loss": 0.436, "step": 3657 }, { "epoch": 0.8139741878059635, "grad_norm": 1.3654906311748733, "learning_rate": 8.824841993394228e-07, "loss": 0.4244, "step": 3658 }, { "epoch": 0.8141967067200712, "grad_norm": 1.2742220612482287, "learning_rate": 8.804409213210985e-07, "loss": 0.4226, "step": 3659 }, { "epoch": 0.8144192256341789, "grad_norm": 1.4991585396145084, "learning_rate": 8.783997831202174e-07, "loss": 0.4196, "step": 3660 }, { "epoch": 0.8146417445482866, "grad_norm": 1.338311339509696, "learning_rate": 8.763607857970108e-07, "loss": 0.4139, "step": 3661 }, { "epoch": 0.8148642634623943, "grad_norm": 1.3277237438536371, "learning_rate": 8.743239304105889e-07, "loss": 0.4256, "step": 3662 }, { "epoch": 0.815086782376502, "grad_norm": 1.3610369084411493, "learning_rate": 8.722892180189535e-07, "loss": 0.4211, "step": 3663 }, { "epoch": 0.8153093012906097, "grad_norm": 1.2803649355880091, "learning_rate": 8.702566496789943e-07, "loss": 0.3952, "step": 3664 }, { "epoch": 0.8155318202047174, "grad_norm": 1.2890068405106416, "learning_rate": 8.682262264464842e-07, "loss": 0.413, "step": 3665 }, { "epoch": 0.8157543391188251, "grad_norm": 1.4927379925439315, "learning_rate": 8.661979493760836e-07, "loss": 0.4108, "step": 3666 }, { "epoch": 0.8159768580329329, "grad_norm": 1.316287024713673, "learning_rate": 8.641718195213377e-07, "loss": 0.4164, "step": 3667 }, { "epoch": 0.8161993769470405, "grad_norm": 1.3584902399118455, "learning_rate": 8.621478379346782e-07, "loss": 0.4093, "step": 3668 }, { "epoch": 0.8164218958611482, "grad_norm": 1.3631959821208612, "learning_rate": 8.601260056674188e-07, "loss": 0.4183, "step": 3669 }, { "epoch": 0.8166444147752558, "grad_norm": 1.3983394585560927, "learning_rate": 8.581063237697551e-07, "loss": 0.4079, "step": 3670 }, { "epoch": 0.8168669336893636, "grad_norm": 1.3169833348972455, "learning_rate": 8.560887932907719e-07, "loss": 0.4186, "step": 3671 }, { "epoch": 0.8170894526034713, "grad_norm": 1.3615327863178859, "learning_rate": 8.54073415278427e-07, "loss": 0.4107, "step": 3672 }, { "epoch": 0.817311971517579, "grad_norm": 1.2837744714713764, "learning_rate": 8.520601907795717e-07, "loss": 0.4395, "step": 3673 }, { "epoch": 0.8175344904316867, "grad_norm": 1.3084331568829555, "learning_rate": 8.500491208399287e-07, "loss": 0.425, "step": 3674 }, { "epoch": 0.8177570093457944, "grad_norm": 1.2592380321443726, "learning_rate": 8.480402065041071e-07, "loss": 0.4076, "step": 3675 }, { "epoch": 0.8179795282599021, "grad_norm": 1.3304898470220605, "learning_rate": 8.460334488155952e-07, "loss": 0.4179, "step": 3676 }, { "epoch": 0.8182020471740098, "grad_norm": 1.4586377439235483, "learning_rate": 8.440288488167592e-07, "loss": 0.3992, "step": 3677 }, { "epoch": 0.8184245660881175, "grad_norm": 1.1943689172529217, "learning_rate": 8.420264075488466e-07, "loss": 0.413, "step": 3678 }, { "epoch": 0.8186470850022252, "grad_norm": 1.3515374696381532, "learning_rate": 8.400261260519832e-07, "loss": 0.4087, "step": 3679 }, { "epoch": 0.8188696039163329, "grad_norm": 1.3034206062745808, "learning_rate": 8.380280053651723e-07, "loss": 0.4149, "step": 3680 }, { "epoch": 0.8190921228304406, "grad_norm": 1.2921758142681061, "learning_rate": 8.360320465262973e-07, "loss": 0.433, "step": 3681 }, { "epoch": 0.8193146417445483, "grad_norm": 1.2686857533823128, "learning_rate": 8.340382505721134e-07, "loss": 0.4249, "step": 3682 }, { "epoch": 0.819537160658656, "grad_norm": 1.316190435146303, "learning_rate": 8.32046618538257e-07, "loss": 0.4229, "step": 3683 }, { "epoch": 0.8197596795727636, "grad_norm": 1.3819396823702905, "learning_rate": 8.300571514592404e-07, "loss": 0.412, "step": 3684 }, { "epoch": 0.8199821984868714, "grad_norm": 1.3431136860971467, "learning_rate": 8.280698503684458e-07, "loss": 0.3992, "step": 3685 }, { "epoch": 0.8202047174009791, "grad_norm": 1.2970653573872162, "learning_rate": 8.260847162981406e-07, "loss": 0.4117, "step": 3686 }, { "epoch": 0.8204272363150867, "grad_norm": 1.313358148152233, "learning_rate": 8.241017502794557e-07, "loss": 0.4132, "step": 3687 }, { "epoch": 0.8206497552291945, "grad_norm": 1.396534201251811, "learning_rate": 8.221209533424035e-07, "loss": 0.3994, "step": 3688 }, { "epoch": 0.8208722741433022, "grad_norm": 1.379125131525852, "learning_rate": 8.201423265158648e-07, "loss": 0.4193, "step": 3689 }, { "epoch": 0.8210947930574098, "grad_norm": 1.574224819219125, "learning_rate": 8.181658708275958e-07, "loss": 0.405, "step": 3690 }, { "epoch": 0.8213173119715176, "grad_norm": 1.3828650377181144, "learning_rate": 8.161915873042253e-07, "loss": 0.4242, "step": 3691 }, { "epoch": 0.8215398308856253, "grad_norm": 1.3703526833925133, "learning_rate": 8.142194769712519e-07, "loss": 0.4277, "step": 3692 }, { "epoch": 0.821762349799733, "grad_norm": 1.269874391682829, "learning_rate": 8.122495408530484e-07, "loss": 0.4072, "step": 3693 }, { "epoch": 0.8219848687138407, "grad_norm": 1.4803921837621679, "learning_rate": 8.10281779972854e-07, "loss": 0.4333, "step": 3694 }, { "epoch": 0.8222073876279484, "grad_norm": 1.1867859485333727, "learning_rate": 8.083161953527807e-07, "loss": 0.4123, "step": 3695 }, { "epoch": 0.822429906542056, "grad_norm": 1.2752403196106252, "learning_rate": 8.063527880138112e-07, "loss": 0.4221, "step": 3696 }, { "epoch": 0.8226524254561638, "grad_norm": 1.3690930451933272, "learning_rate": 8.043915589757928e-07, "loss": 0.4132, "step": 3697 }, { "epoch": 0.8228749443702714, "grad_norm": 1.3197354744778318, "learning_rate": 8.024325092574475e-07, "loss": 0.4258, "step": 3698 }, { "epoch": 0.8230974632843792, "grad_norm": 1.5470496775269358, "learning_rate": 8.004756398763602e-07, "loss": 0.4371, "step": 3699 }, { "epoch": 0.8233199821984869, "grad_norm": 1.3667903081481696, "learning_rate": 7.985209518489856e-07, "loss": 0.4145, "step": 3700 }, { "epoch": 0.8235425011125945, "grad_norm": 1.3300904505103268, "learning_rate": 7.965684461906453e-07, "loss": 0.4261, "step": 3701 }, { "epoch": 0.8237650200267023, "grad_norm": 1.481508171366786, "learning_rate": 7.946181239155259e-07, "loss": 0.4385, "step": 3702 }, { "epoch": 0.82398753894081, "grad_norm": 1.2880302567351818, "learning_rate": 7.92669986036681e-07, "loss": 0.4042, "step": 3703 }, { "epoch": 0.8242100578549176, "grad_norm": 1.5177409770744636, "learning_rate": 7.907240335660299e-07, "loss": 0.4362, "step": 3704 }, { "epoch": 0.8244325767690254, "grad_norm": 1.3696145226132075, "learning_rate": 7.887802675143563e-07, "loss": 0.4341, "step": 3705 }, { "epoch": 0.8246550956831331, "grad_norm": 1.289216245006959, "learning_rate": 7.868386888913093e-07, "loss": 0.4165, "step": 3706 }, { "epoch": 0.8248776145972407, "grad_norm": 1.3293539658744562, "learning_rate": 7.848992987053982e-07, "loss": 0.4351, "step": 3707 }, { "epoch": 0.8251001335113485, "grad_norm": 1.3506304696962865, "learning_rate": 7.829620979640002e-07, "loss": 0.4129, "step": 3708 }, { "epoch": 0.8253226524254562, "grad_norm": 1.3801058704605247, "learning_rate": 7.8102708767335e-07, "loss": 0.4292, "step": 3709 }, { "epoch": 0.8255451713395638, "grad_norm": 1.3043530360951299, "learning_rate": 7.790942688385511e-07, "loss": 0.4216, "step": 3710 }, { "epoch": 0.8257676902536716, "grad_norm": 1.2375083990646722, "learning_rate": 7.771636424635648e-07, "loss": 0.4169, "step": 3711 }, { "epoch": 0.8259902091677792, "grad_norm": 1.3437343824271983, "learning_rate": 7.752352095512117e-07, "loss": 0.4234, "step": 3712 }, { "epoch": 0.826212728081887, "grad_norm": 1.357626862769189, "learning_rate": 7.733089711031777e-07, "loss": 0.4141, "step": 3713 }, { "epoch": 0.8264352469959947, "grad_norm": 1.3620383914498837, "learning_rate": 7.713849281200042e-07, "loss": 0.4112, "step": 3714 }, { "epoch": 0.8266577659101023, "grad_norm": 1.2504876618754885, "learning_rate": 7.694630816010956e-07, "loss": 0.4278, "step": 3715 }, { "epoch": 0.82688028482421, "grad_norm": 1.295461634399557, "learning_rate": 7.675434325447139e-07, "loss": 0.4117, "step": 3716 }, { "epoch": 0.8271028037383178, "grad_norm": 1.38914972777984, "learning_rate": 7.656259819479811e-07, "loss": 0.4309, "step": 3717 }, { "epoch": 0.8273253226524254, "grad_norm": 1.332251308454196, "learning_rate": 7.637107308068758e-07, "loss": 0.424, "step": 3718 }, { "epoch": 0.8275478415665332, "grad_norm": 1.308454173265295, "learning_rate": 7.617976801162336e-07, "loss": 0.4073, "step": 3719 }, { "epoch": 0.8277703604806409, "grad_norm": 1.3083672974577323, "learning_rate": 7.598868308697483e-07, "loss": 0.3954, "step": 3720 }, { "epoch": 0.8279928793947485, "grad_norm": 1.3504898926049167, "learning_rate": 7.579781840599709e-07, "loss": 0.4061, "step": 3721 }, { "epoch": 0.8282153983088563, "grad_norm": 1.3015798871710138, "learning_rate": 7.560717406783053e-07, "loss": 0.3989, "step": 3722 }, { "epoch": 0.828437917222964, "grad_norm": 1.3092999594162447, "learning_rate": 7.541675017150157e-07, "loss": 0.415, "step": 3723 }, { "epoch": 0.8286604361370716, "grad_norm": 1.3982709682773184, "learning_rate": 7.522654681592173e-07, "loss": 0.4245, "step": 3724 }, { "epoch": 0.8288829550511794, "grad_norm": 1.4960681686835506, "learning_rate": 7.503656409988803e-07, "loss": 0.4139, "step": 3725 }, { "epoch": 0.829105473965287, "grad_norm": 1.5715207994376728, "learning_rate": 7.484680212208317e-07, "loss": 0.4166, "step": 3726 }, { "epoch": 0.8293279928793947, "grad_norm": 1.3693301533442357, "learning_rate": 7.465726098107473e-07, "loss": 0.4338, "step": 3727 }, { "epoch": 0.8295505117935025, "grad_norm": 1.3705893124586987, "learning_rate": 7.446794077531593e-07, "loss": 0.4325, "step": 3728 }, { "epoch": 0.8297730307076101, "grad_norm": 1.3679388258716396, "learning_rate": 7.427884160314513e-07, "loss": 0.4173, "step": 3729 }, { "epoch": 0.8299955496217178, "grad_norm": 1.3980738695590536, "learning_rate": 7.408996356278592e-07, "loss": 0.429, "step": 3730 }, { "epoch": 0.8302180685358256, "grad_norm": 1.4535875811445498, "learning_rate": 7.390130675234703e-07, "loss": 0.4262, "step": 3731 }, { "epoch": 0.8304405874499332, "grad_norm": 1.2845949965579955, "learning_rate": 7.371287126982208e-07, "loss": 0.399, "step": 3732 }, { "epoch": 0.830663106364041, "grad_norm": 1.3854200363839506, "learning_rate": 7.352465721309005e-07, "loss": 0.4133, "step": 3733 }, { "epoch": 0.8308856252781487, "grad_norm": 1.411094147288991, "learning_rate": 7.333666467991435e-07, "loss": 0.404, "step": 3734 }, { "epoch": 0.8311081441922563, "grad_norm": 1.377085711705447, "learning_rate": 7.314889376794426e-07, "loss": 0.4089, "step": 3735 }, { "epoch": 0.831330663106364, "grad_norm": 1.3345165484932378, "learning_rate": 7.296134457471304e-07, "loss": 0.4211, "step": 3736 }, { "epoch": 0.8315531820204718, "grad_norm": 1.3028471630236302, "learning_rate": 7.277401719763916e-07, "loss": 0.3977, "step": 3737 }, { "epoch": 0.8317757009345794, "grad_norm": 1.332802425797395, "learning_rate": 7.258691173402604e-07, "loss": 0.4322, "step": 3738 }, { "epoch": 0.8319982198486872, "grad_norm": 1.337447362307421, "learning_rate": 7.240002828106141e-07, "loss": 0.4014, "step": 3739 }, { "epoch": 0.8322207387627948, "grad_norm": 1.6786774832230866, "learning_rate": 7.221336693581798e-07, "loss": 0.4266, "step": 3740 }, { "epoch": 0.8324432576769025, "grad_norm": 1.3978359535147582, "learning_rate": 7.202692779525305e-07, "loss": 0.4079, "step": 3741 }, { "epoch": 0.8326657765910103, "grad_norm": 1.4071905474817523, "learning_rate": 7.184071095620849e-07, "loss": 0.4259, "step": 3742 }, { "epoch": 0.8328882955051179, "grad_norm": 1.5042575665675286, "learning_rate": 7.165471651541073e-07, "loss": 0.4104, "step": 3743 }, { "epoch": 0.8331108144192256, "grad_norm": 1.385947705696704, "learning_rate": 7.146894456947045e-07, "loss": 0.4392, "step": 3744 }, { "epoch": 0.8333333333333334, "grad_norm": 1.2892756474004554, "learning_rate": 7.128339521488304e-07, "loss": 0.4162, "step": 3745 }, { "epoch": 0.833555852247441, "grad_norm": 1.554493141927368, "learning_rate": 7.10980685480282e-07, "loss": 0.4177, "step": 3746 }, { "epoch": 0.8337783711615487, "grad_norm": 1.2806374456448075, "learning_rate": 7.091296466516989e-07, "loss": 0.4038, "step": 3747 }, { "epoch": 0.8340008900756565, "grad_norm": 1.3743521362033007, "learning_rate": 7.072808366245649e-07, "loss": 0.4165, "step": 3748 }, { "epoch": 0.8342234089897641, "grad_norm": 1.264106559109458, "learning_rate": 7.054342563592032e-07, "loss": 0.4475, "step": 3749 }, { "epoch": 0.8344459279038718, "grad_norm": 1.4546958701017132, "learning_rate": 7.035899068147834e-07, "loss": 0.4058, "step": 3750 }, { "epoch": 0.8346684468179796, "grad_norm": 1.2632866312759097, "learning_rate": 7.017477889493102e-07, "loss": 0.407, "step": 3751 }, { "epoch": 0.8348909657320872, "grad_norm": 1.3034334593476986, "learning_rate": 6.999079037196349e-07, "loss": 0.4148, "step": 3752 }, { "epoch": 0.835113484646195, "grad_norm": 1.266093452739125, "learning_rate": 6.980702520814458e-07, "loss": 0.4086, "step": 3753 }, { "epoch": 0.8353360035603026, "grad_norm": 1.3094752087259864, "learning_rate": 6.962348349892728e-07, "loss": 0.4169, "step": 3754 }, { "epoch": 0.8355585224744103, "grad_norm": 1.302654812023172, "learning_rate": 6.944016533964854e-07, "loss": 0.4135, "step": 3755 }, { "epoch": 0.835781041388518, "grad_norm": 1.3423630494762533, "learning_rate": 6.92570708255288e-07, "loss": 0.4126, "step": 3756 }, { "epoch": 0.8360035603026257, "grad_norm": 1.3441904832627285, "learning_rate": 6.907420005167276e-07, "loss": 0.4322, "step": 3757 }, { "epoch": 0.8362260792167334, "grad_norm": 1.2576561853294996, "learning_rate": 6.889155311306889e-07, "loss": 0.4065, "step": 3758 }, { "epoch": 0.8364485981308412, "grad_norm": 1.3497010116854158, "learning_rate": 6.87091301045889e-07, "loss": 0.4095, "step": 3759 }, { "epoch": 0.8366711170449488, "grad_norm": 1.437343323806691, "learning_rate": 6.852693112098902e-07, "loss": 0.409, "step": 3760 }, { "epoch": 0.8368936359590565, "grad_norm": 1.3416975538663962, "learning_rate": 6.834495625690824e-07, "loss": 0.4333, "step": 3761 }, { "epoch": 0.8371161548731643, "grad_norm": 1.2258058495019126, "learning_rate": 6.816320560686973e-07, "loss": 0.4076, "step": 3762 }, { "epoch": 0.8373386737872719, "grad_norm": 1.352752565163779, "learning_rate": 6.79816792652801e-07, "loss": 0.4209, "step": 3763 }, { "epoch": 0.8375611927013796, "grad_norm": 1.3769713734118272, "learning_rate": 6.780037732642908e-07, "loss": 0.4145, "step": 3764 }, { "epoch": 0.8377837116154874, "grad_norm": 1.4009238652124198, "learning_rate": 6.761929988449029e-07, "loss": 0.4296, "step": 3765 }, { "epoch": 0.838006230529595, "grad_norm": 1.2783720260483011, "learning_rate": 6.743844703352049e-07, "loss": 0.4152, "step": 3766 }, { "epoch": 0.8382287494437027, "grad_norm": 1.259820844581071, "learning_rate": 6.725781886745985e-07, "loss": 0.4283, "step": 3767 }, { "epoch": 0.8384512683578104, "grad_norm": 1.2626790044176552, "learning_rate": 6.707741548013202e-07, "loss": 0.4392, "step": 3768 }, { "epoch": 0.8386737872719181, "grad_norm": 1.3992063145369702, "learning_rate": 6.689723696524348e-07, "loss": 0.4349, "step": 3769 }, { "epoch": 0.8388963061860258, "grad_norm": 1.286217275637627, "learning_rate": 6.671728341638428e-07, "loss": 0.4331, "step": 3770 }, { "epoch": 0.8391188251001335, "grad_norm": 1.3396593208222771, "learning_rate": 6.653755492702718e-07, "loss": 0.4263, "step": 3771 }, { "epoch": 0.8393413440142412, "grad_norm": 1.3827500760226425, "learning_rate": 6.635805159052866e-07, "loss": 0.4038, "step": 3772 }, { "epoch": 0.839563862928349, "grad_norm": 1.3826236536329028, "learning_rate": 6.617877350012785e-07, "loss": 0.4123, "step": 3773 }, { "epoch": 0.8397863818424566, "grad_norm": 1.3196738202305152, "learning_rate": 6.599972074894684e-07, "loss": 0.4056, "step": 3774 }, { "epoch": 0.8400089007565643, "grad_norm": 1.3720531707338621, "learning_rate": 6.582089342999093e-07, "loss": 0.4281, "step": 3775 }, { "epoch": 0.840231419670672, "grad_norm": 1.5938510323048114, "learning_rate": 6.564229163614793e-07, "loss": 0.4146, "step": 3776 }, { "epoch": 0.8404539385847797, "grad_norm": 1.318138315394852, "learning_rate": 6.546391546018893e-07, "loss": 0.4245, "step": 3777 }, { "epoch": 0.8406764574988874, "grad_norm": 1.2852874722872805, "learning_rate": 6.528576499476757e-07, "loss": 0.4047, "step": 3778 }, { "epoch": 0.8408989764129952, "grad_norm": 1.363323178660982, "learning_rate": 6.51078403324204e-07, "loss": 0.4188, "step": 3779 }, { "epoch": 0.8411214953271028, "grad_norm": 1.3319497199288788, "learning_rate": 6.49301415655666e-07, "loss": 0.42, "step": 3780 }, { "epoch": 0.8413440142412105, "grad_norm": 1.4657709453025005, "learning_rate": 6.475266878650793e-07, "loss": 0.4321, "step": 3781 }, { "epoch": 0.8415665331553182, "grad_norm": 1.3007911955729612, "learning_rate": 6.45754220874289e-07, "loss": 0.4261, "step": 3782 }, { "epoch": 0.8417890520694259, "grad_norm": 1.4159140507995005, "learning_rate": 6.439840156039657e-07, "loss": 0.4104, "step": 3783 }, { "epoch": 0.8420115709835336, "grad_norm": 1.2252892547260357, "learning_rate": 6.422160729736044e-07, "loss": 0.4106, "step": 3784 }, { "epoch": 0.8422340898976413, "grad_norm": 1.4355118695497824, "learning_rate": 6.404503939015266e-07, "loss": 0.4127, "step": 3785 }, { "epoch": 0.842456608811749, "grad_norm": 1.542971612966642, "learning_rate": 6.38686979304875e-07, "loss": 0.4139, "step": 3786 }, { "epoch": 0.8426791277258567, "grad_norm": 1.3163600717048822, "learning_rate": 6.369258300996184e-07, "loss": 0.4139, "step": 3787 }, { "epoch": 0.8429016466399644, "grad_norm": 1.322550116855972, "learning_rate": 6.3516694720055e-07, "loss": 0.4207, "step": 3788 }, { "epoch": 0.8431241655540721, "grad_norm": 1.3951702233212593, "learning_rate": 6.334103315212819e-07, "loss": 0.4171, "step": 3789 }, { "epoch": 0.8433466844681798, "grad_norm": 1.37102100149002, "learning_rate": 6.316559839742514e-07, "loss": 0.4113, "step": 3790 }, { "epoch": 0.8435692033822875, "grad_norm": 1.2505361540341857, "learning_rate": 6.299039054707174e-07, "loss": 0.4056, "step": 3791 }, { "epoch": 0.8437917222963952, "grad_norm": 1.3882287875520032, "learning_rate": 6.2815409692076e-07, "loss": 0.429, "step": 3792 }, { "epoch": 0.844014241210503, "grad_norm": 1.2582760990471955, "learning_rate": 6.264065592332807e-07, "loss": 0.4127, "step": 3793 }, { "epoch": 0.8442367601246106, "grad_norm": 1.3668465670943843, "learning_rate": 6.24661293315999e-07, "loss": 0.4277, "step": 3794 }, { "epoch": 0.8444592790387183, "grad_norm": 1.2742372379285982, "learning_rate": 6.229183000754579e-07, "loss": 0.4124, "step": 3795 }, { "epoch": 0.8446817979528259, "grad_norm": 1.3932630092834333, "learning_rate": 6.211775804170161e-07, "loss": 0.4352, "step": 3796 }, { "epoch": 0.8449043168669337, "grad_norm": 1.5784833687234643, "learning_rate": 6.194391352448564e-07, "loss": 0.4262, "step": 3797 }, { "epoch": 0.8451268357810414, "grad_norm": 1.4110609969504604, "learning_rate": 6.177029654619748e-07, "loss": 0.4296, "step": 3798 }, { "epoch": 0.845349354695149, "grad_norm": 1.2468554445912639, "learning_rate": 6.159690719701888e-07, "loss": 0.4231, "step": 3799 }, { "epoch": 0.8455718736092568, "grad_norm": 1.345755479257665, "learning_rate": 6.142374556701336e-07, "loss": 0.4156, "step": 3800 }, { "epoch": 0.8457943925233645, "grad_norm": 1.2478308292261533, "learning_rate": 6.125081174612585e-07, "loss": 0.3981, "step": 3801 }, { "epoch": 0.8460169114374722, "grad_norm": 1.6018873261630329, "learning_rate": 6.107810582418317e-07, "loss": 0.4063, "step": 3802 }, { "epoch": 0.8462394303515799, "grad_norm": 1.262717876972493, "learning_rate": 6.090562789089383e-07, "loss": 0.4313, "step": 3803 }, { "epoch": 0.8464619492656876, "grad_norm": 1.3025372549339373, "learning_rate": 6.073337803584778e-07, "loss": 0.4193, "step": 3804 }, { "epoch": 0.8466844681797953, "grad_norm": 1.44877918710627, "learning_rate": 6.056135634851673e-07, "loss": 0.43, "step": 3805 }, { "epoch": 0.846906987093903, "grad_norm": 1.428623345485367, "learning_rate": 6.038956291825338e-07, "loss": 0.414, "step": 3806 }, { "epoch": 0.8471295060080107, "grad_norm": 1.3311909645599753, "learning_rate": 6.021799783429233e-07, "loss": 0.3936, "step": 3807 }, { "epoch": 0.8473520249221184, "grad_norm": 1.2845407373136393, "learning_rate": 6.004666118574948e-07, "loss": 0.413, "step": 3808 }, { "epoch": 0.8475745438362261, "grad_norm": 1.322950302416371, "learning_rate": 5.98755530616219e-07, "loss": 0.4131, "step": 3809 }, { "epoch": 0.8477970627503337, "grad_norm": 1.2811013496725427, "learning_rate": 5.970467355078819e-07, "loss": 0.4103, "step": 3810 }, { "epoch": 0.8480195816644415, "grad_norm": 1.2068373400388788, "learning_rate": 5.953402274200798e-07, "loss": 0.4086, "step": 3811 }, { "epoch": 0.8482421005785492, "grad_norm": 1.2773285302757404, "learning_rate": 5.936360072392217e-07, "loss": 0.4061, "step": 3812 }, { "epoch": 0.8484646194926568, "grad_norm": 1.3436293394423, "learning_rate": 5.91934075850531e-07, "loss": 0.409, "step": 3813 }, { "epoch": 0.8486871384067646, "grad_norm": 1.2652241391106593, "learning_rate": 5.902344341380351e-07, "loss": 0.4196, "step": 3814 }, { "epoch": 0.8489096573208723, "grad_norm": 1.3859399631891764, "learning_rate": 5.885370829845826e-07, "loss": 0.4187, "step": 3815 }, { "epoch": 0.8491321762349799, "grad_norm": 1.1976046547165287, "learning_rate": 5.868420232718225e-07, "loss": 0.4195, "step": 3816 }, { "epoch": 0.8493546951490877, "grad_norm": 1.252312624812842, "learning_rate": 5.851492558802191e-07, "loss": 0.4316, "step": 3817 }, { "epoch": 0.8495772140631954, "grad_norm": 1.2992829612257168, "learning_rate": 5.834587816890436e-07, "loss": 0.4264, "step": 3818 }, { "epoch": 0.849799732977303, "grad_norm": 1.34353503036448, "learning_rate": 5.817706015763774e-07, "loss": 0.4251, "step": 3819 }, { "epoch": 0.8500222518914108, "grad_norm": 1.288984099038828, "learning_rate": 5.800847164191093e-07, "loss": 0.431, "step": 3820 }, { "epoch": 0.8502447708055185, "grad_norm": 1.2617424307077452, "learning_rate": 5.784011270929374e-07, "loss": 0.4258, "step": 3821 }, { "epoch": 0.8504672897196262, "grad_norm": 1.2387236130298767, "learning_rate": 5.767198344723667e-07, "loss": 0.4037, "step": 3822 }, { "epoch": 0.8506898086337339, "grad_norm": 1.3496055668247857, "learning_rate": 5.750408394307072e-07, "loss": 0.429, "step": 3823 }, { "epoch": 0.8509123275478415, "grad_norm": 1.379088899071646, "learning_rate": 5.733641428400782e-07, "loss": 0.4066, "step": 3824 }, { "epoch": 0.8511348464619493, "grad_norm": 1.2653291165455174, "learning_rate": 5.716897455714054e-07, "loss": 0.4068, "step": 3825 }, { "epoch": 0.851357365376057, "grad_norm": 1.321638573550825, "learning_rate": 5.70017648494417e-07, "loss": 0.4013, "step": 3826 }, { "epoch": 0.8515798842901646, "grad_norm": 1.3322867472636444, "learning_rate": 5.683478524776481e-07, "loss": 0.426, "step": 3827 }, { "epoch": 0.8518024032042724, "grad_norm": 1.3135466538569325, "learning_rate": 5.6668035838844e-07, "loss": 0.4285, "step": 3828 }, { "epoch": 0.8520249221183801, "grad_norm": 1.3674237470320707, "learning_rate": 5.650151670929371e-07, "loss": 0.4308, "step": 3829 }, { "epoch": 0.8522474410324877, "grad_norm": 1.3184016674731447, "learning_rate": 5.633522794560875e-07, "loss": 0.4154, "step": 3830 }, { "epoch": 0.8524699599465955, "grad_norm": 1.277766435695051, "learning_rate": 5.616916963416419e-07, "loss": 0.4313, "step": 3831 }, { "epoch": 0.8526924788607032, "grad_norm": 1.2496830372345051, "learning_rate": 5.60033418612157e-07, "loss": 0.4143, "step": 3832 }, { "epoch": 0.8529149977748108, "grad_norm": 1.3323401812708187, "learning_rate": 5.58377447128986e-07, "loss": 0.4163, "step": 3833 }, { "epoch": 0.8531375166889186, "grad_norm": 1.4068602889134652, "learning_rate": 5.56723782752292e-07, "loss": 0.4107, "step": 3834 }, { "epoch": 0.8533600356030263, "grad_norm": 1.399566342736468, "learning_rate": 5.550724263410351e-07, "loss": 0.4209, "step": 3835 }, { "epoch": 0.8535825545171339, "grad_norm": 1.331429114432383, "learning_rate": 5.534233787529764e-07, "loss": 0.4095, "step": 3836 }, { "epoch": 0.8538050734312417, "grad_norm": 1.2542535017416103, "learning_rate": 5.51776640844679e-07, "loss": 0.4114, "step": 3837 }, { "epoch": 0.8540275923453493, "grad_norm": 1.2827130745552393, "learning_rate": 5.501322134715053e-07, "loss": 0.4083, "step": 3838 }, { "epoch": 0.854250111259457, "grad_norm": 1.308238851153703, "learning_rate": 5.48490097487619e-07, "loss": 0.4145, "step": 3839 }, { "epoch": 0.8544726301735648, "grad_norm": 1.3985307965105438, "learning_rate": 5.468502937459818e-07, "loss": 0.3983, "step": 3840 }, { "epoch": 0.8546951490876724, "grad_norm": 1.392337626652894, "learning_rate": 5.45212803098355e-07, "loss": 0.4053, "step": 3841 }, { "epoch": 0.8549176680017802, "grad_norm": 1.2727811587521178, "learning_rate": 5.435776263952996e-07, "loss": 0.3975, "step": 3842 }, { "epoch": 0.8551401869158879, "grad_norm": 1.3557019569435516, "learning_rate": 5.419447644861719e-07, "loss": 0.4271, "step": 3843 }, { "epoch": 0.8553627058299955, "grad_norm": 1.2737888772841743, "learning_rate": 5.403142182191274e-07, "loss": 0.4189, "step": 3844 }, { "epoch": 0.8555852247441033, "grad_norm": 1.3767009607604652, "learning_rate": 5.386859884411189e-07, "loss": 0.4097, "step": 3845 }, { "epoch": 0.855807743658211, "grad_norm": 1.3887254094239627, "learning_rate": 5.370600759978961e-07, "loss": 0.4173, "step": 3846 }, { "epoch": 0.8560302625723186, "grad_norm": 1.3463405351187807, "learning_rate": 5.354364817340052e-07, "loss": 0.3982, "step": 3847 }, { "epoch": 0.8562527814864264, "grad_norm": 1.3350621739350466, "learning_rate": 5.338152064927865e-07, "loss": 0.4027, "step": 3848 }, { "epoch": 0.8564753004005341, "grad_norm": 1.3324195119306153, "learning_rate": 5.32196251116377e-07, "loss": 0.4321, "step": 3849 }, { "epoch": 0.8566978193146417, "grad_norm": 1.3859146404242966, "learning_rate": 5.305796164457106e-07, "loss": 0.4176, "step": 3850 }, { "epoch": 0.8569203382287495, "grad_norm": 1.3595748848897955, "learning_rate": 5.289653033205106e-07, "loss": 0.4043, "step": 3851 }, { "epoch": 0.8571428571428571, "grad_norm": 1.2483627027476931, "learning_rate": 5.273533125793013e-07, "loss": 0.4178, "step": 3852 }, { "epoch": 0.8573653760569648, "grad_norm": 1.269493761590784, "learning_rate": 5.257436450593944e-07, "loss": 0.4106, "step": 3853 }, { "epoch": 0.8575878949710726, "grad_norm": 1.395273238750461, "learning_rate": 5.241363015968981e-07, "loss": 0.4265, "step": 3854 }, { "epoch": 0.8578104138851802, "grad_norm": 1.551482408454165, "learning_rate": 5.225312830267143e-07, "loss": 0.4073, "step": 3855 }, { "epoch": 0.8580329327992879, "grad_norm": 1.3465769299776589, "learning_rate": 5.209285901825334e-07, "loss": 0.4235, "step": 3856 }, { "epoch": 0.8582554517133957, "grad_norm": 1.2484409862467314, "learning_rate": 5.193282238968417e-07, "loss": 0.4104, "step": 3857 }, { "epoch": 0.8584779706275033, "grad_norm": 1.283374597391954, "learning_rate": 5.177301850009147e-07, "loss": 0.4047, "step": 3858 }, { "epoch": 0.858700489541611, "grad_norm": 1.2842916109550966, "learning_rate": 5.161344743248209e-07, "loss": 0.3933, "step": 3859 }, { "epoch": 0.8589230084557188, "grad_norm": 1.3538188769380939, "learning_rate": 5.145410926974171e-07, "loss": 0.4365, "step": 3860 }, { "epoch": 0.8591455273698264, "grad_norm": 1.4649974119117897, "learning_rate": 5.129500409463517e-07, "loss": 0.4104, "step": 3861 }, { "epoch": 0.8593680462839342, "grad_norm": 1.2918619179734234, "learning_rate": 5.113613198980644e-07, "loss": 0.4043, "step": 3862 }, { "epoch": 0.8595905651980418, "grad_norm": 1.3960769850727262, "learning_rate": 5.0977493037778e-07, "loss": 0.4346, "step": 3863 }, { "epoch": 0.8598130841121495, "grad_norm": 1.2215902134364223, "learning_rate": 5.081908732095159e-07, "loss": 0.4049, "step": 3864 }, { "epoch": 0.8600356030262573, "grad_norm": 1.3055618747098454, "learning_rate": 5.066091492160768e-07, "loss": 0.4136, "step": 3865 }, { "epoch": 0.8602581219403649, "grad_norm": 1.2647718728751625, "learning_rate": 5.050297592190567e-07, "loss": 0.4217, "step": 3866 }, { "epoch": 0.8604806408544726, "grad_norm": 1.3593156633904693, "learning_rate": 5.034527040388359e-07, "loss": 0.426, "step": 3867 }, { "epoch": 0.8607031597685804, "grad_norm": 1.375749256333605, "learning_rate": 5.018779844945809e-07, "loss": 0.4176, "step": 3868 }, { "epoch": 0.860925678682688, "grad_norm": 1.3722422084954888, "learning_rate": 5.003056014042468e-07, "loss": 0.413, "step": 3869 }, { "epoch": 0.8611481975967957, "grad_norm": 1.4260327467004112, "learning_rate": 4.987355555845752e-07, "loss": 0.4286, "step": 3870 }, { "epoch": 0.8613707165109035, "grad_norm": 1.3500965091447001, "learning_rate": 4.971678478510927e-07, "loss": 0.4154, "step": 3871 }, { "epoch": 0.8615932354250111, "grad_norm": 1.3240130059525994, "learning_rate": 4.95602479018113e-07, "loss": 0.4163, "step": 3872 }, { "epoch": 0.8618157543391188, "grad_norm": 1.3386546003695488, "learning_rate": 4.940394498987316e-07, "loss": 0.4164, "step": 3873 }, { "epoch": 0.8620382732532266, "grad_norm": 1.326382301215368, "learning_rate": 4.924787613048316e-07, "loss": 0.4229, "step": 3874 }, { "epoch": 0.8622607921673342, "grad_norm": 1.2836502020717677, "learning_rate": 4.909204140470803e-07, "loss": 0.3986, "step": 3875 }, { "epoch": 0.8624833110814419, "grad_norm": 1.4595488120726094, "learning_rate": 4.893644089349258e-07, "loss": 0.4212, "step": 3876 }, { "epoch": 0.8627058299955496, "grad_norm": 1.2154439313099865, "learning_rate": 4.878107467766053e-07, "loss": 0.4182, "step": 3877 }, { "epoch": 0.8629283489096573, "grad_norm": 1.2860971849156848, "learning_rate": 4.862594283791328e-07, "loss": 0.4267, "step": 3878 }, { "epoch": 0.863150867823765, "grad_norm": 1.351758099779095, "learning_rate": 4.847104545483094e-07, "loss": 0.4289, "step": 3879 }, { "epoch": 0.8633733867378727, "grad_norm": 1.3266131574768196, "learning_rate": 4.831638260887156e-07, "loss": 0.4328, "step": 3880 }, { "epoch": 0.8635959056519804, "grad_norm": 1.3907291093647705, "learning_rate": 4.816195438037147e-07, "loss": 0.4244, "step": 3881 }, { "epoch": 0.8638184245660881, "grad_norm": 1.269647797107631, "learning_rate": 4.800776084954518e-07, "loss": 0.415, "step": 3882 }, { "epoch": 0.8640409434801958, "grad_norm": 1.2797947813627124, "learning_rate": 4.785380209648522e-07, "loss": 0.3975, "step": 3883 }, { "epoch": 0.8642634623943035, "grad_norm": 1.3747130833424939, "learning_rate": 4.770007820116229e-07, "loss": 0.4127, "step": 3884 }, { "epoch": 0.8644859813084113, "grad_norm": 1.2038821206631118, "learning_rate": 4.754658924342481e-07, "loss": 0.4104, "step": 3885 }, { "epoch": 0.8647085002225189, "grad_norm": 1.4708577656836088, "learning_rate": 4.7393335302999497e-07, "loss": 0.4186, "step": 3886 }, { "epoch": 0.8649310191366266, "grad_norm": 1.362998951695974, "learning_rate": 4.72403164594909e-07, "loss": 0.4099, "step": 3887 }, { "epoch": 0.8651535380507344, "grad_norm": 1.3250016748417779, "learning_rate": 4.7087532792381154e-07, "loss": 0.4029, "step": 3888 }, { "epoch": 0.865376056964842, "grad_norm": 1.4801764056714801, "learning_rate": 4.6934984381030837e-07, "loss": 0.4214, "step": 3889 }, { "epoch": 0.8655985758789497, "grad_norm": 1.3683138031589364, "learning_rate": 4.678267130467773e-07, "loss": 0.4189, "step": 3890 }, { "epoch": 0.8658210947930574, "grad_norm": 1.3433618639551645, "learning_rate": 4.6630593642437714e-07, "loss": 0.4213, "step": 3891 }, { "epoch": 0.8660436137071651, "grad_norm": 1.3012408132244127, "learning_rate": 4.647875147330433e-07, "loss": 0.4058, "step": 3892 }, { "epoch": 0.8662661326212728, "grad_norm": 1.2352509440068158, "learning_rate": 4.6327144876148643e-07, "loss": 0.4031, "step": 3893 }, { "epoch": 0.8664886515353805, "grad_norm": 1.2345929287197654, "learning_rate": 4.6175773929719615e-07, "loss": 0.4171, "step": 3894 }, { "epoch": 0.8667111704494882, "grad_norm": 1.3429484799491773, "learning_rate": 4.6024638712643563e-07, "loss": 0.4287, "step": 3895 }, { "epoch": 0.8669336893635959, "grad_norm": 1.2892087487808825, "learning_rate": 4.587373930342448e-07, "loss": 0.411, "step": 3896 }, { "epoch": 0.8671562082777036, "grad_norm": 1.386341136140136, "learning_rate": 4.572307578044405e-07, "loss": 0.4149, "step": 3897 }, { "epoch": 0.8673787271918113, "grad_norm": 1.331399316162941, "learning_rate": 4.5572648221961004e-07, "loss": 0.4241, "step": 3898 }, { "epoch": 0.867601246105919, "grad_norm": 1.2921631103168403, "learning_rate": 4.542245670611184e-07, "loss": 0.4373, "step": 3899 }, { "epoch": 0.8678237650200267, "grad_norm": 1.3503398661861543, "learning_rate": 4.527250131091027e-07, "loss": 0.4315, "step": 3900 }, { "epoch": 0.8680462839341344, "grad_norm": 1.4386281392587934, "learning_rate": 4.512278211424753e-07, "loss": 0.4226, "step": 3901 }, { "epoch": 0.8682688028482421, "grad_norm": 1.3614892003425367, "learning_rate": 4.497329919389204e-07, "loss": 0.4209, "step": 3902 }, { "epoch": 0.8684913217623498, "grad_norm": 1.3157024940318067, "learning_rate": 4.482405262748951e-07, "loss": 0.4186, "step": 3903 }, { "epoch": 0.8687138406764575, "grad_norm": 1.328219101178909, "learning_rate": 4.4675042492562993e-07, "loss": 0.4123, "step": 3904 }, { "epoch": 0.8689363595905651, "grad_norm": 1.3718985668377273, "learning_rate": 4.452626886651251e-07, "loss": 0.3912, "step": 3905 }, { "epoch": 0.8691588785046729, "grad_norm": 1.302962825661643, "learning_rate": 4.4377731826615425e-07, "loss": 0.4201, "step": 3906 }, { "epoch": 0.8693813974187806, "grad_norm": 1.3428841227451005, "learning_rate": 4.4229431450026116e-07, "loss": 0.4047, "step": 3907 }, { "epoch": 0.8696039163328882, "grad_norm": 1.3284685190122525, "learning_rate": 4.408136781377609e-07, "loss": 0.4104, "step": 3908 }, { "epoch": 0.869826435246996, "grad_norm": 1.4147874714401045, "learning_rate": 4.3933540994773927e-07, "loss": 0.4056, "step": 3909 }, { "epoch": 0.8700489541611037, "grad_norm": 1.3573834819336026, "learning_rate": 4.3785951069804986e-07, "loss": 0.413, "step": 3910 }, { "epoch": 0.8702714730752114, "grad_norm": 1.2922756471349301, "learning_rate": 4.363859811553173e-07, "loss": 0.4152, "step": 3911 }, { "epoch": 0.8704939919893191, "grad_norm": 1.3706159785059118, "learning_rate": 4.349148220849375e-07, "loss": 0.4052, "step": 3912 }, { "epoch": 0.8707165109034268, "grad_norm": 1.2921380512714584, "learning_rate": 4.334460342510688e-07, "loss": 0.3938, "step": 3913 }, { "epoch": 0.8709390298175345, "grad_norm": 1.3449796691857527, "learning_rate": 4.3197961841664584e-07, "loss": 0.4132, "step": 3914 }, { "epoch": 0.8711615487316422, "grad_norm": 1.3473762650729804, "learning_rate": 4.305155753433649e-07, "loss": 0.4074, "step": 3915 }, { "epoch": 0.8713840676457499, "grad_norm": 1.312715294739407, "learning_rate": 4.29053905791692e-07, "loss": 0.419, "step": 3916 }, { "epoch": 0.8716065865598576, "grad_norm": 1.2392961606545003, "learning_rate": 4.2759461052086224e-07, "loss": 0.4031, "step": 3917 }, { "epoch": 0.8718291054739653, "grad_norm": 1.3355185013387554, "learning_rate": 4.261376902888731e-07, "loss": 0.4132, "step": 3918 }, { "epoch": 0.8720516243880729, "grad_norm": 1.2599779628557704, "learning_rate": 4.246831458524925e-07, "loss": 0.4276, "step": 3919 }, { "epoch": 0.8722741433021807, "grad_norm": 1.464829978436213, "learning_rate": 4.2323097796725165e-07, "loss": 0.4154, "step": 3920 }, { "epoch": 0.8724966622162884, "grad_norm": 1.2972160201951382, "learning_rate": 4.2178118738744947e-07, "loss": 0.4095, "step": 3921 }, { "epoch": 0.872719181130396, "grad_norm": 1.3409101371195398, "learning_rate": 4.2033377486614734e-07, "loss": 0.3934, "step": 3922 }, { "epoch": 0.8729417000445038, "grad_norm": 1.3498842102808497, "learning_rate": 4.1888874115517395e-07, "loss": 0.4133, "step": 3923 }, { "epoch": 0.8731642189586115, "grad_norm": 1.3144207326236874, "learning_rate": 4.1744608700512224e-07, "loss": 0.4164, "step": 3924 }, { "epoch": 0.8733867378727191, "grad_norm": 1.2907267034300045, "learning_rate": 4.1600581316534494e-07, "loss": 0.4054, "step": 3925 }, { "epoch": 0.8736092567868269, "grad_norm": 1.2912804475241197, "learning_rate": 4.1456792038396645e-07, "loss": 0.419, "step": 3926 }, { "epoch": 0.8738317757009346, "grad_norm": 1.3957968176890163, "learning_rate": 4.1313240940786537e-07, "loss": 0.4155, "step": 3927 }, { "epoch": 0.8740542946150422, "grad_norm": 1.2777956547843496, "learning_rate": 4.116992809826897e-07, "loss": 0.4099, "step": 3928 }, { "epoch": 0.87427681352915, "grad_norm": 1.3341279288722392, "learning_rate": 4.10268535852848e-07, "loss": 0.4168, "step": 3929 }, { "epoch": 0.8744993324432577, "grad_norm": 1.3497333850037698, "learning_rate": 4.088401747615084e-07, "loss": 0.4105, "step": 3930 }, { "epoch": 0.8747218513573654, "grad_norm": 1.382109721293753, "learning_rate": 4.074141984506036e-07, "loss": 0.4137, "step": 3931 }, { "epoch": 0.8749443702714731, "grad_norm": 1.461282252407938, "learning_rate": 4.059906076608272e-07, "loss": 0.4069, "step": 3932 }, { "epoch": 0.8751668891855807, "grad_norm": 1.3307713964643249, "learning_rate": 4.045694031316327e-07, "loss": 0.4032, "step": 3933 }, { "epoch": 0.8753894080996885, "grad_norm": 1.378971421077977, "learning_rate": 4.031505856012352e-07, "loss": 0.3923, "step": 3934 }, { "epoch": 0.8756119270137962, "grad_norm": 1.2567571370528614, "learning_rate": 4.017341558066085e-07, "loss": 0.4004, "step": 3935 }, { "epoch": 0.8758344459279038, "grad_norm": 1.331901282981156, "learning_rate": 4.0032011448348727e-07, "loss": 0.41, "step": 3936 }, { "epoch": 0.8760569648420116, "grad_norm": 1.2875114866986996, "learning_rate": 3.9890846236636636e-07, "loss": 0.4009, "step": 3937 }, { "epoch": 0.8762794837561193, "grad_norm": 1.3289934382155502, "learning_rate": 3.974992001884953e-07, "loss": 0.4193, "step": 3938 }, { "epoch": 0.8765020026702269, "grad_norm": 1.322690384415854, "learning_rate": 3.960923286818896e-07, "loss": 0.4117, "step": 3939 }, { "epoch": 0.8767245215843347, "grad_norm": 1.4149464348605958, "learning_rate": 3.9468784857731534e-07, "loss": 0.4077, "step": 3940 }, { "epoch": 0.8769470404984424, "grad_norm": 1.3785323077967528, "learning_rate": 3.932857606043028e-07, "loss": 0.4385, "step": 3941 }, { "epoch": 0.87716955941255, "grad_norm": 1.327312187615511, "learning_rate": 3.9188606549113386e-07, "loss": 0.4103, "step": 3942 }, { "epoch": 0.8773920783266578, "grad_norm": 1.2495413895227123, "learning_rate": 3.904887639648519e-07, "loss": 0.4077, "step": 3943 }, { "epoch": 0.8776145972407655, "grad_norm": 1.4375862396627497, "learning_rate": 3.8909385675125534e-07, "loss": 0.3999, "step": 3944 }, { "epoch": 0.8778371161548731, "grad_norm": 1.4022618152003912, "learning_rate": 3.8770134457489896e-07, "loss": 0.4192, "step": 3945 }, { "epoch": 0.8780596350689809, "grad_norm": 1.2846979041521216, "learning_rate": 3.8631122815909473e-07, "loss": 0.4124, "step": 3946 }, { "epoch": 0.8782821539830885, "grad_norm": 1.3794473863369938, "learning_rate": 3.849235082259073e-07, "loss": 0.4081, "step": 3947 }, { "epoch": 0.8785046728971962, "grad_norm": 1.2900209079002498, "learning_rate": 3.835381854961595e-07, "loss": 0.398, "step": 3948 }, { "epoch": 0.878727191811304, "grad_norm": 1.34987518880479, "learning_rate": 3.82155260689428e-07, "loss": 0.4195, "step": 3949 }, { "epoch": 0.8789497107254116, "grad_norm": 1.3175779631410383, "learning_rate": 3.8077473452404145e-07, "loss": 0.4339, "step": 3950 }, { "epoch": 0.8791722296395194, "grad_norm": 1.3489969758366376, "learning_rate": 3.793966077170885e-07, "loss": 0.4066, "step": 3951 }, { "epoch": 0.8793947485536271, "grad_norm": 1.3655967683023538, "learning_rate": 3.780208809844049e-07, "loss": 0.4106, "step": 3952 }, { "epoch": 0.8796172674677347, "grad_norm": 1.4135480828969056, "learning_rate": 3.7664755504058405e-07, "loss": 0.4017, "step": 3953 }, { "epoch": 0.8798397863818425, "grad_norm": 1.3103420771603147, "learning_rate": 3.752766305989708e-07, "loss": 0.4187, "step": 3954 }, { "epoch": 0.8800623052959502, "grad_norm": 1.4129237342350613, "learning_rate": 3.7390810837166224e-07, "loss": 0.4211, "step": 3955 }, { "epoch": 0.8802848242100578, "grad_norm": 1.3792955410338343, "learning_rate": 3.725419890695081e-07, "loss": 0.4333, "step": 3956 }, { "epoch": 0.8805073431241656, "grad_norm": 1.3194369949205564, "learning_rate": 3.711782734021102e-07, "loss": 0.401, "step": 3957 }, { "epoch": 0.8807298620382733, "grad_norm": 1.2662462749348296, "learning_rate": 3.698169620778219e-07, "loss": 0.4155, "step": 3958 }, { "epoch": 0.8809523809523809, "grad_norm": 1.441843193335488, "learning_rate": 3.684580558037482e-07, "loss": 0.4335, "step": 3959 }, { "epoch": 0.8811748998664887, "grad_norm": 1.3635339448826502, "learning_rate": 3.671015552857427e-07, "loss": 0.4247, "step": 3960 }, { "epoch": 0.8813974187805963, "grad_norm": 1.4638403523445676, "learning_rate": 3.6574746122841176e-07, "loss": 0.4015, "step": 3961 }, { "epoch": 0.881619937694704, "grad_norm": 1.496213920017208, "learning_rate": 3.64395774335109e-07, "loss": 0.407, "step": 3962 }, { "epoch": 0.8818424566088118, "grad_norm": 1.4247217983817622, "learning_rate": 3.630464953079427e-07, "loss": 0.4241, "step": 3963 }, { "epoch": 0.8820649755229194, "grad_norm": 1.3869920854018336, "learning_rate": 3.616996248477639e-07, "loss": 0.4071, "step": 3964 }, { "epoch": 0.8822874944370271, "grad_norm": 1.4243596166449823, "learning_rate": 3.603551636541774e-07, "loss": 0.3946, "step": 3965 }, { "epoch": 0.8825100133511349, "grad_norm": 1.2719838953840843, "learning_rate": 3.5901311242553585e-07, "loss": 0.4177, "step": 3966 }, { "epoch": 0.8827325322652425, "grad_norm": 1.4119984849013536, "learning_rate": 3.576734718589375e-07, "loss": 0.4369, "step": 3967 }, { "epoch": 0.8829550511793502, "grad_norm": 1.383181418299333, "learning_rate": 3.5633624265023093e-07, "loss": 0.4299, "step": 3968 }, { "epoch": 0.883177570093458, "grad_norm": 1.208341384624465, "learning_rate": 3.5500142549401097e-07, "loss": 0.4207, "step": 3969 }, { "epoch": 0.8834000890075656, "grad_norm": 1.3366805929707684, "learning_rate": 3.536690210836208e-07, "loss": 0.4226, "step": 3970 }, { "epoch": 0.8836226079216734, "grad_norm": 1.391705096644721, "learning_rate": 3.523390301111501e-07, "loss": 0.4149, "step": 3971 }, { "epoch": 0.8838451268357811, "grad_norm": 1.3807892311342687, "learning_rate": 3.51011453267433e-07, "loss": 0.4226, "step": 3972 }, { "epoch": 0.8840676457498887, "grad_norm": 1.309539128822509, "learning_rate": 3.496862912420518e-07, "loss": 0.4205, "step": 3973 }, { "epoch": 0.8842901646639965, "grad_norm": 1.2733973329706143, "learning_rate": 3.4836354472333413e-07, "loss": 0.4033, "step": 3974 }, { "epoch": 0.8845126835781041, "grad_norm": 1.3408423677903543, "learning_rate": 3.470432143983504e-07, "loss": 0.4035, "step": 3975 }, { "epoch": 0.8847352024922118, "grad_norm": 1.4037260583637197, "learning_rate": 3.4572530095292213e-07, "loss": 0.4161, "step": 3976 }, { "epoch": 0.8849577214063196, "grad_norm": 1.622923715372208, "learning_rate": 3.444098050716077e-07, "loss": 0.4119, "step": 3977 }, { "epoch": 0.8851802403204272, "grad_norm": 1.4025757958407452, "learning_rate": 3.4309672743771506e-07, "loss": 0.4191, "step": 3978 }, { "epoch": 0.8854027592345349, "grad_norm": 1.358111248080334, "learning_rate": 3.4178606873329577e-07, "loss": 0.4181, "step": 3979 }, { "epoch": 0.8856252781486427, "grad_norm": 1.3121181781782372, "learning_rate": 3.404778296391409e-07, "loss": 0.4135, "step": 3980 }, { "epoch": 0.8858477970627503, "grad_norm": 1.506825552096407, "learning_rate": 3.39172010834789e-07, "loss": 0.4082, "step": 3981 }, { "epoch": 0.886070315976858, "grad_norm": 1.3610167655707486, "learning_rate": 3.378686129985198e-07, "loss": 0.4038, "step": 3982 }, { "epoch": 0.8862928348909658, "grad_norm": 1.323616717938954, "learning_rate": 3.365676368073567e-07, "loss": 0.416, "step": 3983 }, { "epoch": 0.8865153538050734, "grad_norm": 1.2957222619116173, "learning_rate": 3.35269082937062e-07, "loss": 0.4166, "step": 3984 }, { "epoch": 0.8867378727191811, "grad_norm": 1.3189563270857947, "learning_rate": 3.3397295206214266e-07, "loss": 0.4101, "step": 3985 }, { "epoch": 0.8869603916332889, "grad_norm": 1.3812059651477444, "learning_rate": 3.326792448558475e-07, "loss": 0.4026, "step": 3986 }, { "epoch": 0.8871829105473965, "grad_norm": 1.4166398227008117, "learning_rate": 3.3138796199016274e-07, "loss": 0.4276, "step": 3987 }, { "epoch": 0.8874054294615042, "grad_norm": 1.219354619063811, "learning_rate": 3.3009910413582147e-07, "loss": 0.4105, "step": 3988 }, { "epoch": 0.8876279483756119, "grad_norm": 1.3081508191753874, "learning_rate": 3.288126719622903e-07, "loss": 0.4248, "step": 3989 }, { "epoch": 0.8878504672897196, "grad_norm": 1.401703819765197, "learning_rate": 3.2752866613778023e-07, "loss": 0.4048, "step": 3990 }, { "epoch": 0.8880729862038274, "grad_norm": 1.354524381898504, "learning_rate": 3.2624708732924226e-07, "loss": 0.427, "step": 3991 }, { "epoch": 0.888295505117935, "grad_norm": 1.484844236020377, "learning_rate": 3.249679362023622e-07, "loss": 0.4317, "step": 3992 }, { "epoch": 0.8885180240320427, "grad_norm": 1.3296848670130066, "learning_rate": 3.2369121342157027e-07, "loss": 0.415, "step": 3993 }, { "epoch": 0.8887405429461505, "grad_norm": 1.4604324958334742, "learning_rate": 3.224169196500321e-07, "loss": 0.4407, "step": 3994 }, { "epoch": 0.8889630618602581, "grad_norm": 1.3424256115464925, "learning_rate": 3.211450555496531e-07, "loss": 0.4321, "step": 3995 }, { "epoch": 0.8891855807743658, "grad_norm": 1.3360390679086862, "learning_rate": 3.198756217810761e-07, "loss": 0.4121, "step": 3996 }, { "epoch": 0.8894080996884736, "grad_norm": 1.2811877330011754, "learning_rate": 3.1860861900368024e-07, "loss": 0.4036, "step": 3997 }, { "epoch": 0.8896306186025812, "grad_norm": 1.287359636215394, "learning_rate": 3.17344047875584e-07, "loss": 0.3918, "step": 3998 }, { "epoch": 0.8898531375166889, "grad_norm": 1.4169411460072892, "learning_rate": 3.1608190905364265e-07, "loss": 0.4103, "step": 3999 }, { "epoch": 0.8900756564307967, "grad_norm": 1.34426187625905, "learning_rate": 3.1482220319344613e-07, "loss": 0.4216, "step": 4000 }, { "epoch": 0.8902981753449043, "grad_norm": 1.3166395304930358, "learning_rate": 3.135649309493238e-07, "loss": 0.4178, "step": 4001 }, { "epoch": 0.890520694259012, "grad_norm": 1.3800267300674582, "learning_rate": 3.123100929743372e-07, "loss": 0.4312, "step": 4002 }, { "epoch": 0.8907432131731197, "grad_norm": 1.3735083230993899, "learning_rate": 3.1105768992028607e-07, "loss": 0.4344, "step": 4003 }, { "epoch": 0.8909657320872274, "grad_norm": 1.2780149919421482, "learning_rate": 3.0980772243770384e-07, "loss": 0.3993, "step": 4004 }, { "epoch": 0.8911882510013351, "grad_norm": 1.326611923748671, "learning_rate": 3.08560191175859e-07, "loss": 0.3945, "step": 4005 }, { "epoch": 0.8914107699154428, "grad_norm": 1.451734289872811, "learning_rate": 3.0731509678275816e-07, "loss": 0.3984, "step": 4006 }, { "epoch": 0.8916332888295505, "grad_norm": 1.3587502752968115, "learning_rate": 3.060724399051362e-07, "loss": 0.4084, "step": 4007 }, { "epoch": 0.8918558077436582, "grad_norm": 1.366069788071066, "learning_rate": 3.0483222118846633e-07, "loss": 0.4149, "step": 4008 }, { "epoch": 0.8920783266577659, "grad_norm": 1.3811504387984903, "learning_rate": 3.0359444127695314e-07, "loss": 0.4193, "step": 4009 }, { "epoch": 0.8923008455718736, "grad_norm": 1.2410161895838605, "learning_rate": 3.023591008135346e-07, "loss": 0.436, "step": 4010 }, { "epoch": 0.8925233644859814, "grad_norm": 1.3617105446090767, "learning_rate": 3.0112620043988404e-07, "loss": 0.3959, "step": 4011 }, { "epoch": 0.892745883400089, "grad_norm": 1.210164749202628, "learning_rate": 2.998957407964026e-07, "loss": 0.3957, "step": 4012 }, { "epoch": 0.8929684023141967, "grad_norm": 1.1873786015907384, "learning_rate": 2.9866772252222896e-07, "loss": 0.407, "step": 4013 }, { "epoch": 0.8931909212283045, "grad_norm": 1.225242518992647, "learning_rate": 2.974421462552296e-07, "loss": 0.4082, "step": 4014 }, { "epoch": 0.8934134401424121, "grad_norm": 1.4055669251988225, "learning_rate": 2.962190126320047e-07, "loss": 0.4056, "step": 4015 }, { "epoch": 0.8936359590565198, "grad_norm": 1.2536467193305003, "learning_rate": 2.949983222878855e-07, "loss": 0.4009, "step": 4016 }, { "epoch": 0.8938584779706275, "grad_norm": 1.2903495945590013, "learning_rate": 2.9378007585693204e-07, "loss": 0.4011, "step": 4017 }, { "epoch": 0.8940809968847352, "grad_norm": 1.4893650081364223, "learning_rate": 2.925642739719381e-07, "loss": 0.3959, "step": 4018 }, { "epoch": 0.8943035157988429, "grad_norm": 1.3448793837891149, "learning_rate": 2.913509172644252e-07, "loss": 0.4222, "step": 4019 }, { "epoch": 0.8945260347129506, "grad_norm": 1.3858069332615148, "learning_rate": 2.901400063646459e-07, "loss": 0.41, "step": 4020 }, { "epoch": 0.8947485536270583, "grad_norm": 1.3088217989626096, "learning_rate": 2.889315419015831e-07, "loss": 0.4211, "step": 4021 }, { "epoch": 0.894971072541166, "grad_norm": 1.2558098662873614, "learning_rate": 2.87725524502947e-07, "loss": 0.4212, "step": 4022 }, { "epoch": 0.8951935914552737, "grad_norm": 1.4134223524995129, "learning_rate": 2.8652195479517806e-07, "loss": 0.4359, "step": 4023 }, { "epoch": 0.8954161103693814, "grad_norm": 1.4404062048187958, "learning_rate": 2.853208334034441e-07, "loss": 0.4225, "step": 4024 }, { "epoch": 0.8956386292834891, "grad_norm": 1.387211724655465, "learning_rate": 2.8412216095164314e-07, "loss": 0.415, "step": 4025 }, { "epoch": 0.8958611481975968, "grad_norm": 1.322654319376851, "learning_rate": 2.8292593806240054e-07, "loss": 0.4023, "step": 4026 }, { "epoch": 0.8960836671117045, "grad_norm": 1.3799104388384122, "learning_rate": 2.8173216535706757e-07, "loss": 0.4221, "step": 4027 }, { "epoch": 0.8963061860258122, "grad_norm": 1.2934458306250332, "learning_rate": 2.805408434557255e-07, "loss": 0.4031, "step": 4028 }, { "epoch": 0.8965287049399199, "grad_norm": 1.2381482528147056, "learning_rate": 2.793519729771793e-07, "loss": 0.4099, "step": 4029 }, { "epoch": 0.8967512238540276, "grad_norm": 1.456966937010647, "learning_rate": 2.7816555453896386e-07, "loss": 0.4284, "step": 4030 }, { "epoch": 0.8969737427681352, "grad_norm": 1.4078121482659072, "learning_rate": 2.7698158875733907e-07, "loss": 0.4268, "step": 4031 }, { "epoch": 0.897196261682243, "grad_norm": 1.2906814870248522, "learning_rate": 2.758000762472901e-07, "loss": 0.4138, "step": 4032 }, { "epoch": 0.8974187805963507, "grad_norm": 1.3788733606957826, "learning_rate": 2.7462101762253003e-07, "loss": 0.4097, "step": 4033 }, { "epoch": 0.8976412995104583, "grad_norm": 1.2879934316037724, "learning_rate": 2.734444134954933e-07, "loss": 0.4378, "step": 4034 }, { "epoch": 0.8978638184245661, "grad_norm": 1.2972352255071213, "learning_rate": 2.7227026447734393e-07, "loss": 0.4231, "step": 4035 }, { "epoch": 0.8980863373386738, "grad_norm": 1.371196448420922, "learning_rate": 2.710985711779679e-07, "loss": 0.4192, "step": 4036 }, { "epoch": 0.8983088562527815, "grad_norm": 1.32800441801452, "learning_rate": 2.6992933420597666e-07, "loss": 0.4076, "step": 4037 }, { "epoch": 0.8985313751668892, "grad_norm": 1.2940586985963187, "learning_rate": 2.687625541687061e-07, "loss": 0.406, "step": 4038 }, { "epoch": 0.8987538940809969, "grad_norm": 1.2494453680439064, "learning_rate": 2.6759823167221363e-07, "loss": 0.4015, "step": 4039 }, { "epoch": 0.8989764129951046, "grad_norm": 1.3381084003033112, "learning_rate": 2.664363673212833e-07, "loss": 0.4188, "step": 4040 }, { "epoch": 0.8991989319092123, "grad_norm": 1.3259360944351986, "learning_rate": 2.652769617194212e-07, "loss": 0.4117, "step": 4041 }, { "epoch": 0.89942145082332, "grad_norm": 1.2257369365146407, "learning_rate": 2.6412001546885334e-07, "loss": 0.4062, "step": 4042 }, { "epoch": 0.8996439697374277, "grad_norm": 1.3659187429519801, "learning_rate": 2.629655291705341e-07, "loss": 0.4202, "step": 4043 }, { "epoch": 0.8998664886515354, "grad_norm": 1.3344463842135361, "learning_rate": 2.618135034241354e-07, "loss": 0.4113, "step": 4044 }, { "epoch": 0.900089007565643, "grad_norm": 1.2039483800407438, "learning_rate": 2.606639388280524e-07, "loss": 0.4102, "step": 4045 }, { "epoch": 0.9003115264797508, "grad_norm": 1.3606400505260272, "learning_rate": 2.59516835979402e-07, "loss": 0.4236, "step": 4046 }, { "epoch": 0.9005340453938585, "grad_norm": 1.2308174174787652, "learning_rate": 2.583721954740226e-07, "loss": 0.4022, "step": 4047 }, { "epoch": 0.9007565643079661, "grad_norm": 1.2318560052689065, "learning_rate": 2.5723001790647464e-07, "loss": 0.407, "step": 4048 }, { "epoch": 0.9009790832220739, "grad_norm": 1.3451637693547471, "learning_rate": 2.5609030387003466e-07, "loss": 0.429, "step": 4049 }, { "epoch": 0.9012016021361816, "grad_norm": 1.2557981908710532, "learning_rate": 2.5495305395670635e-07, "loss": 0.4261, "step": 4050 }, { "epoch": 0.9014241210502892, "grad_norm": 1.2839004895570802, "learning_rate": 2.538182687572083e-07, "loss": 0.4142, "step": 4051 }, { "epoch": 0.901646639964397, "grad_norm": 1.3059795905608973, "learning_rate": 2.5268594886098066e-07, "loss": 0.4106, "step": 4052 }, { "epoch": 0.9018691588785047, "grad_norm": 1.3530426390673256, "learning_rate": 2.515560948561846e-07, "loss": 0.4416, "step": 4053 }, { "epoch": 0.9020916777926123, "grad_norm": 1.236025124530386, "learning_rate": 2.504287073296957e-07, "loss": 0.4007, "step": 4054 }, { "epoch": 0.9023141967067201, "grad_norm": 1.3547949700496384, "learning_rate": 2.493037868671139e-07, "loss": 0.409, "step": 4055 }, { "epoch": 0.9025367156208278, "grad_norm": 1.314457026629656, "learning_rate": 2.4818133405275444e-07, "loss": 0.4251, "step": 4056 }, { "epoch": 0.9027592345349355, "grad_norm": 1.3936137722794533, "learning_rate": 2.4706134946965167e-07, "loss": 0.4131, "step": 4057 }, { "epoch": 0.9029817534490432, "grad_norm": 1.3485944661465257, "learning_rate": 2.4594383369955787e-07, "loss": 0.4185, "step": 4058 }, { "epoch": 0.9032042723631508, "grad_norm": 1.3345599457384756, "learning_rate": 2.448287873229427e-07, "loss": 0.4155, "step": 4059 }, { "epoch": 0.9034267912772586, "grad_norm": 1.4281375567886594, "learning_rate": 2.4371621091899335e-07, "loss": 0.4156, "step": 4060 }, { "epoch": 0.9036493101913663, "grad_norm": 1.3123555749656692, "learning_rate": 2.4260610506561364e-07, "loss": 0.4276, "step": 4061 }, { "epoch": 0.9038718291054739, "grad_norm": 1.3197730079336258, "learning_rate": 2.414984703394252e-07, "loss": 0.4082, "step": 4062 }, { "epoch": 0.9040943480195817, "grad_norm": 1.3214983644880838, "learning_rate": 2.403933073157655e-07, "loss": 0.4294, "step": 4063 }, { "epoch": 0.9043168669336894, "grad_norm": 1.3530415455954108, "learning_rate": 2.39290616568687e-07, "loss": 0.4093, "step": 4064 }, { "epoch": 0.904539385847797, "grad_norm": 1.4254876399522516, "learning_rate": 2.381903986709605e-07, "loss": 0.4371, "step": 4065 }, { "epoch": 0.9047619047619048, "grad_norm": 1.2562790664424441, "learning_rate": 2.370926541940688e-07, "loss": 0.4027, "step": 4066 }, { "epoch": 0.9049844236760125, "grad_norm": 1.292799845524376, "learning_rate": 2.359973837082119e-07, "loss": 0.4001, "step": 4067 }, { "epoch": 0.9052069425901201, "grad_norm": 1.368564450604494, "learning_rate": 2.3490458778230752e-07, "loss": 0.4074, "step": 4068 }, { "epoch": 0.9054294615042279, "grad_norm": 1.3291121347130064, "learning_rate": 2.3381426698398324e-07, "loss": 0.4244, "step": 4069 }, { "epoch": 0.9056519804183356, "grad_norm": 1.2940546861563536, "learning_rate": 2.3272642187958327e-07, "loss": 0.4267, "step": 4070 }, { "epoch": 0.9058744993324432, "grad_norm": 1.306683567844987, "learning_rate": 2.316410530341656e-07, "loss": 0.4003, "step": 4071 }, { "epoch": 0.906097018246551, "grad_norm": 1.3863336130364698, "learning_rate": 2.3055816101150262e-07, "loss": 0.413, "step": 4072 }, { "epoch": 0.9063195371606586, "grad_norm": 1.25284826761231, "learning_rate": 2.2947774637407883e-07, "loss": 0.4055, "step": 4073 }, { "epoch": 0.9065420560747663, "grad_norm": 1.3539043039281797, "learning_rate": 2.283998096830925e-07, "loss": 0.4112, "step": 4074 }, { "epoch": 0.9067645749888741, "grad_norm": 1.2595233082839756, "learning_rate": 2.2732435149845687e-07, "loss": 0.4003, "step": 4075 }, { "epoch": 0.9069870939029817, "grad_norm": 1.3360180307991745, "learning_rate": 2.262513723787929e-07, "loss": 0.398, "step": 4076 }, { "epoch": 0.9072096128170895, "grad_norm": 1.406457930933747, "learning_rate": 2.2518087288143918e-07, "loss": 0.4334, "step": 4077 }, { "epoch": 0.9074321317311972, "grad_norm": 1.4052062884668761, "learning_rate": 2.2411285356244318e-07, "loss": 0.4316, "step": 4078 }, { "epoch": 0.9076546506453048, "grad_norm": 1.3057951689526428, "learning_rate": 2.230473149765633e-07, "loss": 0.4193, "step": 4079 }, { "epoch": 0.9078771695594126, "grad_norm": 1.4957582000201821, "learning_rate": 2.2198425767727406e-07, "loss": 0.4253, "step": 4080 }, { "epoch": 0.9080996884735203, "grad_norm": 1.3735793337645754, "learning_rate": 2.2092368221675542e-07, "loss": 0.4128, "step": 4081 }, { "epoch": 0.9083222073876279, "grad_norm": 1.322854861963027, "learning_rate": 2.1986558914590173e-07, "loss": 0.4106, "step": 4082 }, { "epoch": 0.9085447263017357, "grad_norm": 1.349868943994501, "learning_rate": 2.1880997901431778e-07, "loss": 0.4073, "step": 4083 }, { "epoch": 0.9087672452158434, "grad_norm": 1.3061329270379671, "learning_rate": 2.1775685237031553e-07, "loss": 0.4109, "step": 4084 }, { "epoch": 0.908989764129951, "grad_norm": 1.3029442847292796, "learning_rate": 2.1670620976092127e-07, "loss": 0.4178, "step": 4085 }, { "epoch": 0.9092122830440588, "grad_norm": 1.3233574599998876, "learning_rate": 2.1565805173186792e-07, "loss": 0.4004, "step": 4086 }, { "epoch": 0.9094348019581664, "grad_norm": 1.2846863390045355, "learning_rate": 2.1461237882759945e-07, "loss": 0.4177, "step": 4087 }, { "epoch": 0.9096573208722741, "grad_norm": 1.2565742795187704, "learning_rate": 2.135691915912691e-07, "loss": 0.4175, "step": 4088 }, { "epoch": 0.9098798397863819, "grad_norm": 1.3144993336459672, "learning_rate": 2.1252849056473678e-07, "loss": 0.4057, "step": 4089 }, { "epoch": 0.9101023587004895, "grad_norm": 1.2813138189268982, "learning_rate": 2.11490276288574e-07, "loss": 0.4167, "step": 4090 }, { "epoch": 0.9103248776145972, "grad_norm": 1.2967328528481852, "learning_rate": 2.1045454930205766e-07, "loss": 0.413, "step": 4091 }, { "epoch": 0.910547396528705, "grad_norm": 1.2829433622875508, "learning_rate": 2.0942131014317469e-07, "loss": 0.4164, "step": 4092 }, { "epoch": 0.9107699154428126, "grad_norm": 1.4609087258631734, "learning_rate": 2.0839055934861961e-07, "loss": 0.4338, "step": 4093 }, { "epoch": 0.9109924343569203, "grad_norm": 1.345368353092138, "learning_rate": 2.0736229745379366e-07, "loss": 0.411, "step": 4094 }, { "epoch": 0.9112149532710281, "grad_norm": 1.2695435012765535, "learning_rate": 2.063365249928062e-07, "loss": 0.396, "step": 4095 }, { "epoch": 0.9114374721851357, "grad_norm": 1.2583002760997546, "learning_rate": 2.0531324249847218e-07, "loss": 0.3978, "step": 4096 }, { "epoch": 0.9116599910992434, "grad_norm": 1.3364276892619653, "learning_rate": 2.0429245050231415e-07, "loss": 0.4072, "step": 4097 }, { "epoch": 0.9118825100133512, "grad_norm": 1.2400079238697561, "learning_rate": 2.032741495345608e-07, "loss": 0.4172, "step": 4098 }, { "epoch": 0.9121050289274588, "grad_norm": 1.320980256502441, "learning_rate": 2.0225834012414737e-07, "loss": 0.4096, "step": 4099 }, { "epoch": 0.9123275478415666, "grad_norm": 1.3630632183609046, "learning_rate": 2.0124502279871504e-07, "loss": 0.4254, "step": 4100 }, { "epoch": 0.9125500667556742, "grad_norm": 1.3137207259479495, "learning_rate": 2.0023419808460842e-07, "loss": 0.398, "step": 4101 }, { "epoch": 0.9127725856697819, "grad_norm": 1.4422368967141577, "learning_rate": 1.9922586650687913e-07, "loss": 0.4064, "step": 4102 }, { "epoch": 0.9129951045838897, "grad_norm": 1.297602628067039, "learning_rate": 1.9822002858928546e-07, "loss": 0.4167, "step": 4103 }, { "epoch": 0.9132176234979973, "grad_norm": 1.316316890214157, "learning_rate": 1.972166848542856e-07, "loss": 0.416, "step": 4104 }, { "epoch": 0.913440142412105, "grad_norm": 1.3895416136297754, "learning_rate": 1.962158358230476e-07, "loss": 0.4037, "step": 4105 }, { "epoch": 0.9136626613262128, "grad_norm": 1.298873980510256, "learning_rate": 1.952174820154401e-07, "loss": 0.4125, "step": 4106 }, { "epoch": 0.9138851802403204, "grad_norm": 1.2474364246671312, "learning_rate": 1.9422162395003775e-07, "loss": 0.4037, "step": 4107 }, { "epoch": 0.9141076991544281, "grad_norm": 1.297148037970763, "learning_rate": 1.9322826214411616e-07, "loss": 0.406, "step": 4108 }, { "epoch": 0.9143302180685359, "grad_norm": 1.3930357775349893, "learning_rate": 1.9223739711365762e-07, "loss": 0.4069, "step": 4109 }, { "epoch": 0.9145527369826435, "grad_norm": 1.4183592449958022, "learning_rate": 1.9124902937334488e-07, "loss": 0.4123, "step": 4110 }, { "epoch": 0.9147752558967512, "grad_norm": 1.3328023149331403, "learning_rate": 1.9026315943656502e-07, "loss": 0.4047, "step": 4111 }, { "epoch": 0.914997774810859, "grad_norm": 1.2958518342066023, "learning_rate": 1.892797878154079e-07, "loss": 0.4346, "step": 4112 }, { "epoch": 0.9152202937249666, "grad_norm": 1.2807162769214866, "learning_rate": 1.8829891502066379e-07, "loss": 0.4062, "step": 4113 }, { "epoch": 0.9154428126390743, "grad_norm": 1.3724628081589436, "learning_rate": 1.8732054156182622e-07, "loss": 0.4238, "step": 4114 }, { "epoch": 0.915665331553182, "grad_norm": 1.5209689202217929, "learning_rate": 1.8634466794709205e-07, "loss": 0.4397, "step": 4115 }, { "epoch": 0.9158878504672897, "grad_norm": 1.3147762640663205, "learning_rate": 1.8537129468335636e-07, "loss": 0.4188, "step": 4116 }, { "epoch": 0.9161103693813974, "grad_norm": 1.2999143529178603, "learning_rate": 1.8440042227621856e-07, "loss": 0.3955, "step": 4117 }, { "epoch": 0.9163328882955051, "grad_norm": 1.3280786828272526, "learning_rate": 1.8343205122997643e-07, "loss": 0.4148, "step": 4118 }, { "epoch": 0.9165554072096128, "grad_norm": 1.3364429860480993, "learning_rate": 1.8246618204763034e-07, "loss": 0.4092, "step": 4119 }, { "epoch": 0.9167779261237206, "grad_norm": 1.3586763023621826, "learning_rate": 1.8150281523088175e-07, "loss": 0.4212, "step": 4120 }, { "epoch": 0.9170004450378282, "grad_norm": 1.2921122490773638, "learning_rate": 1.8054195128012874e-07, "loss": 0.4027, "step": 4121 }, { "epoch": 0.9172229639519359, "grad_norm": 1.3790014351343665, "learning_rate": 1.7958359069447318e-07, "loss": 0.4212, "step": 4122 }, { "epoch": 0.9174454828660437, "grad_norm": 1.3426764255653043, "learning_rate": 1.7862773397171407e-07, "loss": 0.4243, "step": 4123 }, { "epoch": 0.9176680017801513, "grad_norm": 1.309025945955543, "learning_rate": 1.7767438160835205e-07, "loss": 0.4102, "step": 4124 }, { "epoch": 0.917890520694259, "grad_norm": 1.3400797316361424, "learning_rate": 1.7672353409958597e-07, "loss": 0.4076, "step": 4125 }, { "epoch": 0.9181130396083668, "grad_norm": 1.2552623490684351, "learning_rate": 1.7577519193931248e-07, "loss": 0.4025, "step": 4126 }, { "epoch": 0.9183355585224744, "grad_norm": 1.2991210280282597, "learning_rate": 1.7482935562012804e-07, "loss": 0.4163, "step": 4127 }, { "epoch": 0.9185580774365821, "grad_norm": 1.2332022366481448, "learning_rate": 1.7388602563332636e-07, "loss": 0.4119, "step": 4128 }, { "epoch": 0.9187805963506898, "grad_norm": 1.4584214999647296, "learning_rate": 1.7294520246890046e-07, "loss": 0.4053, "step": 4129 }, { "epoch": 0.9190031152647975, "grad_norm": 1.2851408531062145, "learning_rate": 1.7200688661554276e-07, "loss": 0.4043, "step": 4130 }, { "epoch": 0.9192256341789052, "grad_norm": 1.2831557170465597, "learning_rate": 1.7107107856063954e-07, "loss": 0.401, "step": 4131 }, { "epoch": 0.9194481530930129, "grad_norm": 1.4401596252696853, "learning_rate": 1.7013777879027803e-07, "loss": 0.4236, "step": 4132 }, { "epoch": 0.9196706720071206, "grad_norm": 1.3859996107478834, "learning_rate": 1.6920698778923882e-07, "loss": 0.3909, "step": 4133 }, { "epoch": 0.9198931909212283, "grad_norm": 1.2264803344258366, "learning_rate": 1.6827870604100295e-07, "loss": 0.4138, "step": 4134 }, { "epoch": 0.920115709835336, "grad_norm": 1.3088936327217922, "learning_rate": 1.673529340277469e-07, "loss": 0.417, "step": 4135 }, { "epoch": 0.9203382287494437, "grad_norm": 1.3752630144512705, "learning_rate": 1.6642967223034213e-07, "loss": 0.4117, "step": 4136 }, { "epoch": 0.9205607476635514, "grad_norm": 1.3349433602156013, "learning_rate": 1.6550892112835837e-07, "loss": 0.4083, "step": 4137 }, { "epoch": 0.9207832665776591, "grad_norm": 1.3556639443650977, "learning_rate": 1.645906812000597e-07, "loss": 0.4272, "step": 4138 }, { "epoch": 0.9210057854917668, "grad_norm": 1.2906841993528404, "learning_rate": 1.6367495292240686e-07, "loss": 0.4203, "step": 4139 }, { "epoch": 0.9212283044058746, "grad_norm": 1.3065237794673008, "learning_rate": 1.627617367710549e-07, "loss": 0.4153, "step": 4140 }, { "epoch": 0.9214508233199822, "grad_norm": 1.298444067056576, "learning_rate": 1.6185103322035435e-07, "loss": 0.409, "step": 4141 }, { "epoch": 0.9216733422340899, "grad_norm": 1.2530731873987448, "learning_rate": 1.6094284274335182e-07, "loss": 0.4188, "step": 4142 }, { "epoch": 0.9218958611481975, "grad_norm": 1.2228058410551408, "learning_rate": 1.600371658117861e-07, "loss": 0.4067, "step": 4143 }, { "epoch": 0.9221183800623053, "grad_norm": 1.3065533567914258, "learning_rate": 1.591340028960936e-07, "loss": 0.422, "step": 4144 }, { "epoch": 0.922340898976413, "grad_norm": 1.4026807715069625, "learning_rate": 1.5823335446540188e-07, "loss": 0.4161, "step": 4145 }, { "epoch": 0.9225634178905207, "grad_norm": 1.3449005905970097, "learning_rate": 1.5733522098753396e-07, "loss": 0.4068, "step": 4146 }, { "epoch": 0.9227859368046284, "grad_norm": 1.3198098413376427, "learning_rate": 1.5643960292900607e-07, "loss": 0.3866, "step": 4147 }, { "epoch": 0.9230084557187361, "grad_norm": 1.4359377658990196, "learning_rate": 1.5554650075502775e-07, "loss": 0.436, "step": 4148 }, { "epoch": 0.9232309746328438, "grad_norm": 1.30062206502113, "learning_rate": 1.546559149295024e-07, "loss": 0.4224, "step": 4149 }, { "epoch": 0.9234534935469515, "grad_norm": 1.329045909994268, "learning_rate": 1.5376784591502658e-07, "loss": 0.3976, "step": 4150 }, { "epoch": 0.9236760124610592, "grad_norm": 1.3497443201532053, "learning_rate": 1.5288229417288746e-07, "loss": 0.4169, "step": 4151 }, { "epoch": 0.9238985313751669, "grad_norm": 1.2773178509763699, "learning_rate": 1.519992601630671e-07, "loss": 0.4234, "step": 4152 }, { "epoch": 0.9241210502892746, "grad_norm": 1.3093237988310036, "learning_rate": 1.5111874434423746e-07, "loss": 0.4225, "step": 4153 }, { "epoch": 0.9243435692033823, "grad_norm": 1.3550255985984425, "learning_rate": 1.5024074717376601e-07, "loss": 0.4171, "step": 4154 }, { "epoch": 0.92456608811749, "grad_norm": 1.340421912128872, "learning_rate": 1.4936526910770742e-07, "loss": 0.4174, "step": 4155 }, { "epoch": 0.9247886070315977, "grad_norm": 1.3663007604337356, "learning_rate": 1.4849231060081126e-07, "loss": 0.4296, "step": 4156 }, { "epoch": 0.9250111259457053, "grad_norm": 1.3597909776607067, "learning_rate": 1.4762187210651813e-07, "loss": 0.3902, "step": 4157 }, { "epoch": 0.9252336448598131, "grad_norm": 1.275216438349158, "learning_rate": 1.4675395407695692e-07, "loss": 0.4004, "step": 4158 }, { "epoch": 0.9254561637739208, "grad_norm": 1.326480677458972, "learning_rate": 1.4588855696295035e-07, "loss": 0.408, "step": 4159 }, { "epoch": 0.9256786826880284, "grad_norm": 1.2499823609891716, "learning_rate": 1.4502568121400994e-07, "loss": 0.4222, "step": 4160 }, { "epoch": 0.9259012016021362, "grad_norm": 1.244070778319195, "learning_rate": 1.4416532727833888e-07, "loss": 0.4056, "step": 4161 }, { "epoch": 0.9261237205162439, "grad_norm": 1.3215798353446861, "learning_rate": 1.433074956028302e-07, "loss": 0.3952, "step": 4162 }, { "epoch": 0.9263462394303515, "grad_norm": 1.3798447468684214, "learning_rate": 1.424521866330647e-07, "loss": 0.4177, "step": 4163 }, { "epoch": 0.9265687583444593, "grad_norm": 1.3530865388391233, "learning_rate": 1.4159940081331536e-07, "loss": 0.428, "step": 4164 }, { "epoch": 0.926791277258567, "grad_norm": 1.3008485735143864, "learning_rate": 1.407491385865445e-07, "loss": 0.4109, "step": 4165 }, { "epoch": 0.9270137961726747, "grad_norm": 1.3495252319684425, "learning_rate": 1.3990140039440104e-07, "loss": 0.4083, "step": 4166 }, { "epoch": 0.9272363150867824, "grad_norm": 1.336363490192697, "learning_rate": 1.390561866772261e-07, "loss": 0.4215, "step": 4167 }, { "epoch": 0.9274588340008901, "grad_norm": 1.2666495579562878, "learning_rate": 1.382134978740468e-07, "loss": 0.4065, "step": 4168 }, { "epoch": 0.9276813529149978, "grad_norm": 1.399376010505721, "learning_rate": 1.3737333442258084e-07, "loss": 0.4082, "step": 4169 }, { "epoch": 0.9279038718291055, "grad_norm": 1.3457015152059302, "learning_rate": 1.3653569675923296e-07, "loss": 0.4107, "step": 4170 }, { "epoch": 0.9281263907432131, "grad_norm": 1.2331321619868674, "learning_rate": 1.357005853190957e-07, "loss": 0.4032, "step": 4171 }, { "epoch": 0.9283489096573209, "grad_norm": 1.3339493951688852, "learning_rate": 1.3486800053595095e-07, "loss": 0.4143, "step": 4172 }, { "epoch": 0.9285714285714286, "grad_norm": 1.2725440644377926, "learning_rate": 1.340379428422661e-07, "loss": 0.3932, "step": 4173 }, { "epoch": 0.9287939474855362, "grad_norm": 1.3062121466639975, "learning_rate": 1.3321041266919854e-07, "loss": 0.4018, "step": 4174 }, { "epoch": 0.929016466399644, "grad_norm": 1.3917791455096942, "learning_rate": 1.3238541044658992e-07, "loss": 0.4188, "step": 4175 }, { "epoch": 0.9292389853137517, "grad_norm": 1.3060002857998185, "learning_rate": 1.3156293660297025e-07, "loss": 0.4136, "step": 4176 }, { "epoch": 0.9294615042278593, "grad_norm": 1.267137404645151, "learning_rate": 1.307429915655567e-07, "loss": 0.4032, "step": 4177 }, { "epoch": 0.9296840231419671, "grad_norm": 1.3539440344816143, "learning_rate": 1.2992557576025078e-07, "loss": 0.4191, "step": 4178 }, { "epoch": 0.9299065420560748, "grad_norm": 1.3924947946471031, "learning_rate": 1.2911068961164454e-07, "loss": 0.3972, "step": 4179 }, { "epoch": 0.9301290609701824, "grad_norm": 1.3164606759490132, "learning_rate": 1.2829833354301047e-07, "loss": 0.4191, "step": 4180 }, { "epoch": 0.9303515798842902, "grad_norm": 1.4121854673872187, "learning_rate": 1.2748850797631164e-07, "loss": 0.4239, "step": 4181 }, { "epoch": 0.9305740987983978, "grad_norm": 1.3909015313002686, "learning_rate": 1.2668121333219375e-07, "loss": 0.4157, "step": 4182 }, { "epoch": 0.9307966177125055, "grad_norm": 1.3094205888795045, "learning_rate": 1.2587645002998862e-07, "loss": 0.423, "step": 4183 }, { "epoch": 0.9310191366266133, "grad_norm": 1.2846750607878017, "learning_rate": 1.2507421848771405e-07, "loss": 0.4046, "step": 4184 }, { "epoch": 0.9312416555407209, "grad_norm": 1.3670219140156297, "learning_rate": 1.2427451912207235e-07, "loss": 0.4226, "step": 4185 }, { "epoch": 0.9314641744548287, "grad_norm": 1.267755615534782, "learning_rate": 1.234773523484495e-07, "loss": 0.3944, "step": 4186 }, { "epoch": 0.9316866933689364, "grad_norm": 1.2364739125381892, "learning_rate": 1.2268271858091817e-07, "loss": 0.3785, "step": 4187 }, { "epoch": 0.931909212283044, "grad_norm": 1.3175637424405733, "learning_rate": 1.2189061823223214e-07, "loss": 0.4061, "step": 4188 }, { "epoch": 0.9321317311971518, "grad_norm": 1.2724146605251894, "learning_rate": 1.2110105171383336e-07, "loss": 0.4003, "step": 4189 }, { "epoch": 0.9323542501112595, "grad_norm": 1.4486634022581049, "learning_rate": 1.2031401943584265e-07, "loss": 0.4256, "step": 4190 }, { "epoch": 0.9325767690253671, "grad_norm": 1.330982077841958, "learning_rate": 1.1952952180706966e-07, "loss": 0.4065, "step": 4191 }, { "epoch": 0.9327992879394749, "grad_norm": 1.3066031680645742, "learning_rate": 1.1874755923500402e-07, "loss": 0.4071, "step": 4192 }, { "epoch": 0.9330218068535826, "grad_norm": 1.2970907153661895, "learning_rate": 1.1796813212581971e-07, "loss": 0.4096, "step": 4193 }, { "epoch": 0.9332443257676902, "grad_norm": 1.3328393480093568, "learning_rate": 1.1719124088437395e-07, "loss": 0.4209, "step": 4194 }, { "epoch": 0.933466844681798, "grad_norm": 1.1979716846171597, "learning_rate": 1.1641688591420508e-07, "loss": 0.4122, "step": 4195 }, { "epoch": 0.9336893635959056, "grad_norm": 1.5373457835767124, "learning_rate": 1.156450676175369e-07, "loss": 0.4235, "step": 4196 }, { "epoch": 0.9339118825100133, "grad_norm": 1.2524123258810533, "learning_rate": 1.1487578639527264e-07, "loss": 0.3906, "step": 4197 }, { "epoch": 0.9341344014241211, "grad_norm": 1.4450016663271454, "learning_rate": 1.141090426470004e-07, "loss": 0.4152, "step": 4198 }, { "epoch": 0.9343569203382287, "grad_norm": 1.3364899277306008, "learning_rate": 1.1334483677098829e-07, "loss": 0.4144, "step": 4199 }, { "epoch": 0.9345794392523364, "grad_norm": 1.3814609836379788, "learning_rate": 1.1258316916418655e-07, "loss": 0.4042, "step": 4200 }, { "epoch": 0.9348019581664442, "grad_norm": 1.3130079969149464, "learning_rate": 1.1182404022222759e-07, "loss": 0.406, "step": 4201 }, { "epoch": 0.9350244770805518, "grad_norm": 1.2838365150543611, "learning_rate": 1.110674503394249e-07, "loss": 0.393, "step": 4202 }, { "epoch": 0.9352469959946595, "grad_norm": 1.2647648391130464, "learning_rate": 1.1031339990877243e-07, "loss": 0.3839, "step": 4203 }, { "epoch": 0.9354695149087673, "grad_norm": 1.2625967721550573, "learning_rate": 1.0956188932194689e-07, "loss": 0.4076, "step": 4204 }, { "epoch": 0.9356920338228749, "grad_norm": 1.399886781973375, "learning_rate": 1.0881291896930324e-07, "loss": 0.4155, "step": 4205 }, { "epoch": 0.9359145527369827, "grad_norm": 1.3357207087801708, "learning_rate": 1.0806648923987862e-07, "loss": 0.3897, "step": 4206 }, { "epoch": 0.9361370716510904, "grad_norm": 1.3054882901489586, "learning_rate": 1.0732260052139065e-07, "loss": 0.4159, "step": 4207 }, { "epoch": 0.936359590565198, "grad_norm": 1.2328237421732744, "learning_rate": 1.0658125320023582e-07, "loss": 0.4115, "step": 4208 }, { "epoch": 0.9365821094793058, "grad_norm": 1.407655383718175, "learning_rate": 1.058424476614911e-07, "loss": 0.4307, "step": 4209 }, { "epoch": 0.9368046283934134, "grad_norm": 2.0105618013844717, "learning_rate": 1.0510618428891395e-07, "loss": 0.4065, "step": 4210 }, { "epoch": 0.9370271473075211, "grad_norm": 1.2557423577847506, "learning_rate": 1.0437246346494012e-07, "loss": 0.4194, "step": 4211 }, { "epoch": 0.9372496662216289, "grad_norm": 1.3411269945231503, "learning_rate": 1.0364128557068642e-07, "loss": 0.418, "step": 4212 }, { "epoch": 0.9374721851357365, "grad_norm": 1.416112180186424, "learning_rate": 1.0291265098594628e-07, "loss": 0.4119, "step": 4213 }, { "epoch": 0.9376947040498442, "grad_norm": 1.7858422862846697, "learning_rate": 1.0218656008919469e-07, "loss": 0.4168, "step": 4214 }, { "epoch": 0.937917222963952, "grad_norm": 1.2552260803938855, "learning_rate": 1.0146301325758279e-07, "loss": 0.4035, "step": 4215 }, { "epoch": 0.9381397418780596, "grad_norm": 1.2250285337568458, "learning_rate": 1.0074201086694324e-07, "loss": 0.4113, "step": 4216 }, { "epoch": 0.9383622607921673, "grad_norm": 1.307983896945295, "learning_rate": 1.000235532917837e-07, "loss": 0.4144, "step": 4217 }, { "epoch": 0.9385847797062751, "grad_norm": 1.234909503879163, "learning_rate": 9.930764090529288e-08, "loss": 0.3971, "step": 4218 }, { "epoch": 0.9388072986203827, "grad_norm": 1.3858829847425027, "learning_rate": 9.859427407933609e-08, "loss": 0.4195, "step": 4219 }, { "epoch": 0.9390298175344904, "grad_norm": 1.3194501661357738, "learning_rate": 9.788345318445636e-08, "loss": 0.4141, "step": 4220 }, { "epoch": 0.9392523364485982, "grad_norm": 1.3430763273751283, "learning_rate": 9.717517858987446e-08, "loss": 0.4164, "step": 4221 }, { "epoch": 0.9394748553627058, "grad_norm": 1.2985445982634047, "learning_rate": 9.646945066348834e-08, "loss": 0.4226, "step": 4222 }, { "epoch": 0.9396973742768135, "grad_norm": 1.2456202871978974, "learning_rate": 9.57662697718742e-08, "loss": 0.4049, "step": 4223 }, { "epoch": 0.9399198931909212, "grad_norm": 1.3379540011973472, "learning_rate": 9.506563628028376e-08, "loss": 0.4058, "step": 4224 }, { "epoch": 0.9401424121050289, "grad_norm": 1.317620769362483, "learning_rate": 9.436755055264646e-08, "loss": 0.4072, "step": 4225 }, { "epoch": 0.9403649310191367, "grad_norm": 1.3323243798750526, "learning_rate": 9.367201295156725e-08, "loss": 0.398, "step": 4226 }, { "epoch": 0.9405874499332443, "grad_norm": 1.3747287960628018, "learning_rate": 9.29790238383299e-08, "loss": 0.4224, "step": 4227 }, { "epoch": 0.940809968847352, "grad_norm": 1.364239346668323, "learning_rate": 9.22885835728915e-08, "loss": 0.4186, "step": 4228 }, { "epoch": 0.9410324877614598, "grad_norm": 1.2351604089823933, "learning_rate": 9.160069251388792e-08, "loss": 0.3883, "step": 4229 }, { "epoch": 0.9412550066755674, "grad_norm": 1.242549793405318, "learning_rate": 9.091535101862837e-08, "loss": 0.4122, "step": 4230 }, { "epoch": 0.9414775255896751, "grad_norm": 1.340133086282148, "learning_rate": 9.023255944309972e-08, "loss": 0.4126, "step": 4231 }, { "epoch": 0.9417000445037829, "grad_norm": 1.383340877225334, "learning_rate": 8.955231814196274e-08, "loss": 0.4141, "step": 4232 }, { "epoch": 0.9419225634178905, "grad_norm": 1.3747564745971794, "learning_rate": 8.88746274685548e-08, "loss": 0.4215, "step": 4233 }, { "epoch": 0.9421450823319982, "grad_norm": 1.282268289737826, "learning_rate": 8.819948777488819e-08, "loss": 0.4041, "step": 4234 }, { "epoch": 0.942367601246106, "grad_norm": 1.4163744211930724, "learning_rate": 8.75268994116496e-08, "loss": 0.4148, "step": 4235 }, { "epoch": 0.9425901201602136, "grad_norm": 1.3535663109947114, "learning_rate": 8.685686272820071e-08, "loss": 0.4097, "step": 4236 }, { "epoch": 0.9428126390743213, "grad_norm": 1.4281654210168928, "learning_rate": 8.618937807257754e-08, "loss": 0.4291, "step": 4237 }, { "epoch": 0.943035157988429, "grad_norm": 1.2462239552428902, "learning_rate": 8.552444579149167e-08, "loss": 0.4215, "step": 4238 }, { "epoch": 0.9432576769025367, "grad_norm": 1.299921749658804, "learning_rate": 8.486206623032734e-08, "loss": 0.4038, "step": 4239 }, { "epoch": 0.9434801958166444, "grad_norm": 1.424814191121319, "learning_rate": 8.420223973314324e-08, "loss": 0.4197, "step": 4240 }, { "epoch": 0.9437027147307521, "grad_norm": 1.25230777640715, "learning_rate": 8.354496664267354e-08, "loss": 0.4111, "step": 4241 }, { "epoch": 0.9439252336448598, "grad_norm": 1.2689701131957862, "learning_rate": 8.289024730032346e-08, "loss": 0.4092, "step": 4242 }, { "epoch": 0.9441477525589675, "grad_norm": 1.244944998284207, "learning_rate": 8.223808204617378e-08, "loss": 0.4092, "step": 4243 }, { "epoch": 0.9443702714730752, "grad_norm": 1.3605819432872543, "learning_rate": 8.158847121897795e-08, "loss": 0.3974, "step": 4244 }, { "epoch": 0.9445927903871829, "grad_norm": 1.1947387510293688, "learning_rate": 8.094141515616161e-08, "loss": 0.4171, "step": 4245 }, { "epoch": 0.9448153093012907, "grad_norm": 1.244571449498985, "learning_rate": 8.029691419382534e-08, "loss": 0.3952, "step": 4246 }, { "epoch": 0.9450378282153983, "grad_norm": 1.3101146405148827, "learning_rate": 7.965496866674083e-08, "loss": 0.4189, "step": 4247 }, { "epoch": 0.945260347129506, "grad_norm": 1.2616736951049967, "learning_rate": 7.901557890835299e-08, "loss": 0.4042, "step": 4248 }, { "epoch": 0.9454828660436138, "grad_norm": 1.3783310346246223, "learning_rate": 7.837874525078004e-08, "loss": 0.421, "step": 4249 }, { "epoch": 0.9457053849577214, "grad_norm": 1.322792354500663, "learning_rate": 7.774446802481128e-08, "loss": 0.3894, "step": 4250 }, { "epoch": 0.9459279038718291, "grad_norm": 1.2041782343705252, "learning_rate": 7.711274755990816e-08, "loss": 0.4049, "step": 4251 }, { "epoch": 0.9461504227859368, "grad_norm": 1.3379735261250145, "learning_rate": 7.648358418420432e-08, "loss": 0.4293, "step": 4252 }, { "epoch": 0.9463729417000445, "grad_norm": 1.2570527771733562, "learning_rate": 7.585697822450611e-08, "loss": 0.4054, "step": 4253 }, { "epoch": 0.9465954606141522, "grad_norm": 1.3485774246322078, "learning_rate": 7.523293000629039e-08, "loss": 0.3962, "step": 4254 }, { "epoch": 0.9468179795282599, "grad_norm": 1.3859460086828768, "learning_rate": 7.461143985370567e-08, "loss": 0.4297, "step": 4255 }, { "epoch": 0.9470404984423676, "grad_norm": 1.2598046749397032, "learning_rate": 7.399250808957204e-08, "loss": 0.4166, "step": 4256 }, { "epoch": 0.9472630173564753, "grad_norm": 1.2557664672036937, "learning_rate": 7.337613503537954e-08, "loss": 0.4098, "step": 4257 }, { "epoch": 0.947485536270583, "grad_norm": 1.3607835847885494, "learning_rate": 7.276232101129099e-08, "loss": 0.4129, "step": 4258 }, { "epoch": 0.9477080551846907, "grad_norm": 1.3202434102437506, "learning_rate": 7.215106633613855e-08, "loss": 0.4092, "step": 4259 }, { "epoch": 0.9479305740987984, "grad_norm": 1.2748233464629226, "learning_rate": 7.154237132742603e-08, "loss": 0.3913, "step": 4260 }, { "epoch": 0.9481530930129061, "grad_norm": 1.3329676503421644, "learning_rate": 7.093623630132663e-08, "loss": 0.3932, "step": 4261 }, { "epoch": 0.9483756119270138, "grad_norm": 1.2532435460844906, "learning_rate": 7.033266157268459e-08, "loss": 0.4086, "step": 4262 }, { "epoch": 0.9485981308411215, "grad_norm": 1.3354733322279335, "learning_rate": 6.97316474550136e-08, "loss": 0.4113, "step": 4263 }, { "epoch": 0.9488206497552292, "grad_norm": 1.2541848628776375, "learning_rate": 6.913319426049836e-08, "loss": 0.4141, "step": 4264 }, { "epoch": 0.9490431686693369, "grad_norm": 1.2770691104792378, "learning_rate": 6.85373022999919e-08, "loss": 0.4053, "step": 4265 }, { "epoch": 0.9492656875834445, "grad_norm": 1.247791909552818, "learning_rate": 6.794397188301827e-08, "loss": 0.4082, "step": 4266 }, { "epoch": 0.9494882064975523, "grad_norm": 1.3858516941304655, "learning_rate": 6.735320331776984e-08, "loss": 0.4249, "step": 4267 }, { "epoch": 0.94971072541166, "grad_norm": 1.2354046827515357, "learning_rate": 6.676499691110894e-08, "loss": 0.4186, "step": 4268 }, { "epoch": 0.9499332443257676, "grad_norm": 1.288324070177518, "learning_rate": 6.617935296856781e-08, "loss": 0.4117, "step": 4269 }, { "epoch": 0.9501557632398754, "grad_norm": 1.2686363202098356, "learning_rate": 6.55962717943448e-08, "loss": 0.4145, "step": 4270 }, { "epoch": 0.9503782821539831, "grad_norm": 1.2387570346237453, "learning_rate": 6.501575369131041e-08, "loss": 0.426, "step": 4271 }, { "epoch": 0.9506008010680908, "grad_norm": 1.2750116071207735, "learning_rate": 6.443779896100233e-08, "loss": 0.4047, "step": 4272 }, { "epoch": 0.9508233199821985, "grad_norm": 1.2077929522298616, "learning_rate": 6.386240790362708e-08, "loss": 0.3949, "step": 4273 }, { "epoch": 0.9510458388963062, "grad_norm": 1.3081944618661712, "learning_rate": 6.328958081805892e-08, "loss": 0.4242, "step": 4274 }, { "epoch": 0.9512683578104139, "grad_norm": 1.331212457488294, "learning_rate": 6.271931800184039e-08, "loss": 0.4169, "step": 4275 }, { "epoch": 0.9514908767245216, "grad_norm": 1.4504877938971679, "learning_rate": 6.215161975118289e-08, "loss": 0.4108, "step": 4276 }, { "epoch": 0.9517133956386293, "grad_norm": 1.4006283987602597, "learning_rate": 6.158648636096442e-08, "loss": 0.4132, "step": 4277 }, { "epoch": 0.951935914552737, "grad_norm": 1.3704545580781684, "learning_rate": 6.102391812473296e-08, "loss": 0.4186, "step": 4278 }, { "epoch": 0.9521584334668447, "grad_norm": 1.379104249403875, "learning_rate": 6.046391533470142e-08, "loss": 0.434, "step": 4279 }, { "epoch": 0.9523809523809523, "grad_norm": 1.415656531943263, "learning_rate": 5.990647828175211e-08, "loss": 0.4317, "step": 4280 }, { "epoch": 0.9526034712950601, "grad_norm": 1.2562018836251132, "learning_rate": 5.935160725543343e-08, "loss": 0.3996, "step": 4281 }, { "epoch": 0.9528259902091678, "grad_norm": 1.4054118045991808, "learning_rate": 5.879930254396149e-08, "loss": 0.4048, "step": 4282 }, { "epoch": 0.9530485091232754, "grad_norm": 1.402001759485775, "learning_rate": 5.824956443421903e-08, "loss": 0.3873, "step": 4283 }, { "epoch": 0.9532710280373832, "grad_norm": 1.2530491445948138, "learning_rate": 5.7702393211755966e-08, "loss": 0.4137, "step": 4284 }, { "epoch": 0.9534935469514909, "grad_norm": 1.277273381684704, "learning_rate": 5.715778916078885e-08, "loss": 0.3946, "step": 4285 }, { "epoch": 0.9537160658655985, "grad_norm": 1.314962903461756, "learning_rate": 5.661575256420082e-08, "loss": 0.4292, "step": 4286 }, { "epoch": 0.9539385847797063, "grad_norm": 1.3894825397238206, "learning_rate": 5.6076283703541125e-08, "loss": 0.4065, "step": 4287 }, { "epoch": 0.954161103693814, "grad_norm": 1.3813524196045754, "learning_rate": 5.553938285902505e-08, "loss": 0.4113, "step": 4288 }, { "epoch": 0.9543836226079216, "grad_norm": 1.213656715499369, "learning_rate": 5.500505030953451e-08, "loss": 0.4156, "step": 4289 }, { "epoch": 0.9546061415220294, "grad_norm": 1.386242478317034, "learning_rate": 5.44732863326175e-08, "loss": 0.4133, "step": 4290 }, { "epoch": 0.9548286604361371, "grad_norm": 1.2712919331038222, "learning_rate": 5.394409120448807e-08, "loss": 0.4095, "step": 4291 }, { "epoch": 0.9550511793502447, "grad_norm": 1.2092155342851911, "learning_rate": 5.3417465200023555e-08, "loss": 0.3809, "step": 4292 }, { "epoch": 0.9552736982643525, "grad_norm": 1.2981674346980319, "learning_rate": 5.28934085927707e-08, "loss": 0.4192, "step": 4293 }, { "epoch": 0.9554962171784601, "grad_norm": 1.3582512703355718, "learning_rate": 5.237192165493843e-08, "loss": 0.4071, "step": 4294 }, { "epoch": 0.9557187360925679, "grad_norm": 1.245731358294342, "learning_rate": 5.185300465740117e-08, "loss": 0.4081, "step": 4295 }, { "epoch": 0.9559412550066756, "grad_norm": 1.3278021102403985, "learning_rate": 5.133665786970166e-08, "loss": 0.3939, "step": 4296 }, { "epoch": 0.9561637739207832, "grad_norm": 1.3411538220933814, "learning_rate": 5.082288156004367e-08, "loss": 0.4062, "step": 4297 }, { "epoch": 0.956386292834891, "grad_norm": 1.5581644041770586, "learning_rate": 5.031167599529763e-08, "loss": 0.3972, "step": 4298 }, { "epoch": 0.9566088117489987, "grad_norm": 1.265173471414971, "learning_rate": 4.98030414409989e-08, "loss": 0.3964, "step": 4299 }, { "epoch": 0.9568313306631063, "grad_norm": 1.430288701686148, "learning_rate": 4.929697816134615e-08, "loss": 0.3955, "step": 4300 }, { "epoch": 0.9570538495772141, "grad_norm": 1.3829551206262074, "learning_rate": 4.879348641920356e-08, "loss": 0.4119, "step": 4301 }, { "epoch": 0.9572763684913218, "grad_norm": 1.2881946550948993, "learning_rate": 4.829256647609914e-08, "loss": 0.4101, "step": 4302 }, { "epoch": 0.9574988874054294, "grad_norm": 1.3011856600599807, "learning_rate": 4.779421859222533e-08, "loss": 0.4083, "step": 4303 }, { "epoch": 0.9577214063195372, "grad_norm": 1.3818344565464034, "learning_rate": 4.7298443026438377e-08, "loss": 0.3936, "step": 4304 }, { "epoch": 0.9579439252336449, "grad_norm": 1.3474907150459012, "learning_rate": 4.680524003625786e-08, "loss": 0.4145, "step": 4305 }, { "epoch": 0.9581664441477525, "grad_norm": 1.3107952603961577, "learning_rate": 4.6314609877868843e-08, "loss": 0.3922, "step": 4306 }, { "epoch": 0.9583889630618603, "grad_norm": 1.2065362313654462, "learning_rate": 4.582655280611692e-08, "loss": 0.3962, "step": 4307 }, { "epoch": 0.9586114819759679, "grad_norm": 1.3748623216184943, "learning_rate": 4.5341069074514297e-08, "loss": 0.4362, "step": 4308 }, { "epoch": 0.9588340008900756, "grad_norm": 1.3813837640959246, "learning_rate": 4.4858158935234264e-08, "loss": 0.4075, "step": 4309 }, { "epoch": 0.9590565198041834, "grad_norm": 1.34412760302249, "learning_rate": 4.437782263911505e-08, "loss": 0.4143, "step": 4310 }, { "epoch": 0.959279038718291, "grad_norm": 1.3010369707724905, "learning_rate": 4.390006043565764e-08, "loss": 0.4105, "step": 4311 }, { "epoch": 0.9595015576323987, "grad_norm": 1.30017864387915, "learning_rate": 4.3424872573023525e-08, "loss": 0.3868, "step": 4312 }, { "epoch": 0.9597240765465065, "grad_norm": 1.3577191058867062, "learning_rate": 4.295225929804081e-08, "loss": 0.409, "step": 4313 }, { "epoch": 0.9599465954606141, "grad_norm": 1.369442417699258, "learning_rate": 4.2482220856197023e-08, "loss": 0.4129, "step": 4314 }, { "epoch": 0.9601691143747219, "grad_norm": 1.29844791981919, "learning_rate": 4.201475749164463e-08, "loss": 0.412, "step": 4315 }, { "epoch": 0.9603916332888296, "grad_norm": 1.2516575680164455, "learning_rate": 4.154986944719774e-08, "loss": 0.3979, "step": 4316 }, { "epoch": 0.9606141522029372, "grad_norm": 1.3320710757400929, "learning_rate": 4.1087556964331533e-08, "loss": 0.4047, "step": 4317 }, { "epoch": 0.960836671117045, "grad_norm": 1.3181233373596288, "learning_rate": 4.062782028318502e-08, "loss": 0.3935, "step": 4318 }, { "epoch": 0.9610591900311527, "grad_norm": 1.4776516723639697, "learning_rate": 4.017065964255884e-08, "loss": 0.4108, "step": 4319 }, { "epoch": 0.9612817089452603, "grad_norm": 1.2341420280661735, "learning_rate": 3.971607527991472e-08, "loss": 0.4108, "step": 4320 }, { "epoch": 0.9615042278593681, "grad_norm": 1.3432447424867335, "learning_rate": 3.9264067431377116e-08, "loss": 0.4123, "step": 4321 }, { "epoch": 0.9617267467734757, "grad_norm": 1.302992816216218, "learning_rate": 3.8814636331732106e-08, "loss": 0.4195, "step": 4322 }, { "epoch": 0.9619492656875834, "grad_norm": 1.2820672097277608, "learning_rate": 3.836778221442738e-08, "loss": 0.4062, "step": 4323 }, { "epoch": 0.9621717846016912, "grad_norm": 1.225479869557091, "learning_rate": 3.7923505311571184e-08, "loss": 0.3836, "step": 4324 }, { "epoch": 0.9623943035157988, "grad_norm": 1.361696836113354, "learning_rate": 3.748180585393391e-08, "loss": 0.4046, "step": 4325 }, { "epoch": 0.9626168224299065, "grad_norm": 1.3305606308627416, "learning_rate": 3.7042684070947574e-08, "loss": 0.3949, "step": 4326 }, { "epoch": 0.9628393413440143, "grad_norm": 1.3541392611414083, "learning_rate": 3.6606140190703633e-08, "loss": 0.4139, "step": 4327 }, { "epoch": 0.9630618602581219, "grad_norm": 1.3121540392701287, "learning_rate": 3.617217443995624e-08, "loss": 0.4318, "step": 4328 }, { "epoch": 0.9632843791722296, "grad_norm": 1.335726713243354, "learning_rate": 3.574078704411954e-08, "loss": 0.4183, "step": 4329 }, { "epoch": 0.9635068980863374, "grad_norm": 1.2961087657475658, "learning_rate": 3.5311978227268176e-08, "loss": 0.4053, "step": 4330 }, { "epoch": 0.963729417000445, "grad_norm": 1.233037670080142, "learning_rate": 3.488574821213897e-08, "loss": 0.4225, "step": 4331 }, { "epoch": 0.9639519359145527, "grad_norm": 1.2591992690972336, "learning_rate": 3.4462097220125945e-08, "loss": 0.4085, "step": 4332 }, { "epoch": 0.9641744548286605, "grad_norm": 1.3568172787878767, "learning_rate": 3.4041025471287515e-08, "loss": 0.4086, "step": 4333 }, { "epoch": 0.9643969737427681, "grad_norm": 1.3793755613954926, "learning_rate": 3.3622533184339836e-08, "loss": 0.3951, "step": 4334 }, { "epoch": 0.9646194926568759, "grad_norm": 1.34420989153965, "learning_rate": 3.320662057665958e-08, "loss": 0.4063, "step": 4335 }, { "epoch": 0.9648420115709835, "grad_norm": 1.3354158664596396, "learning_rate": 3.279328786428393e-08, "loss": 0.4079, "step": 4336 }, { "epoch": 0.9650645304850912, "grad_norm": 1.3368716976366555, "learning_rate": 3.238253526191004e-08, "loss": 0.3918, "step": 4337 }, { "epoch": 0.965287049399199, "grad_norm": 1.2806758007242867, "learning_rate": 3.197436298289392e-08, "loss": 0.4071, "step": 4338 }, { "epoch": 0.9655095683133066, "grad_norm": 1.292706793414563, "learning_rate": 3.1568771239252615e-08, "loss": 0.3988, "step": 4339 }, { "epoch": 0.9657320872274143, "grad_norm": 1.4577953006782196, "learning_rate": 3.1165760241662066e-08, "loss": 0.4225, "step": 4340 }, { "epoch": 0.9659546061415221, "grad_norm": 1.1742091016364018, "learning_rate": 3.07653301994576e-08, "loss": 0.4038, "step": 4341 }, { "epoch": 0.9661771250556297, "grad_norm": 1.390541448102375, "learning_rate": 3.036748132063394e-08, "loss": 0.4183, "step": 4342 }, { "epoch": 0.9663996439697374, "grad_norm": 1.324032655896946, "learning_rate": 2.9972213811845786e-08, "loss": 0.4061, "step": 4343 }, { "epoch": 0.9666221628838452, "grad_norm": 1.3277515555022201, "learning_rate": 2.9579527878405568e-08, "loss": 0.4023, "step": 4344 }, { "epoch": 0.9668446817979528, "grad_norm": 1.3704485826061343, "learning_rate": 2.9189423724286792e-08, "loss": 0.4034, "step": 4345 }, { "epoch": 0.9670672007120605, "grad_norm": 1.3411286677780374, "learning_rate": 2.880190155212015e-08, "loss": 0.4053, "step": 4346 }, { "epoch": 0.9672897196261683, "grad_norm": 1.4091207844274316, "learning_rate": 2.8416961563195178e-08, "loss": 0.4098, "step": 4347 }, { "epoch": 0.9675122385402759, "grad_norm": 1.3300024504119543, "learning_rate": 2.8034603957461938e-08, "loss": 0.4195, "step": 4348 }, { "epoch": 0.9677347574543836, "grad_norm": 1.2926153881238118, "learning_rate": 2.7654828933527667e-08, "loss": 0.3898, "step": 4349 }, { "epoch": 0.9679572763684913, "grad_norm": 1.2390002704392244, "learning_rate": 2.7277636688657904e-08, "loss": 0.4084, "step": 4350 }, { "epoch": 0.968179795282599, "grad_norm": 1.3359812580782295, "learning_rate": 2.6903027418777038e-08, "loss": 0.4082, "step": 4351 }, { "epoch": 0.9684023141967067, "grad_norm": 1.2823281587341757, "learning_rate": 2.6531001318468862e-08, "loss": 0.4174, "step": 4352 }, { "epoch": 0.9686248331108144, "grad_norm": 1.4589285273090071, "learning_rate": 2.61615585809738e-08, "loss": 0.4275, "step": 4353 }, { "epoch": 0.9688473520249221, "grad_norm": 1.2433297914626147, "learning_rate": 2.5794699398191125e-08, "loss": 0.4104, "step": 4354 }, { "epoch": 0.9690698709390299, "grad_norm": 1.2583336537066858, "learning_rate": 2.543042396067785e-08, "loss": 0.4141, "step": 4355 }, { "epoch": 0.9692923898531375, "grad_norm": 1.859637933198996, "learning_rate": 2.5068732457649292e-08, "loss": 0.4026, "step": 4356 }, { "epoch": 0.9695149087672452, "grad_norm": 1.3145872026997316, "learning_rate": 2.4709625076978494e-08, "loss": 0.3994, "step": 4357 }, { "epoch": 0.969737427681353, "grad_norm": 1.2692659758157965, "learning_rate": 2.435310200519625e-08, "loss": 0.412, "step": 4358 }, { "epoch": 0.9699599465954606, "grad_norm": 1.3225095999889824, "learning_rate": 2.3999163427490535e-08, "loss": 0.4177, "step": 4359 }, { "epoch": 0.9701824655095683, "grad_norm": 1.2180542321847956, "learning_rate": 2.364780952770762e-08, "loss": 0.4184, "step": 4360 }, { "epoch": 0.9704049844236761, "grad_norm": 1.3708383730214369, "learning_rate": 2.3299040488350412e-08, "loss": 0.4084, "step": 4361 }, { "epoch": 0.9706275033377837, "grad_norm": 1.340460654487031, "learning_rate": 2.2952856490579544e-08, "loss": 0.3987, "step": 4362 }, { "epoch": 0.9708500222518914, "grad_norm": 1.3240621806984985, "learning_rate": 2.2609257714213407e-08, "loss": 0.4095, "step": 4363 }, { "epoch": 0.9710725411659991, "grad_norm": 1.2896357269458283, "learning_rate": 2.2268244337727008e-08, "loss": 0.4016, "step": 4364 }, { "epoch": 0.9712950600801068, "grad_norm": 1.2559734512000726, "learning_rate": 2.1929816538252545e-08, "loss": 0.3897, "step": 4365 }, { "epoch": 0.9715175789942145, "grad_norm": 1.280586447535342, "learning_rate": 2.159397449157885e-08, "loss": 0.4172, "step": 4366 }, { "epoch": 0.9717400979083222, "grad_norm": 1.3009283776513418, "learning_rate": 2.1260718372151933e-08, "loss": 0.4058, "step": 4367 }, { "epoch": 0.9719626168224299, "grad_norm": 1.2882429580010648, "learning_rate": 2.0930048353074995e-08, "loss": 0.4148, "step": 4368 }, { "epoch": 0.9721851357365376, "grad_norm": 1.2670643201391403, "learning_rate": 2.060196460610675e-08, "loss": 0.3998, "step": 4369 }, { "epoch": 0.9724076546506453, "grad_norm": 1.3326490622782083, "learning_rate": 2.0276467301664215e-08, "loss": 0.4072, "step": 4370 }, { "epoch": 0.972630173564753, "grad_norm": 1.3847775570221241, "learning_rate": 1.995355660881937e-08, "loss": 0.4167, "step": 4371 }, { "epoch": 0.9728526924788607, "grad_norm": 1.2424275601200687, "learning_rate": 1.963323269530193e-08, "loss": 0.3979, "step": 4372 }, { "epoch": 0.9730752113929684, "grad_norm": 1.2701119696723921, "learning_rate": 1.9315495727497137e-08, "loss": 0.4147, "step": 4373 }, { "epoch": 0.9732977303070761, "grad_norm": 1.5775064649489066, "learning_rate": 1.9000345870446303e-08, "loss": 0.3951, "step": 4374 }, { "epoch": 0.9735202492211839, "grad_norm": 1.2883744399891925, "learning_rate": 1.8687783287847926e-08, "loss": 0.3997, "step": 4375 }, { "epoch": 0.9737427681352915, "grad_norm": 1.2885986672030507, "learning_rate": 1.8377808142055475e-08, "loss": 0.4082, "step": 4376 }, { "epoch": 0.9739652870493992, "grad_norm": 1.2285881148344595, "learning_rate": 1.8070420594079042e-08, "loss": 0.4004, "step": 4377 }, { "epoch": 0.9741878059635068, "grad_norm": 1.339739007988351, "learning_rate": 1.7765620803585348e-08, "loss": 0.4114, "step": 4378 }, { "epoch": 0.9744103248776146, "grad_norm": 1.329815563400597, "learning_rate": 1.7463408928895532e-08, "loss": 0.4182, "step": 4379 }, { "epoch": 0.9746328437917223, "grad_norm": 1.2435709556064616, "learning_rate": 1.7163785126986797e-08, "loss": 0.4215, "step": 4380 }, { "epoch": 0.97485536270583, "grad_norm": 1.3298185698909206, "learning_rate": 1.6866749553492433e-08, "loss": 0.4019, "step": 4381 }, { "epoch": 0.9750778816199377, "grad_norm": 1.3821407686767715, "learning_rate": 1.65723023627018e-08, "loss": 0.4103, "step": 4382 }, { "epoch": 0.9753004005340454, "grad_norm": 1.2930670650684821, "learning_rate": 1.628044370755921e-08, "loss": 0.4035, "step": 4383 }, { "epoch": 0.9755229194481531, "grad_norm": 1.293534710638642, "learning_rate": 1.5991173739663967e-08, "loss": 0.399, "step": 4384 }, { "epoch": 0.9757454383622608, "grad_norm": 1.2772683916009855, "learning_rate": 1.5704492609271425e-08, "loss": 0.4129, "step": 4385 }, { "epoch": 0.9759679572763685, "grad_norm": 1.2675626842714143, "learning_rate": 1.5420400465292473e-08, "loss": 0.4138, "step": 4386 }, { "epoch": 0.9761904761904762, "grad_norm": 1.2866655831488123, "learning_rate": 1.5138897455291847e-08, "loss": 0.4108, "step": 4387 }, { "epoch": 0.9764129951045839, "grad_norm": 1.3036003901888005, "learning_rate": 1.4859983725490357e-08, "loss": 0.3862, "step": 4388 }, { "epoch": 0.9766355140186916, "grad_norm": 1.3250608447692114, "learning_rate": 1.4583659420764896e-08, "loss": 0.426, "step": 4389 }, { "epoch": 0.9768580329327993, "grad_norm": 1.324199004071251, "learning_rate": 1.4309924684645094e-08, "loss": 0.4101, "step": 4390 }, { "epoch": 0.977080551846907, "grad_norm": 1.3399665967508312, "learning_rate": 1.4038779659317769e-08, "loss": 0.4155, "step": 4391 }, { "epoch": 0.9773030707610146, "grad_norm": 1.3869954708854662, "learning_rate": 1.377022448562193e-08, "loss": 0.3961, "step": 4392 }, { "epoch": 0.9775255896751224, "grad_norm": 1.348899680161832, "learning_rate": 1.3504259303054323e-08, "loss": 0.4044, "step": 4393 }, { "epoch": 0.9777481085892301, "grad_norm": 1.3173386913125043, "learning_rate": 1.3240884249763886e-08, "loss": 0.4127, "step": 4394 }, { "epoch": 0.9779706275033377, "grad_norm": 1.4119862784227475, "learning_rate": 1.2980099462556184e-08, "loss": 0.3925, "step": 4395 }, { "epoch": 0.9781931464174455, "grad_norm": 1.3657693566407212, "learning_rate": 1.2721905076889529e-08, "loss": 0.4149, "step": 4396 }, { "epoch": 0.9784156653315532, "grad_norm": 1.3846791623322598, "learning_rate": 1.2466301226877752e-08, "loss": 0.3959, "step": 4397 }, { "epoch": 0.9786381842456608, "grad_norm": 1.2031146607668353, "learning_rate": 1.2213288045288541e-08, "loss": 0.3985, "step": 4398 }, { "epoch": 0.9788607031597686, "grad_norm": 1.2504493523980305, "learning_rate": 1.1962865663544544e-08, "loss": 0.3991, "step": 4399 }, { "epoch": 0.9790832220738763, "grad_norm": 1.3791482545058582, "learning_rate": 1.171503421172282e-08, "loss": 0.4166, "step": 4400 }, { "epoch": 0.979305740987984, "grad_norm": 1.3045066355874295, "learning_rate": 1.1469793818553176e-08, "loss": 0.4099, "step": 4401 }, { "epoch": 0.9795282599020917, "grad_norm": 1.2786708111193263, "learning_rate": 1.1227144611421492e-08, "loss": 0.4043, "step": 4402 }, { "epoch": 0.9797507788161994, "grad_norm": 1.4435681773561218, "learning_rate": 1.0987086716365835e-08, "loss": 0.4243, "step": 4403 }, { "epoch": 0.9799732977303071, "grad_norm": 1.4591476312167022, "learning_rate": 1.0749620258079241e-08, "loss": 0.4226, "step": 4404 }, { "epoch": 0.9801958166444148, "grad_norm": 1.4524220975165243, "learning_rate": 1.0514745359909706e-08, "loss": 0.4089, "step": 4405 }, { "epoch": 0.9804183355585224, "grad_norm": 1.2553795425070198, "learning_rate": 1.0282462143856864e-08, "loss": 0.4067, "step": 4406 }, { "epoch": 0.9806408544726302, "grad_norm": 1.4231571940902334, "learning_rate": 1.0052770730575867e-08, "loss": 0.4207, "step": 4407 }, { "epoch": 0.9808633733867379, "grad_norm": 1.4227244980971268, "learning_rate": 9.825671239374612e-09, "loss": 0.395, "step": 4408 }, { "epoch": 0.9810858923008455, "grad_norm": 1.270344144594411, "learning_rate": 9.601163788215406e-09, "loss": 0.3829, "step": 4409 }, { "epoch": 0.9813084112149533, "grad_norm": 1.2553537825120982, "learning_rate": 9.37924849371441e-09, "loss": 0.407, "step": 4410 }, { "epoch": 0.981530930129061, "grad_norm": 1.4055649776138608, "learning_rate": 9.159925471139419e-09, "loss": 0.4164, "step": 4411 }, { "epoch": 0.9817534490431686, "grad_norm": 1.2632018416647306, "learning_rate": 8.943194834414304e-09, "loss": 0.4082, "step": 4412 }, { "epoch": 0.9819759679572764, "grad_norm": 1.2777840330832306, "learning_rate": 8.729056696115123e-09, "loss": 0.4039, "step": 4413 }, { "epoch": 0.9821984868713841, "grad_norm": 1.2331584556694846, "learning_rate": 8.517511167470683e-09, "loss": 0.3983, "step": 4414 }, { "epoch": 0.9824210057854917, "grad_norm": 1.2845732656330897, "learning_rate": 8.308558358364193e-09, "loss": 0.4084, "step": 4415 }, { "epoch": 0.9826435246995995, "grad_norm": 1.297953844449292, "learning_rate": 8.102198377332172e-09, "loss": 0.4089, "step": 4416 }, { "epoch": 0.9828660436137072, "grad_norm": 1.3437155661407545, "learning_rate": 7.898431331563317e-09, "loss": 0.4309, "step": 4417 }, { "epoch": 0.9830885625278148, "grad_norm": 1.2694717621057454, "learning_rate": 7.697257326900187e-09, "loss": 0.4099, "step": 4418 }, { "epoch": 0.9833110814419226, "grad_norm": 1.3550375171808844, "learning_rate": 7.498676467838084e-09, "loss": 0.4143, "step": 4419 }, { "epoch": 0.9835336003560302, "grad_norm": 1.3724672851141104, "learning_rate": 7.3026888575267184e-09, "loss": 0.4149, "step": 4420 }, { "epoch": 0.983756119270138, "grad_norm": 1.2759234070466154, "learning_rate": 7.10929459776688e-09, "loss": 0.4063, "step": 4421 }, { "epoch": 0.9839786381842457, "grad_norm": 1.4320295302277422, "learning_rate": 6.918493789012659e-09, "loss": 0.4058, "step": 4422 }, { "epoch": 0.9842011570983533, "grad_norm": 1.3450495484778342, "learning_rate": 6.730286530372554e-09, "loss": 0.4033, "step": 4423 }, { "epoch": 0.9844236760124611, "grad_norm": 1.1933180132586, "learning_rate": 6.5446729196061434e-09, "loss": 0.3957, "step": 4424 }, { "epoch": 0.9846461949265688, "grad_norm": 1.336262727455836, "learning_rate": 6.361653053126305e-09, "loss": 0.4053, "step": 4425 }, { "epoch": 0.9848687138406764, "grad_norm": 1.3230893573001001, "learning_rate": 6.181227025999214e-09, "loss": 0.4065, "step": 4426 }, { "epoch": 0.9850912327547842, "grad_norm": 1.327576180467684, "learning_rate": 6.0033949319437956e-09, "loss": 0.4082, "step": 4427 }, { "epoch": 0.9853137516688919, "grad_norm": 1.2630739770944772, "learning_rate": 5.8281568633300475e-09, "loss": 0.395, "step": 4428 }, { "epoch": 0.9855362705829995, "grad_norm": 1.328044811379314, "learning_rate": 5.6555129111823815e-09, "loss": 0.3989, "step": 4429 }, { "epoch": 0.9857587894971073, "grad_norm": 1.1994547651599567, "learning_rate": 5.485463165176841e-09, "loss": 0.3755, "step": 4430 }, { "epoch": 0.985981308411215, "grad_norm": 1.2543719895990477, "learning_rate": 5.318007713642215e-09, "loss": 0.3914, "step": 4431 }, { "epoch": 0.9862038273253226, "grad_norm": 1.369339674773804, "learning_rate": 5.153146643559481e-09, "loss": 0.4026, "step": 4432 }, { "epoch": 0.9864263462394304, "grad_norm": 1.4111113937550317, "learning_rate": 4.990880040562918e-09, "loss": 0.4076, "step": 4433 }, { "epoch": 0.986648865153538, "grad_norm": 1.3281781279629166, "learning_rate": 4.831207988937325e-09, "loss": 0.4231, "step": 4434 }, { "epoch": 0.9868713840676457, "grad_norm": 1.2748700920590583, "learning_rate": 4.674130571621915e-09, "loss": 0.4153, "step": 4435 }, { "epoch": 0.9870939029817535, "grad_norm": 1.3970366620382828, "learning_rate": 4.519647870206978e-09, "loss": 0.4146, "step": 4436 }, { "epoch": 0.9873164218958611, "grad_norm": 1.3324824075633368, "learning_rate": 4.367759964934992e-09, "loss": 0.398, "step": 4437 }, { "epoch": 0.9875389408099688, "grad_norm": 1.3631062109154855, "learning_rate": 4.218466934701182e-09, "loss": 0.4103, "step": 4438 }, { "epoch": 0.9877614597240766, "grad_norm": 1.3996709772132647, "learning_rate": 4.071768857052405e-09, "loss": 0.4025, "step": 4439 }, { "epoch": 0.9879839786381842, "grad_norm": 1.338914037269949, "learning_rate": 3.927665808188263e-09, "loss": 0.4042, "step": 4440 }, { "epoch": 0.988206497552292, "grad_norm": 1.2832941465870564, "learning_rate": 3.7861578629594385e-09, "loss": 0.4076, "step": 4441 }, { "epoch": 0.9884290164663997, "grad_norm": 1.2754398778637583, "learning_rate": 3.647245094869356e-09, "loss": 0.4082, "step": 4442 }, { "epoch": 0.9886515353805073, "grad_norm": 1.341103076385482, "learning_rate": 3.5109275760736304e-09, "loss": 0.4113, "step": 4443 }, { "epoch": 0.9888740542946151, "grad_norm": 1.4329393278873048, "learning_rate": 3.377205377379511e-09, "loss": 0.4051, "step": 4444 }, { "epoch": 0.9890965732087228, "grad_norm": 1.278986562606672, "learning_rate": 3.246078568246436e-09, "loss": 0.3814, "step": 4445 }, { "epoch": 0.9893190921228304, "grad_norm": 1.315083241656401, "learning_rate": 3.1175472167843667e-09, "loss": 0.4091, "step": 4446 }, { "epoch": 0.9895416110369382, "grad_norm": 1.399763942785808, "learning_rate": 2.9916113897571207e-09, "loss": 0.4154, "step": 4447 }, { "epoch": 0.9897641299510458, "grad_norm": 1.472093601044324, "learning_rate": 2.8682711525790387e-09, "loss": 0.4101, "step": 4448 }, { "epoch": 0.9899866488651535, "grad_norm": 1.332674519148499, "learning_rate": 2.7475265693160947e-09, "loss": 0.4014, "step": 4449 }, { "epoch": 0.9902091677792613, "grad_norm": 1.2959115987303096, "learning_rate": 2.629377702687563e-09, "loss": 0.3997, "step": 4450 } ], "logging_steps": 1.0, "max_steps": 4494, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 458283165356032.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }